mirror of https://github.com/moses-smt/mosesdecoder.git
refactor parsing of feature function args
This commit is contained in:
parent 96d116dba0
commit abe6bb7c22
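The diff below applies one pattern across many feature functions: instead of each constructor scanning its raw argument tokens inline, the pre-tokenized key/value pairs in m_args are walked in a loop and offered to a virtual OverrideParameter(key, value); pairs the class recognises are consumed and erased, unrecognised ones are left for the base class or rejected. The following is a minimal, self-contained sketch of that loop only; the class and member names are invented for illustration and are not the actual Moses API.

#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Toy stand-in for a Moses feature function: only the argument-parsing
// loop seen throughout the diff is reproduced here.
class ToyFeature
{
public:
  explicit ToyFeature(const std::vector<std::pair<std::string, std::string> > &args)
    : m_args(args) {
    size_t ind = 0;
    while (ind < m_args.size()) {
      bool consumed = OverrideParameter(m_args[ind].first, m_args[ind].second);
      if (consumed) {
        m_args.erase(m_args.begin() + ind);  // recognised: drop the pair
      } else {
        ++ind;                               // unknown: leave it in place
      }
    }
  }
  virtual ~ToyFeature() {}

  // Returns true if this class handled the key.  A subclass overrides this,
  // handles its own keys, and falls back to this implementation otherwise.
  virtual bool OverrideParameter(const std::string &key, const std::string &value) {
    if (key == "name") {
      m_name = value;
    } else if (key == "path") {
      m_path = value;
    } else {
      return false;
    }
    return true;
  }

  std::string m_name, m_path;
  std::vector<std::pair<std::string, std::string> > m_args;  // whatever was not consumed
};

int main()
{
  std::vector<std::pair<std::string, std::string> > args;
  args.push_back(std::make_pair(std::string("name"), std::string("Distortion0")));
  args.push_back(std::make_pair(std::string("path"), std::string("model/table.gz")));
  ToyFeature ff(args);
  std::cout << ff.m_name << " " << ff.m_path << std::endl;
  return 0;
}

Note that in the diff each derived class runs this loop again in its own constructor rather than relying on a single loop in the base class: a virtual call made during base-class construction would not reach the derived override, so each constructor re-parses its own keys.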
@@ -38,8 +38,8 @@ protected:
   std::vector<WordPtr> m_words;
 
 public:
-  Phrase()
-  {}
+  Phrase() {
+  }
 
   virtual ~Phrase() {}
 
@@ -61,8 +61,8 @@ protected:
   UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
 
 public:
-  TargetPhrase()
-  {}
+  TargetPhrase() {
+  }
   TargetPhrase(size_t numScores);
   TargetPhrase(const TargetPhrase &copy);
   virtual ~TargetPhrase();
@@ -42,8 +42,8 @@ protected:
 
 public:
   Vocab()
-    :m_nextId(1)
-  {}
+    :m_nextId(1) {
+  }
   UINT64 AddVocabId(const std::string &str);
   UINT64 GetVocabId(const std::string &str, bool &found) const;
   const std::string &GetString(UINT32 vocabId) const {
@@ -46,13 +46,13 @@ private:
   UINT64 m_vocabId;
 
 public:
-  explicit Word()
-  {}
+  explicit Word() {
+  }
 
   explicit Word(bool isNonTerminal)
     :m_isNonTerminal(isNonTerminal)
-    ,m_vocabId(0)
-  {}
+    ,m_vocabId(0) {
+  }
 
   Word(const Word &copy);
   ~Word();
@@ -38,8 +38,8 @@ public:
     ,m_start_null(start_null)
     ,m_end_null(end_null)
     ,m_pre_null(pre_null)
-    ,m_post_null(post_null)
-  {}
+    ,m_post_null(post_null) {
+  }
   ~PhrasePair () {}
 
   void PrintTarget( std::ostream* out ) const;
@@ -20,8 +20,8 @@ class _fdstream
 {
 protected:
   _fdstream() :
-    _file_descriptor(-1), _filebuf(NULL)
-  { }
+    _file_descriptor(-1), _filebuf(NULL) {
+  }
 
   _fdstream(int file_descriptor, std::ios_base::openmode openmode) :
     _file_descriptor(file_descriptor), _openmode(openmode) {
@ -59,8 +59,8 @@ class ifdstream : public _fdstream
|
||||
{
|
||||
public:
|
||||
ifdstream() :
|
||||
_fdstream(), _stream(NULL)
|
||||
{ }
|
||||
_fdstream(), _stream(NULL) {
|
||||
}
|
||||
|
||||
ifdstream(int file_descriptor) :
|
||||
_fdstream(file_descriptor, std::ios_base::in) {
|
||||
@ -116,8 +116,8 @@ class ofdstream : public _fdstream
|
||||
{
|
||||
public:
|
||||
ofdstream() :
|
||||
_fdstream(), _stream(NULL)
|
||||
{ }
|
||||
_fdstream(), _stream(NULL) {
|
||||
}
|
||||
|
||||
ofdstream(int file_descriptor) :
|
||||
_fdstream(file_descriptor, std::ios_base::out) {
|
||||
|
@ -65,7 +65,7 @@ int main(int argc, char **argv)
|
||||
sourcePhrase.CreateFromString(Input, input, line, "||dummy_string||", NULL);
|
||||
|
||||
TargetPhraseVectorPtr decodedPhraseColl
|
||||
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
|
||||
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
|
||||
|
||||
if(decodedPhraseColl != NULL) {
|
||||
if(reportCounts)
|
||||
|
@ -81,8 +81,8 @@ class TranslationTask : public Task
|
||||
public:
|
||||
TranslationTask(InputType *source, IOWrapper &ioWrapper)
|
||||
: m_source(source)
|
||||
, m_ioWrapper(ioWrapper)
|
||||
{}
|
||||
, m_ioWrapper(ioWrapper) {
|
||||
}
|
||||
|
||||
~TranslationTask() {
|
||||
delete m_source;
|
||||
|
@ -52,8 +52,8 @@ public:
|
||||
Stack stack=Stack())
|
||||
: m_coverage(coverage)
|
||||
, m_label(label)
|
||||
, m_stack(stack)
|
||||
{}
|
||||
, m_stack(stack) {
|
||||
}
|
||||
|
||||
const WordsRange &GetCoverage() const {
|
||||
return m_coverage;
|
||||
|
@ -95,8 +95,8 @@ public:
|
||||
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
|
||||
* currently an empty function
|
||||
*/
|
||||
void CalcDecoderStatistics() const
|
||||
{ }
|
||||
void CalcDecoderStatistics() const {
|
||||
}
|
||||
|
||||
void ResetSentenceStats(const InputType& source) {
|
||||
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
|
||||
|
@ -49,8 +49,8 @@ public:
|
||||
: m_stackVec(stackVec)
|
||||
, m_targetPhraseCollection(&targetPhraseColl)
|
||||
, m_wordsRange(&wordsRange)
|
||||
, m_estimateOfBestScore(score)
|
||||
{}
|
||||
, m_estimateOfBestScore(score) {
|
||||
}
|
||||
|
||||
~ChartTranslationOptions() {}
|
||||
|
||||
|
@@ -36,13 +36,12 @@ DecodeFeature::DecodeFeature( const std::string& description
   VERBOSE(2,"DecodeFeature:" << std::endl);
   size_t ind = 0;
   while (ind < m_args.size()) {
-    vector<string> &args = m_args[ind];
+    vector<string> &args = m_args[ind];
     bool consumed = OverrideParameter(args[0], args[1]);
     if (consumed) {
-      m_args.erase(m_args.begin() + ind);
-    }
-    else {
-      ++ind;
+      m_args.erase(m_args.begin() + ind);
+    } else {
+      ++ind;
     }
   }
 }
@@ -70,19 +69,17 @@ DecodeFeature::DecodeFeature(const std::string& description
 
 bool DecodeFeature::OverrideParameter(const std::string& key, const std::string& value)
 {
-  if (key == "input-factor") {
-    m_input =Tokenize<FactorType>(value, ",");
-    m_inputFactors = FactorMask(m_input);
-  }
-  else if (key == "output-factor") {
-    m_output =Tokenize<FactorType>(value, ",");
-    m_outputFactors = FactorMask(m_output);
-  }
-  else {
-    return StatelessFeatureFunction::OverrideParameter(key, value);
-  }
+  if (key == "input-factor") {
+    m_input =Tokenize<FactorType>(value, ",");
+    m_inputFactors = FactorMask(m_input);
+  } else if (key == "output-factor") {
+    m_output =Tokenize<FactorType>(value, ",");
+    m_outputFactors = FactorMask(m_output);
+  } else {
+    return StatelessFeatureFunction::OverrideParameter(key, value);
+  }
 
-  return true;
+  return true;
 }
 
@ -47,14 +47,14 @@ public:
|
||||
**/
|
||||
DecodeGraph(size_t position)
|
||||
: m_position(position)
|
||||
, m_maxChartSpan(NOT_FOUND)
|
||||
{}
|
||||
, m_maxChartSpan(NOT_FOUND) {
|
||||
}
|
||||
|
||||
// for chart decoding
|
||||
DecodeGraph(size_t position, size_t maxChartSpan)
|
||||
: m_position(position)
|
||||
, m_maxChartSpan(maxChartSpan)
|
||||
{}
|
||||
, m_maxChartSpan(maxChartSpan) {
|
||||
}
|
||||
|
||||
//! iterators
|
||||
typedef std::list<const DecodeStep*>::iterator iterator;
|
||||
|
@ -18,8 +18,8 @@ class DistortionScoreProducer : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
DistortionScoreProducer(const std::string &line)
|
||||
: StatefulFeatureFunction("Distortion", 1, line)
|
||||
{}
|
||||
: StatefulFeatureFunction("Distortion", 1, line) {
|
||||
}
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
|
@@ -22,10 +22,10 @@ std::vector<const StatefulFeatureFunction*> StatefulFeatureFunction::m_stateful
 FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name)
 {
   for (size_t i = 0; i < m_producers.size(); ++i) {
-    FeatureFunction &ff = *m_producers[i];
-    if (ff.GetScoreProducerDescription() == name) {
-      return ff;
-    }
+    FeatureFunction &ff = *m_producers[i];
+    if (ff.GetScoreProducerDescription() == name) {
+      return ff;
+    }
   }
 
   throw "Unknown feature " + name;
@@ -50,25 +50,24 @@ void FeatureFunction::Initialize(const std::string& description, const std::stri
 
   size_t ind = 0;
   while (ind < m_args.size()) {
-    vector<string> &args = m_args[ind];
+    vector<string> &args = m_args[ind];
     bool consumed = OverrideParameter(args[0], args[1]);
     if (consumed) {
-      m_args.erase(m_args.begin() + ind);
-    }
-    else {
-      ++ind;
+      m_args.erase(m_args.begin() + ind);
+    } else {
+      ++ind;
     }
   }
 
   if (m_description == "") {
-    size_t index = description_counts.count(description);
+    size_t index = description_counts.count(description);
 
-    ostringstream dstream;
-    dstream << description;
-    dstream << index;
+    ostringstream dstream;
+    dstream << description;
+    dstream << index;
 
-    description_counts.insert(description);
-    m_description = dstream.str();
+    description_counts.insert(description);
+    m_description = dstream.str();
   }
 
   ScoreComponentCollection::RegisterScoreProducer(this);
@@ -93,18 +92,18 @@ void FeatureFunction::ParseLine(const std::string& description, const std::strin
 
 bool FeatureFunction::OverrideParameter(const std::string& key, const std::string& value)
 {
-  if (key == "num-features") {
-    m_numScoreComponents = Scan<size_t>(value);
-  } else if (key == "name") {
-    m_description = value;
-  } else if (key == "tuneable") {
-    m_tuneable = Scan<bool>(value);
-  } else {
-    //UTIL_THROW(util::Exception, "unknown key" << key);
-    return false;
-  }
+  if (key == "num-features") {
+    m_numScoreComponents = Scan<size_t>(value);
+  } else if (key == "name") {
+    m_description = value;
+  } else if (key == "tuneable") {
+    m_tuneable = Scan<bool>(value);
+  } else {
+    //UTIL_THROW(util::Exception, "unknown key" << key);
+    return false;
+  }
 
-  return true;
+  return true;
 }
 
 }
@ -55,8 +55,8 @@ public:
|
||||
virtual ~FeatureFunction();
|
||||
|
||||
//! override to load model files
|
||||
virtual void Load()
|
||||
{}
|
||||
virtual void Load() {
|
||||
}
|
||||
|
||||
static void ResetDescriptionCounts() {
|
||||
description_counts.clear();
|
||||
@ -80,12 +80,12 @@ public:
|
||||
}
|
||||
|
||||
//! Called before search and collecting of translation options
|
||||
virtual void InitializeForInput(InputType const& source)
|
||||
{}
|
||||
virtual void InitializeForInput(InputType const& source) {
|
||||
}
|
||||
|
||||
// clean up temporary memory, called after processing each sentence
|
||||
virtual void CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{}
|
||||
virtual void CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
}
|
||||
|
||||
const std::string &GetArgLine() const {
|
||||
return m_argLine;
|
||||
@ -99,12 +99,12 @@ public:
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
virtual void Evaluate(const InputType &source
|
||||
, ScoreComponentCollection &scoreBreakdown) const
|
||||
{}
|
||||
, ScoreComponentCollection &scoreBreakdown) const {
|
||||
}
|
||||
|
||||
virtual bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
};
|
||||
|
@ -45,6 +45,11 @@ PhrasePairFeature::PhrasePairFeature(const std::string &line)
|
||||
Load(filePathSource);
|
||||
}
|
||||
|
||||
bool PhrasePairFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
bool PhrasePairFeature::Load(const std::string &filePathSource/*, const std::string &filePathTarget*/)
|
||||
{
|
||||
if (m_domainTrigger) {
|
||||
|
@ -45,6 +45,7 @@ public:
|
||||
}
|
||||
|
||||
bool Load(const std::string &filePathSource/*, const std::string &filePathTarget*/);
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
};
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "moses/Util.h"
|
||||
|
||||
#include "util/string_piece_hash.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -22,37 +23,42 @@ SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
|
||||
{
|
||||
std::cerr << "Initializing source word deletion feature.." << std::endl;
|
||||
|
||||
string filename;
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
|
||||
if (args[0] == "factor") {
|
||||
m_factorType = Scan<FactorType>(args[1]);
|
||||
} else if (args[0] == "path") {
|
||||
filename = args[1];
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
throw "Unknown argument " + args[0];
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
// load word list for restricted feature set
|
||||
if (filename != "") {
|
||||
cerr << "loading source word deletion word list from " << filename << endl;
|
||||
if (!Load(filename)) {
|
||||
UserMessage::Add("Unable to load word list for source word deletion feature from file " + filename);
|
||||
//return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SourceWordDeletionFeature::Load(const std::string &filePath)
|
||||
bool SourceWordDeletionFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
ifstream inFile(filePath.c_str());
|
||||
if (!inFile) {
|
||||
cerr << "could not open file " << filePath << endl;
|
||||
return false;
|
||||
if (key == "factor") {
|
||||
m_factorType = Scan<FactorType>(value);
|
||||
} else if (key == "path") {
|
||||
m_filename = value;
|
||||
} else {
|
||||
StatelessFeatureFunction::OverrideParameter(key, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void SourceWordDeletionFeature::Load()
|
||||
{
|
||||
if (m_filename == "") {
|
||||
return;
|
||||
}
|
||||
|
||||
cerr << "loading source word deletion word list from " << m_filename << endl;
|
||||
|
||||
ifstream inFile(m_filename.c_str());
|
||||
UTIL_THROW_IF(!inFile, util::Exception, "Can't open file " << m_filename);
|
||||
|
||||
std::string line;
|
||||
while (getline(inFile, line)) {
|
||||
m_vocab.insert(line);
|
||||
@ -61,7 +67,6 @@ bool SourceWordDeletionFeature::Load(const std::string &filePath)
|
||||
inFile.close();
|
||||
|
||||
m_unrestricted = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void SourceWordDeletionFeature::Evaluate(const Phrase &source
|
||||
|
@ -19,11 +19,12 @@ private:
|
||||
boost::unordered_set<std::string> m_vocab;
|
||||
FactorType m_factorType;
|
||||
bool m_unrestricted;
|
||||
std::string m_filename;
|
||||
|
||||
public:
|
||||
SourceWordDeletionFeature(const std::string &line);
|
||||
|
||||
bool Load(const std::string &filePath);
|
||||
void Load();
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
@ -38,6 +39,8 @@ public:
|
||||
const TargetPhrase& targetPhrase,
|
||||
ScoreComponentCollection* accumulator,
|
||||
const AlignmentInfo &alignmentInfo) const;
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -24,15 +24,15 @@ public:
|
||||
* This should be implemented for features that apply to phrase-based models.
|
||||
**/
|
||||
virtual void Evaluate(const PhraseBasedFeatureContext& context,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
/**
|
||||
* Same for chart-based features.
|
||||
**/
|
||||
virtual void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
ScoreComponentCollection* accumulator) const {
|
||||
}
|
||||
|
||||
virtual bool IsStateless() const {
|
||||
return true;
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include "moses/Hypothesis.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "util/string_piece_hash.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -21,29 +22,42 @@ TargetBigramFeature::TargetBigramFeature(const std::string &line)
|
||||
{
|
||||
std::cerr << "Initializing target bigram feature.." << std::endl;
|
||||
|
||||
vector<string> tokens = Tokenize(line);
|
||||
//CHECK(tokens[0] == m_description);
|
||||
|
||||
// set factor
|
||||
m_factorType = Scan<FactorType>(tokens[1]);
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
FactorCollection& factorCollection = FactorCollection::Instance();
|
||||
const Factor* bosFactor =
|
||||
factorCollection.AddFactor(Output,m_factorType,BOS_);
|
||||
m_bos.SetFactor(m_factorType,bosFactor);
|
||||
|
||||
const string &filePath = tokens[2];
|
||||
Load(filePath);
|
||||
}
|
||||
|
||||
bool TargetBigramFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "factor") {
|
||||
m_factorType = Scan<FactorType>(value);
|
||||
} else if (key == "path") {
|
||||
m_filePath = value;
|
||||
} else {
|
||||
StatefulFeatureFunction::OverrideParameter(key, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool TargetBigramFeature::Load(const std::string &filePath)
|
||||
void TargetBigramFeature::Load()
|
||||
{
|
||||
if (filePath == "*") return true; //allow all
|
||||
ifstream inFile(filePath.c_str());
|
||||
if (!inFile) {
|
||||
return false;
|
||||
}
|
||||
if (m_filePath == "*")
|
||||
return ; //allow all
|
||||
ifstream inFile(m_filePath.c_str());
|
||||
UTIL_THROW_IF(!inFile, util::Exception, "Can't open file " << m_filePath);
|
||||
|
||||
std::string line;
|
||||
m_vocab.insert(BOS_);
|
||||
@ -53,7 +67,6 @@ bool TargetBigramFeature::Load(const std::string &filePath)
|
||||
}
|
||||
|
||||
inFile.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@ -33,7 +33,7 @@ class TargetBigramFeature : public StatefulFeatureFunction
|
||||
public:
|
||||
TargetBigramFeature(const std::string &line);
|
||||
|
||||
bool Load(const std::string &filePath);
|
||||
void Load();
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const;
|
||||
|
||||
@ -47,10 +47,12 @@ public:
|
||||
ScoreComponentCollection* ) const {
|
||||
abort();
|
||||
}
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
private:
|
||||
FactorType m_factorType;
|
||||
Word m_bos;
|
||||
std::string m_filePath;
|
||||
boost::unordered_set<std::string> m_vocab;
|
||||
};
|
||||
|
||||
|
@ -42,13 +42,29 @@ TargetNgramFeature::TargetNgramFeature(const std::string &line)
|
||||
{
|
||||
std::cerr << "Initializing target ngram feature.." << std::endl;
|
||||
|
||||
vector<string> tokens = Tokenize(line);
|
||||
//CHECK(tokens[0] == m_description);
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CHECK(tokens.size() == 4);
|
||||
m_factorType = Scan<FactorType>(tokens[1]);
|
||||
m_n = Scan<size_t>(tokens[2]);
|
||||
m_lower_ngrams = Scan<bool>(tokens[3]);
|
||||
bool TargetNgramFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "factor") {
|
||||
m_factorType = Scan<FactorType>(value);
|
||||
} else if (key == "n") {
|
||||
m_n = Scan<size_t>(value);
|
||||
} else if (key == "lower-ngrams") {
|
||||
m_lower_ngrams = Scan<bool>(value);
|
||||
} else {
|
||||
StatefulFeatureFunction::OverrideParameter(key, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -191,6 +191,7 @@ public:
|
||||
|
||||
virtual FFState* EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
private:
|
||||
FactorType m_factorType;
|
||||
|
@ -20,18 +20,28 @@ TargetWordInsertionFeature::TargetWordInsertionFeature(const std::string &line)
|
||||
m_unrestricted(true)
|
||||
{
|
||||
std::cerr << "Initializing target word insertion feature.." << std::endl;
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
|
||||
if (args[0] == "factor") {
|
||||
m_factorType = Scan<FactorType>(args[1]);
|
||||
} else if (args[0] == "path") {
|
||||
m_filename = args[1];
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
throw "Unknown argument " + args[0];
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool TargetWordInsertionFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "factor") {
|
||||
m_factorType = Scan<FactorType>(value);
|
||||
} else if (key == "path") {
|
||||
m_filename = value;
|
||||
} else {
|
||||
StatelessFeatureFunction::OverrideParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
void TargetWordInsertionFeature::Load()
|
||||
|
@ -37,6 +37,7 @@ public:
|
||||
const TargetPhrase& targetPhrase,
|
||||
ScoreComponentCollection* accumulator,
|
||||
const AlignmentInfo &alignmentInfo) const;
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
};
|
||||
|
||||
|
@ -27,33 +27,14 @@ WordTranslationFeature::WordTranslationFeature(const std::string &line)
|
||||
{
|
||||
std::cerr << "Initializing word translation feature.. " << endl;
|
||||
|
||||
string texttype;
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
|
||||
if (args[0] == "input-factor") {
|
||||
m_factorTypeSource = Scan<FactorType>(args[1]);
|
||||
} else if (args[0] == "output-factor") {
|
||||
m_factorTypeTarget = Scan<FactorType>(args[1]);
|
||||
} else if (args[0] == "simple") {
|
||||
m_simple = Scan<bool>(args[1]);
|
||||
} else if (args[0] == "source-context") {
|
||||
m_sourceContext = Scan<bool>(args[1]);
|
||||
} else if (args[0] == "target-context") {
|
||||
m_targetContext = Scan<bool>(args[1]);
|
||||
} else if (args[0] == "ignore-punctuation") {
|
||||
m_ignorePunctuation = Scan<bool>(args[1]);
|
||||
} else if (args[0] == "domain-trigger") {
|
||||
m_domainTrigger = Scan<bool>(args[1]);
|
||||
} else if (args[0] == "texttype") {
|
||||
texttype = args[1];
|
||||
} else if (args[0] == "source-path") {
|
||||
m_filePathSource = args[1];
|
||||
} else if (args[0] == "target-path") {
|
||||
m_filePathTarget = args[1];
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
throw "Unknown argument " + args[0];
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,6 +70,33 @@ WordTranslationFeature::WordTranslationFeature(const std::string &line)
|
||||
|
||||
}
|
||||
|
||||
bool WordTranslationFeature::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "input-factor") {
|
||||
m_factorTypeSource = Scan<FactorType>(value);
|
||||
} else if (key == "output-factor") {
|
||||
m_factorTypeTarget = Scan<FactorType>(value);
|
||||
} else if (key == "simple") {
|
||||
m_simple = Scan<bool>(value);
|
||||
} else if (key == "source-context") {
|
||||
m_sourceContext = Scan<bool>(value);
|
||||
} else if (key == "target-context") {
|
||||
m_targetContext = Scan<bool>(value);
|
||||
} else if (key == "ignore-punctuation") {
|
||||
m_ignorePunctuation = Scan<bool>(value);
|
||||
} else if (key == "domain-trigger") {
|
||||
m_domainTrigger = Scan<bool>(value);
|
||||
} else if (key == "texttype") {
|
||||
//texttype = value; TODO not used
|
||||
} else if (key == "source-path") {
|
||||
m_filePathSource = value;
|
||||
} else if (key == "target-path") {
|
||||
m_filePathTarget = value;
|
||||
} else {
|
||||
StatelessFeatureFunction::OverrideParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
void WordTranslationFeature::Load()
|
||||
{
|
||||
// load word list for restricted feature set
|
||||
|
@ -52,6 +52,7 @@ public:
|
||||
|
||||
void EvaluateChart(const ChartBasedFeatureContext& context,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -87,8 +87,8 @@ class FactorCollection
|
||||
|
||||
//! constructor. only the 1 static variable can be created
|
||||
FactorCollection()
|
||||
:m_factorId(0)
|
||||
{}
|
||||
:m_factorId(0) {
|
||||
}
|
||||
|
||||
public:
|
||||
static FactorCollection& Instance() {
|
||||
|
@ -62,8 +62,8 @@ protected:
|
||||
//! Usually <s> and </s>
|
||||
|
||||
LanguageModelImplementation(const std::string& description, const std::string &line)
|
||||
:LanguageModel(description, line)
|
||||
{}
|
||||
:LanguageModel(description, line) {
|
||||
}
|
||||
public:
|
||||
|
||||
virtual ~LanguageModelImplementation() {}
|
||||
|
@ -42,8 +42,8 @@ protected:
|
||||
FactorMask m_factorTypes;
|
||||
|
||||
LanguageModelMultiFactor(const std::string& description, const std::string &line)
|
||||
:LanguageModelImplementation(description, line)
|
||||
{}
|
||||
:LanguageModelImplementation(description, line) {
|
||||
}
|
||||
|
||||
public:
|
||||
virtual bool Load(const std::string &filePath
|
||||
|
@ -23,8 +23,8 @@ public:
|
||||
typedef count_t T; // type for ORLM filter
|
||||
LanguageModelORLM(const std::string &line)
|
||||
:LanguageModelSingleFactor("ORLM", line)
|
||||
,m_lm(0)
|
||||
{}
|
||||
,m_lm(0) {
|
||||
}
|
||||
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
|
||||
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
|
||||
~LanguageModelORLM() {
|
||||
|
@ -70,8 +70,8 @@ private:
|
||||
|
||||
public:
|
||||
LanguageModelParallelBackoff(const std::string &line)
|
||||
:LanguageModelMultiFactor("ParallelBackoffLM", line)
|
||||
{}
|
||||
:LanguageModelMultiFactor("ParallelBackoffLM", line) {
|
||||
}
|
||||
|
||||
~LanguageModelParallelBackoff();
|
||||
|
||||
|
@ -46,8 +46,8 @@ class LanguageModelRandLM : public LanguageModelSingleFactor
|
||||
public:
|
||||
LanguageModelRandLM(const std::string &line)
|
||||
:LanguageModelSingleFactor("RandLM", line)
|
||||
, m_lm(0)
|
||||
{}
|
||||
, m_lm(0) {
|
||||
}
|
||||
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
|
||||
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
|
||||
~LanguageModelRandLM() {
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
}
|
||||
LabelId add(const Key& k) {
|
||||
std::pair<typename M::iterator,bool> p
|
||||
=m.insert(std::make_pair(k,data.size()));
|
||||
=m.insert(std::make_pair(k,data.size()));
|
||||
if(p.second) data.push_back(k);
|
||||
CHECK(static_cast<size_t>(p.first->second)<data.size());
|
||||
return p.first->second;
|
||||
|
@ -40,18 +40,17 @@ protected:
|
||||
m_obj(p),
|
||||
useCache(1),
|
||||
totalE(0),
|
||||
distinctE(0)
|
||||
{
|
||||
distinctE(0) {
|
||||
m_numInputScores = 0;
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
m_inputFeature = staticData.GetInputFeature();
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
m_inputFeature = staticData.GetInputFeature();
|
||||
|
||||
if (m_inputFeature) {
|
||||
const PhraseDictionary *firstPt = staticData.GetPhraseDictionaries()[0];
|
||||
if (firstPt == m_obj) {
|
||||
m_numInputScores = m_inputFeature->GetNumScoreComponents();
|
||||
}
|
||||
}
|
||||
if (m_inputFeature) {
|
||||
const PhraseDictionary *firstPt = staticData.GetPhraseDictionaries()[0];
|
||||
if (firstPt == m_obj) {
|
||||
m_numInputScores = m_inputFeature->GetNumScoreComponents();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
@ -179,7 +178,7 @@ public:
|
||||
//sparse features.
|
||||
//These are already in log-space
|
||||
for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||
}
|
||||
|
||||
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
|
||||
@ -294,7 +293,7 @@ public:
|
||||
}
|
||||
|
||||
if (m_numInputScores) {
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
|
||||
}
|
||||
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
|
||||
@ -452,9 +451,9 @@ public:
|
||||
|
||||
//put in phrase table scores, logging as we insert
|
||||
std::transform(tcands[i].scores.begin()
|
||||
,tcands[i].scores.end()
|
||||
,transcores.begin()
|
||||
,TransformScore);
|
||||
,tcands[i].scores.end()
|
||||
,transcores.begin()
|
||||
,TransformScore);
|
||||
|
||||
|
||||
//tally up
|
||||
@ -514,11 +513,11 @@ public:
|
||||
TScores const & scores=j->second;
|
||||
TargetPhrase targetPhrase;
|
||||
CreateTargetPhrase(targetPhrase
|
||||
, j ->first
|
||||
, scores.transScore
|
||||
, scores.inputScores
|
||||
, NULL
|
||||
, scores.src);
|
||||
, j ->first
|
||||
, scores.transScore
|
||||
, scores.inputScores
|
||||
, NULL
|
||||
, scores.src);
|
||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
||||
|
@ -43,8 +43,8 @@ public:
|
||||
TranslationDimension(std::size_t pos,
|
||||
const std::vector<TargetPhrase*> &orderedTargetPhrases)
|
||||
: m_pos(pos)
|
||||
, m_orderedTargetPhrases(&orderedTargetPhrases)
|
||||
{}
|
||||
, m_orderedTargetPhrases(&orderedTargetPhrases) {
|
||||
}
|
||||
|
||||
std::size_t IncrementPos() {
|
||||
return m_pos++;
|
||||
@ -80,8 +80,8 @@ class HypothesisDimension
|
||||
public:
|
||||
HypothesisDimension(std::size_t pos, const HypoList &orderedHypos)
|
||||
: m_pos(pos)
|
||||
, m_orderedHypos(&orderedHypos)
|
||||
{}
|
||||
, m_orderedHypos(&orderedHypos) {
|
||||
}
|
||||
|
||||
std::size_t IncrementPos() {
|
||||
return m_pos++;
|
||||
|
@ -91,8 +91,8 @@ public:
|
||||
|
||||
//! Clone a score collection
|
||||
ScoreComponentCollection(const ScoreComponentCollection& rhs)
|
||||
: m_scores(rhs.m_scores)
|
||||
{}
|
||||
: m_scores(rhs.m_scores) {
|
||||
}
|
||||
|
||||
ScoreComponentCollection& operator=( const ScoreComponentCollection& rhs ) {
|
||||
m_scores = rhs.m_scores;
|
||||
|
@ -38,8 +38,8 @@ public:
|
||||
virtual void EvaluateChart(const ChartBasedFeatureContext&, ScoreComponentCollection*) const {}
|
||||
virtual void Evaluate(const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{ }
|
||||
, ScoreComponentCollection &estimatedFutureScore) const {
|
||||
}
|
||||
};
|
||||
|
||||
class MockSingleFeature : public MockStatelessFeatureFunction
|
||||
|
@ -938,7 +938,7 @@ const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGra
|
||||
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
|
||||
#endif
|
||||
std::map<std::pair<std::pair<size_t, std::string>, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
|
||||
= m_transOptCache.find(key);
|
||||
= m_transOptCache.find(key);
|
||||
if (iter == m_transOptCache.end())
|
||||
return NULL;
|
||||
iter->second.second = clock(); // update last used time
|
||||
@ -1296,19 +1296,19 @@ void StaticData::OverrideFeatures()
|
||||
{
|
||||
const PARAM_VEC &params = m_parameter->GetParam("feature-overwrite");
|
||||
for (size_t i = 0; i < params.size(); ++i) {
|
||||
const string &str = params[i];
|
||||
vector<string> toks = Tokenize(str);
|
||||
CHECK(toks.size() > 1);
|
||||
const string &str = params[i];
|
||||
vector<string> toks = Tokenize(str);
|
||||
CHECK(toks.size() > 1);
|
||||
|
||||
FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]);
|
||||
FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]);
|
||||
|
||||
for (size_t j = 1; j < toks.size(); ++j) {
|
||||
const string &keyValStr = toks[j];
|
||||
vector<string> keyVal = Tokenize(keyValStr, "=");
|
||||
CHECK(keyVal.size() == 2);
|
||||
ff.OverrideParameter(keyVal[0], keyVal[1]);
|
||||
for (size_t j = 1; j < toks.size(); ++j) {
|
||||
const string &keyValStr = toks[j];
|
||||
vector<string> keyVal = Tokenize(keyValStr, "=");
|
||||
CHECK(keyVal.size() == 2);
|
||||
ff.OverrideParameter(keyVal[0], keyVal[1]);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -673,7 +673,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
|
||||
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
|
||||
= m_weightSettingIgnoreFF.find( m_currentWeightSetting );
|
||||
if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
|
||||
return false;
|
||||
}
|
||||
@ -691,7 +691,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
|
||||
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
|
||||
= m_weightSettingIgnoreDP.find( m_currentWeightSetting );
|
||||
if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -20,8 +20,8 @@ public:
|
||||
std::vector<wordID_t> words;
|
||||
|
||||
SAPhrase(size_t phraseSize)
|
||||
:words(phraseSize)
|
||||
{}
|
||||
:words(phraseSize) {
|
||||
}
|
||||
|
||||
void SetId(size_t pos, wordID_t id) {
|
||||
CHECK(pos < words.size());
|
||||
@ -43,8 +43,8 @@ public:
|
||||
, m_endTarget(endTarget)
|
||||
, m_startSource(startSource)
|
||||
, m_endSource(endSource)
|
||||
, m_sntIndex(sntIndex)
|
||||
{}
|
||||
, m_sntIndex(sntIndex) {
|
||||
}
|
||||
|
||||
size_t GetTargetSize() const {
|
||||
return m_endTarget - m_startTarget + 1;
|
||||
|
@ -234,12 +234,12 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
|
||||
|
||||
std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
|
||||
targetPhraseCollection
|
||||
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
|
||||
,m_outputFactorsVec
|
||||
,m_dictionary
|
||||
,weightT
|
||||
,m_filePath
|
||||
, m_dbWrapper.GetVocab());
|
||||
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
|
||||
,m_outputFactorsVec
|
||||
,m_dictionary
|
||||
,weightT
|
||||
,m_filePath
|
||||
, m_dbWrapper.GetVocab());
|
||||
|
||||
delete tpcollBerkeleyDb;
|
||||
m_cache[tpCollFilePos] = targetPhraseCollection;
|
||||
|
@ -83,8 +83,8 @@ public:
|
||||
}
|
||||
|
||||
DottedRuleColl(size_t size)
|
||||
: m_coll(size)
|
||||
{}
|
||||
: m_coll(size) {
|
||||
}
|
||||
|
||||
~DottedRuleColl();
|
||||
|
||||
|
@ -428,7 +428,7 @@ void CompressionTaskReordering::operator()()
|
||||
while(scoresNum < m_encodedScores.size()) {
|
||||
std::string scores = m_encodedScores[scoresNum];
|
||||
std::string compressedScores
|
||||
= m_creator.CompressEncodedScores(scores);
|
||||
= m_creator.CompressEncodedScores(scores);
|
||||
|
||||
std::string dummy;
|
||||
PackedItem packedItem(scoresNum, dummy, compressedScores, 0);
|
||||
|
@ -57,26 +57,26 @@ public:
|
||||
MmapAllocator() throw()
|
||||
: m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
|
||||
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0))
|
||||
{ }
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
|
||||
}
|
||||
|
||||
MmapAllocator(std::FILE* f_ptr) throw()
|
||||
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
|
||||
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0))
|
||||
{ }
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
|
||||
}
|
||||
|
||||
MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
|
||||
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
|
||||
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
|
||||
m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0))
|
||||
{ }
|
||||
m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
|
||||
}
|
||||
|
||||
MmapAllocator(std::string fileName) throw()
|
||||
: m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
|
||||
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0))
|
||||
{ }
|
||||
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
|
||||
}
|
||||
|
||||
MmapAllocator(const MmapAllocator& c) throw()
|
||||
: m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc),
|
||||
|
@ -61,7 +61,7 @@ PhraseDecoder::~PhraseDecoder()
|
||||
inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
|
||||
{
|
||||
boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_sourceSymbolsMap.find(symbol);
|
||||
= m_sourceSymbolsMap.find(symbol);
|
||||
if(it != m_sourceSymbolsMap.end())
|
||||
return it->second;
|
||||
|
||||
@ -200,7 +200,7 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
||||
|
||||
if(m_coding == PREnc) {
|
||||
std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
|
||||
= m_decodingCache.Retrieve(sourcePhrase);
|
||||
= m_decodingCache.Retrieve(sourcePhrase);
|
||||
|
||||
// Has been cached and is complete or does not need to be completed
|
||||
if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
|
||||
@ -255,7 +255,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
if(m_coding == REnc) {
|
||||
for(size_t i = 0; i < sourcePhrase.GetSize(); i++) {
|
||||
std::string sourceWord
|
||||
= sourcePhrase.GetWord(i).GetString(*m_input, false);
|
||||
= sourcePhrase.GetWord(i).GetString(*m_input, false);
|
||||
unsigned idx = GetSourceSymbolId(sourceWord);
|
||||
sourceWords.push_back(idx);
|
||||
}
|
||||
|
@ -106,7 +106,7 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
|
||||
|
||||
// Retrieve target phrase collection from phrase table
|
||||
TargetPhraseVectorPtr decodedPhraseColl
|
||||
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
||||
= m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
|
||||
|
||||
if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
|
||||
TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
|
||||
|
@ -426,7 +426,7 @@ void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
|
||||
unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
|
||||
{
|
||||
boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_sourceSymbolsMap.find(symbol);
|
||||
= m_sourceSymbolsMap.find(symbol);
|
||||
|
||||
if(it != m_sourceSymbolsMap.end())
|
||||
return it->second;
|
||||
@ -437,7 +437,7 @@ unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
|
||||
unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
|
||||
{
|
||||
boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_targetSymbolsMap.find(symbol);
|
||||
= m_targetSymbolsMap.find(symbol);
|
||||
|
||||
if(it != m_targetSymbolsMap.end())
|
||||
return it->second;
|
||||
@ -451,7 +451,7 @@ unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
|
||||
boost::mutex::scoped_lock lock(m_mutex);
|
||||
#endif
|
||||
boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_targetSymbolsMap.find(symbol);
|
||||
= m_targetSymbolsMap.find(symbol);
|
||||
|
||||
if(it != m_targetSymbolsMap.end())
|
||||
return it->second;
|
||||
@ -1200,7 +1200,7 @@ void CompressionTask::operator()()
|
||||
while(collectionNum < m_encodedCollections.size()) {
|
||||
std::string collection = m_encodedCollections[collectionNum];
|
||||
std::string compressedCollection
|
||||
= m_creator.CompressEncodedCollection(collection);
|
||||
= m_creator.CompressEncodedCollection(collection);
|
||||
|
||||
std::string dummy;
|
||||
PackedItem packedItem(collectionNum, dummy, compressedCollection, 0);
|
||||
|
@ -143,7 +143,7 @@ public:
|
||||
return data;
|
||||
else {
|
||||
typename std::vector<DataType>::iterator it
|
||||
= std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
|
||||
= std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
|
||||
if(it != m_bestVec.end())
|
||||
return *it;
|
||||
else
|
||||
|
@ -75,8 +75,8 @@ public:
|
||||
typedef CacheMap::const_iterator const_iterator;
|
||||
|
||||
TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2)
|
||||
: m_max(max), m_tolerance(tolerance)
|
||||
{}
|
||||
: m_max(max), m_tolerance(tolerance) {
|
||||
}
|
||||
|
||||
iterator Begin() {
|
||||
return m_phraseCache.begin();
|
||||
|
@ -338,7 +338,7 @@ template<typename T>
|
||||
const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len)
|
||||
{
|
||||
int dummy(0);
|
||||
float* *addresses = new float*[len]; // only interested in addresses of cache
|
||||
float**addresses = new float*[len]; // only interested in addresses of cache
|
||||
CHECK(cache_->getCache2(ngram, len, &addresses[0], &dummy) == len);
|
||||
// return address of cache node
|
||||
|
||||
|
@ -32,24 +32,18 @@ namespace Moses
|
||||
|
||||
PhraseDictionary::PhraseDictionary(const std::string &description, const std::string &line)
|
||||
:DecodeFeature(description, line)
|
||||
,m_tableLimit(20) // default
|
||||
{
|
||||
m_tableLimit= 20; // TODO default?
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
|
||||
if (args[0] == "path") {
|
||||
m_filePath = args[1];
|
||||
} else if (args[0] == "table-limit") {
|
||||
m_tableLimit = Scan<size_t>(args[1]);
|
||||
} else if (args[0] == "target-path") {
|
||||
m_targetFile = args[1];
|
||||
} else if (args[0] == "alignment-path") {
|
||||
m_alignmentsFile = args[1];
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
//throw "Unknown argument " + args[0];
|
||||
++ind;
|
||||
}
|
||||
} // for (size_t i = 0; i < toks.size(); ++i) {
|
||||
}
|
||||
|
||||
// find out which feature function can be applied in this decode step
|
||||
const std::vector<FeatureFunction*> &allFeatures = FeatureFunction::GetFeatureFunctions();
|
||||
@ -72,11 +66,16 @@ GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
|
||||
|
||||
bool PhraseDictionary::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "table-limit") {
|
||||
m_tableLimit = Scan<size_t>(value);
|
||||
}
|
||||
else {
|
||||
return DecodeFeature::OverrideParameter(key, value);
|
||||
if (key == "path") {
|
||||
m_filePath = value;
|
||||
} else if (key == "table-limit") {
|
||||
m_tableLimit = Scan<size_t>(value);
|
||||
} else if (key == "target-path") {
|
||||
m_targetFile = value;
|
||||
} else if (key == "alignment-path") {
|
||||
m_alignmentsFile = value;
|
||||
} else {
|
||||
return DecodeFeature::OverrideParameter(key, value);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -56,8 +56,8 @@ class PhraseDictionary : public DecodeFeature
|
||||
public:
|
||||
PhraseDictionary(const std::string &description, const std::string &line);
|
||||
|
||||
virtual ~PhraseDictionary()
|
||||
{}
|
||||
virtual ~PhraseDictionary() {
|
||||
}
|
||||
|
||||
//! table limit number.
|
||||
size_t GetTableLimit() const {
|
||||
@ -70,11 +70,11 @@ public:
|
||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
|
||||
|
||||
//! Create entry for translation of source to targetPhrase
|
||||
virtual void InitializeForInput(InputType const& source)
|
||||
{}
|
||||
virtual void InitializeForInput(InputType const& source) {
|
||||
}
|
||||
// clean up temporary memory, called after processing each sentence
|
||||
virtual void CleanUpAfterSentenceProcessing(const InputType& source)
|
||||
{}
|
||||
virtual void CleanUpAfterSentenceProcessing(const InputType& source) {
|
||||
}
|
||||
|
||||
//! Create a sentence-specific manager for SCFG rule lookup.
|
||||
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
||||
|
@ -12,17 +12,14 @@ PhraseDictionaryDynSuffixArray::PhraseDictionaryDynSuffixArray(const std::string
|
||||
:PhraseDictionary("PhraseDictionaryDynSuffixArray", line)
|
||||
,m_biSA(new BilingualDynSuffixArray())
|
||||
{
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
if (args[0] == "source") {
|
||||
m_source = args[1];
|
||||
} else if (args[0] == "target") {
|
||||
m_target = args[1];
|
||||
} else if (args[0] == "alignment") {
|
||||
m_alignments = args[1];
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
//throw "Unknown argument " + args[0];
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
|
||||
@ -80,4 +77,17 @@ ChartRuleLookupManager *PhraseDictionaryDynSuffixArray::CreateRuleLookupManager(
|
||||
throw "Chart decoding not supported by PhraseDictionaryDynSuffixArray";
|
||||
}
|
||||
|
||||
bool PhraseDictionaryDynSuffixArray::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "source") {
|
||||
m_source = value;
|
||||
} else if (key == "target") {
|
||||
m_target = value;
|
||||
} else if (key == "alignment") {
|
||||
m_alignments = value;
|
||||
} else {
|
||||
PhraseDictionary::OverrideParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}// end namepsace
|
||||
|
@ -25,6 +25,9 @@ public:
|
||||
void insertSnt(string&, string&, string&);
|
||||
void deleteSnt(unsigned, unsigned);
|
||||
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&);
|
||||
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
private:
|
||||
BilingualDynSuffixArray *m_biSA;
|
||||
std::string m_source, m_target, m_alignments;
|
||||
|
@ -38,13 +38,13 @@ class PhraseDictionaryMemory : public RuleTableTrie
|
||||
|
||||
protected:
|
||||
PhraseDictionaryMemory(const std::string &description, const std::string &line)
|
||||
: RuleTableTrie(description, line)
|
||||
{}
|
||||
: RuleTableTrie(description, line) {
|
||||
}
|
||||
|
||||
public:
|
||||
PhraseDictionaryMemory(const std::string &line)
|
||||
: RuleTableTrie("PhraseDictionaryMemory", line)
|
||||
{}
|
||||
: RuleTableTrie("PhraseDictionaryMemory", line) {
|
||||
}
|
||||
|
||||
const PhraseDictionaryNodeMemory &GetRootNode() const {
|
||||
return m_collection;
|
||||
|
@ -28,22 +28,16 @@ namespace Moses
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
:PhraseDictionary("PhraseDictionaryMultiModel", line)
|
||||
{
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
if (args[0] == "mode") {
|
||||
m_mode =args[1];
|
||||
if (m_mode != "interpolate") {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
} else if (args[0] == "components") {
|
||||
m_pdStr = Tokenize(args[1], ",");
|
||||
m_numModels = m_pdStr.size();
|
||||
} else if (args[0] == "lambda") {
|
||||
m_multimodelweights = Tokenize<float>(args[1], ",");
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
++ind;
|
||||
}
|
||||
} // for
|
||||
}
|
||||
|
||||
size_t numWeights = m_numScoreComponents;
|
||||
if (m_mode == "interpolate") {
|
||||
@ -55,15 +49,16 @@ PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &line)
|
||||
PhraseDictionaryMultiModel::PhraseDictionaryMultiModel(const std::string &description, const std::string &line)
|
||||
:PhraseDictionary(description, line)
|
||||
{
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
if (args[0] == "components") {
|
||||
m_pdStr = Tokenize(args[1], ",");
|
||||
m_numModels = m_pdStr.size();
|
||||
} else if (args[0] == "lambda") {
|
||||
m_multimodelweights = Tokenize<float>(args[1], ",");
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
++ind;
|
||||
}
|
||||
} // for
|
||||
}
|
||||
|
||||
if (description == "PhraseDictionaryMultiModelCounts") {
|
||||
CHECK(m_pdStr.size() == m_multimodelweights.size() || m_pdStr.size()*4 == m_multimodelweights.size());
|
||||
@ -329,6 +324,24 @@ void PhraseDictionaryMultiModel::CleanUpComponentModels(const InputType &source
|
||||
}
|
||||
}
|
||||
|
||||
bool PhraseDictionaryMultiModel::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "mode") {
|
||||
m_mode = value;
|
||||
if (m_mode != "interpolate") {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
} else if (key == "components") {
|
||||
m_pdStr = Tokenize(value, ",");
|
||||
m_numModels = m_pdStr.size();
|
||||
} else if (key == "lambda") {
|
||||
m_multimodelweights = Tokenize<float>(value, ",");
|
||||
} else {
|
||||
PhraseDictionary::OverrideParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef WITH_DLIB
|
||||
vector<float> PhraseDictionaryMultiModel::MinimizePerplexity(vector<pair<string, string> > &phrase_pair_vector)
|
||||
|
@ -80,6 +80,7 @@ public:
|
||||
/* Don't do anything source specific here as this object is shared between threads.*/
|
||||
}
|
||||
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&);
|
||||
bool OverrideParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
std::string m_mode;
|
||||
|
@ -68,69 +68,16 @@ PhraseDictionaryMultiModelCounts::PhraseDictionaryMultiModelCounts(const std::st
|
||||
//m_mode = "interpolate";
|
||||
//m_combineFunction = LinearInterpolationFromCounts;
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const vector<string> &args = m_args[i];
|
||||
if (args[0] == "mode") {
|
||||
m_mode = args[1];
|
||||
if (m_mode == "instance_weighting")
|
||||
m_combineFunction = InstanceWeighting;
|
||||
else if (m_mode == "interpolate") {
|
||||
m_combineFunction = LinearInterpolationFromCounts;
|
||||
} else {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
|
||||
} else if (args[0] == "lex-e2f") {
|
||||
m_lexE2FStr = Tokenize(args[1], ",");
|
||||
CHECK(m_lexE2FStr.size() == m_pdStr.size());
|
||||
} else if (args[0] == "lex-f2e") {
|
||||
m_lexF2EStr = Tokenize(args[1], ",");
|
||||
CHECK(m_lexF2EStr.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
else if (args[0] == "target-table") {
|
||||
m_targetTable = Tokenize(args[1], ",");
|
||||
CHECK(m_targetTable.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // for
|
||||
|
||||
}
|
||||
|
||||
bool PhraseDictionaryMultiModelCounts::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "mode") {
|
||||
m_mode = value;
|
||||
if (m_mode == "instance_weighting")
|
||||
m_combineFunction = InstanceWeighting;
|
||||
else if (m_mode == "interpolate") {
|
||||
m_combineFunction = LinearInterpolationFromCounts;
|
||||
} else {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
|
||||
} else if (key == "lex-e2f") {
|
||||
m_lexE2FStr = Tokenize(value, ",");
|
||||
CHECK(m_lexE2FStr.size() == m_pdStr.size());
|
||||
} else if (key == "lex-f2e") {
|
||||
m_lexF2EStr = Tokenize(value, ",");
|
||||
CHECK(m_lexF2EStr.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
else if (key == "target-table") {
|
||||
m_targetTable = Tokenize(value, ",");
|
||||
CHECK(m_targetTable.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
else {
|
||||
PhraseDictionaryMultiModel::OverrideParameter(key, value);
|
||||
size_t ind = 0;
|
||||
while (ind < m_args.size()) {
|
||||
vector<string> &args = m_args[ind];
|
||||
bool consumed = OverrideParameter(args[0], args[1]);
|
||||
if (consumed) {
|
||||
m_args.erase(m_args.begin() + ind);
|
||||
} else {
|
||||
++ind;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PhraseDictionaryMultiModelCounts::~PhraseDictionaryMultiModelCounts()
|
||||
@ -742,5 +689,36 @@ double LinearInterpolationFromCounts(vector<float> &joint_counts, vector<float>
|
||||
return p_weighted;
|
||||
}
|
||||
|
||||
bool PhraseDictionaryMultiModelCounts::OverrideParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "mode") {
|
||||
m_mode = value;
|
||||
if (m_mode == "instance_weighting")
|
||||
m_combineFunction = InstanceWeighting;
|
||||
else if (m_mode == "interpolate") {
|
||||
m_combineFunction = LinearInterpolationFromCounts;
|
||||
} else {
|
||||
ostringstream msg;
|
||||
msg << "combination mode unknown: " << m_mode;
|
||||
throw runtime_error(msg.str());
|
||||
}
|
||||
|
||||
} else if (key == "lex-e2f") {
|
||||
m_lexE2FStr = Tokenize(value, ",");
|
||||
CHECK(m_lexE2FStr.size() == m_pdStr.size());
|
||||
} else if (key == "lex-f2e") {
|
||||
m_lexF2EStr = Tokenize(value, ",");
|
||||
CHECK(m_lexF2EStr.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
else if (key == "target-table") {
|
||||
m_targetTable = Tokenize(value, ",");
|
||||
CHECK(m_targetTable.size() == m_pdStr.size());
|
||||
}
|
||||
|
||||
else {
|
||||
PhraseDictionaryMultiModel::OverrideParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
} //namespace
|
||||
|
@ -126,8 +126,8 @@ protected:
|
||||
TargetPhraseCollection *m_targetPhraseCollection;
|
||||
|
||||
PhraseDictionaryNodeMemory()
|
||||
:m_targetPhraseCollection(NULL)
|
||||
{}
|
||||
:m_targetPhraseCollection(NULL) {
|
||||
}
|
||||
public:
|
||||
virtual ~PhraseDictionaryNodeMemory();
|
||||
|
||||
|
@ -41,8 +41,8 @@ public:
|
||||
TgtCand(const IPhrase& a, const Scores& b , const std::string& alignment)
|
||||
: e(a)
|
||||
, sc(b)
|
||||
, m_alignment(alignment)
|
||||
{}
|
||||
, m_alignment(alignment) {
|
||||
}
|
||||
|
||||
TgtCand(const IPhrase& a,const Scores& b) : e(a),sc(b) {}
|
||||
|
||||
|
@ -87,10 +87,5 @@ void PhraseDictionaryOnDisk::InitializeForInput(InputType const& source)
|
||||
return;
|
||||
}
|
||||
|
||||
void PhraseDictionaryOnDisk::CleanUpAfterSentenceProcessing(InputType const& source)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -49,8 +49,8 @@ protected:
|
||||
|
||||
public:
|
||||
PhraseDictionaryOnDisk(const std::string &line)
|
||||
: MyBase("PhraseDictionaryOnDisk", line)
|
||||
{}
|
||||
: MyBase("PhraseDictionaryOnDisk", line) {
|
||||
}
|
||||
|
||||
virtual ~PhraseDictionaryOnDisk();
|
||||
|
||||
@ -67,7 +67,6 @@ public:
|
||||
const ChartCellCollectionBase &);
|
||||
|
||||
virtual void InitializeForInput(InputType const& source);
|
||||
virtual void CleanUpAfterSentenceProcessing(InputType const& source);
|
||||
|
||||
};
|
||||
|
||||
|
@ -41,8 +41,8 @@ class RuleTableTrie : public PhraseDictionary
|
||||
{
|
||||
public:
|
||||
RuleTableTrie(const std::string &description, const std::string &line)
|
||||
: PhraseDictionary(description, line)
|
||||
{}
|
||||
: PhraseDictionary(description, line) {
|
||||
}
|
||||
|
||||
virtual ~RuleTableTrie();
|
||||
|
||||
|
@ -45,8 +45,8 @@ class RuleTableUTrie : public RuleTableTrie
|
||||
{
|
||||
public:
|
||||
RuleTableUTrie(const std::string &line)
|
||||
: RuleTableTrie("RuleTableUTrie", line)
|
||||
{}
|
||||
: RuleTableTrie("RuleTableUTrie", line) {
|
||||
}
|
||||
|
||||
const UTrieNode &GetRootNode() const {
|
||||
return m_root;
|
||||
|
@ -25,8 +25,8 @@ public:
int max_cost;
int internal_cost;
Match( int is, int ie, int ts, int te, int min, int max, int i )
:input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
{}
:input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i) {
}
};

}

@ -21,8 +21,8 @@ struct SentenceAlignment {
std::vector< WORD_ID > target;
std::vector< std::pair<int,int> > alignment;

SentenceAlignment()
{}
SentenceAlignment() {
}

std::string getTargetString(const Vocabulary &vocab) const;

@ -17,8 +17,8 @@ public:

XMLParseOutput(const std::string &label, const WordsRange &range)
: m_label(label)
, m_range(range)
{}
, m_range(range) {
}
};

/** An input to the decoder that represent a parse tree.

@ -45,8 +45,8 @@ protected:
bool ProcessAndStripXMLTags(std::string &line, std::vector<XMLParseOutput> &sourceLabels, std::vector<XmlOption*> &res);

public:
TreeInput()
{}
TreeInput() {
}

InputTypeEnum GetType() const {
return TreeInputType;

@ -108,27 +108,27 @@ enum DistortionOrientationOptions {

enum PhraseTableImplementation {
Memory = 0
,Binary = 1
,OnDisk = 2
//,GlueRule = 3
//,Joshua = 4
//,MemorySourceLabel = 5
,SCFG = 6
//,BerkeleyDb = 7
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
,FuzzyMatch = 11
,Compact = 12
,Interpolated = 13
,Binary = 1
,OnDisk = 2
//,GlueRule = 3
//,Joshua = 4
//,MemorySourceLabel = 5
,SCFG = 6
//,BerkeleyDb = 7
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
,FuzzyMatch = 11
,Compact = 12
,Interpolated = 13
};

enum InputTypeEnum {
SentenceInput = 0
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
,ConfusionNetworkInput = 1
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4

};

@ -141,7 +141,7 @@ enum XmlInputType {

enum DictionaryFind {
Best = 0
,All = 1
,All = 1
};

enum ParsingAlgorithm {

@ -151,22 +151,22 @@ enum ParsingAlgorithm {

enum SearchAlgorithm {
Normal = 0
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
,NormalBatch = 4
,ChartIncremental = 5
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
,NormalBatch = 4
,ChartIncremental = 5
};

enum SourceLabelOverlap {
SourceLabelOverlapAdd = 0
,SourceLabelOverlapReplace = 1
,SourceLabelOverlapDiscard = 2
,SourceLabelOverlapReplace = 1
,SourceLabelOverlapDiscard = 2
};

enum WordAlignmentSort {
NoSort = 0
,TargetOrder = 1
,TargetOrder = 1
};

enum FormatType {

@ -45,8 +45,8 @@ public:
inline WordsRange(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}
inline WordsRange(const WordsRange &copy)
: m_startPos(copy.GetStartPos())
, m_endPos(copy.GetEndPos())
{}
, m_endPos(copy.GetEndPos()) {
}

inline size_t GetStartPos() const {
return m_startPos;

@ -20,8 +20,8 @@ struct XmlOption {
TargetPhrase targetPhrase;

XmlOption(const WordsRange &r, const TargetPhrase &tp)
: range(r), targetPhrase(tp)
{}
: range(r), targetPhrase(tp) {
}

};

@ -42,8 +42,8 @@ public:
return m_elements.end();
}

AlignmentElement()
{}
AlignmentElement() {
}

size_t GetSize() const {
return m_elements.size();

@ -58,8 +58,8 @@ protected:
std::vector<AlignmentElement> m_elements;
public:
AlignmentPhrase(size_t size)
:m_elements(size)
{}
:m_elements(size) {
}
void Merge(const AlignmentPhrase &newAlignment, const WordsRange &newAlignmentRange);
void Merge(const std::vector< std::vector<size_t> > &source);
size_t GetSize() const {

@ -62,8 +62,8 @@ public:
, startS(sS)
, endS(eS)
, count(0)
, pcfgScore(0.0)
{}
, pcfgScore(0.0) {
}

void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength) {
m_ntLengths[sourcePos] = std::pair<size_t, size_t>(sourceLength, targetLength);

@ -40,15 +40,15 @@ public:
: m_start(2)
, m_end(2)
, m_pos(2)
, m_label(2)
{}
, m_label(2) {
}

Hole(const Hole &copy)
: m_start(copy.m_start)
, m_end(copy.m_end)
, m_pos(copy.m_pos)
, m_label(copy.m_label)
{}
, m_label(copy.m_label) {
}

Hole(int startS, int endS, int startT, int endT)
: m_start(2)

@ -44,8 +44,8 @@ public:
HoleCollection(int sourcePhraseStart, int sourcePhraseEnd)
: m_scope(1, 0)
, m_sourcePhraseStart(1, sourcePhraseStart)
, m_sourcePhraseEnd(1, sourcePhraseEnd)
{}
, m_sourcePhraseEnd(1, sourcePhraseEnd) {
}

const HoleList &GetHoles() const {
return m_holes;

@ -86,8 +86,8 @@ public:
, gzOutput(false)
, unpairedExtractFormat(false)
, conditionOnTargetLhs(false)
, boundaryRules(false)
{}
, boundaryRules(false) {
}
};

}

@ -70,8 +70,8 @@ struct ScoreFeatureContext {
) :
phrasePair(thePhrasePair),
count(theCount),
maybeLog(theMaybeLog)
{}
maybeLog(theMaybeLog) {
}

const PhraseAlignmentCollection& phrasePair;
float count;

@ -53,8 +53,8 @@ public:
, m_sourceLabelCollection(srcLabelColl)
, m_targetTopLabelCollection(tgtTopLabelColl)
, m_sourceTopLabelCollection(srcTopLabelColl)
, m_options(options)
{}
, m_options(options) {
}

virtual ~SentenceAlignmentWithSyntax() {}

@ -44,8 +44,8 @@ public:
,m_end(endPos)
,m_label(label)
,m_parent(0)
,m_pcfgScore(0.0f)
{}
,m_pcfgScore(0.0f) {
}
int GetStart() const {
return m_start;
}

@ -29,8 +29,8 @@ class XmlException
{
public:
XmlException(const std::string & msg)
: m_msg(msg)
{}
: m_msg(msg) {
}

const std::string &
getMsg() const {

@ -71,14 +71,14 @@ public:
std::map<const std::string*, WordCount> m_coll;

WordCount()
:m_count(0)
{}
:m_count(0) {
}

//WordCount(const WordCount &copy);

WordCount(float count)
:m_count(count)
{}
:m_count(count) {
}

void AddCount(float incr);

@ -137,7 +137,7 @@ void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
const SplitPoints &point = *p;
if (point.size() > 3) {
const vector< SyntaxNode* >& topNodes
= tree.GetNodes( point[0], point[point.size()-1]-1);
= tree.GetNodes( point[0], point[point.size()-1]-1);
string topLabel = topNodes[0]->GetLabel();

for(size_t i=2; i<point.size()-1; i++) {

@ -155,7 +155,7 @@ void RightBinarize( SyntaxTree &tree, ParentNodes &parents )
if (point.size() > 3) {
int endPoint = point[point.size()-1]-1;
const vector< SyntaxNode* >& topNodes
= tree.GetNodes( point[0], endPoint);
= tree.GetNodes( point[0], endPoint);
string topLabel = topNodes[0]->GetLabel();

for(size_t i=1; i<point.size()-2; i++) {

@ -5,7 +5,7 @@ use File::Basename;

sub Beautify($);

Beautify("/home/hieu/workspace/github/mosesdecoder");
Beautify("/Users/hieuhoang/unison/workspace/github/mosesdecoder");

sub Beautify($)
{

symal/cmd.h
@ -33,14 +33,14 @@ extern "C" {
#endif

#if defined(__STDC__)
int DeclareParams(char *, ...);
int DeclareParams(char *, ...);
#else
int DeclareParams();
int DeclareParams();
#endif

int GetParams(int *n, char ***a,char *CmdFileName),
SPrintParams(),
PrintParams();
int GetParams(int *n, char ***a,char *CmdFileName),
SPrintParams(),
PrintParams();

#ifdef __cplusplus
}