mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 15:00:33 +03:00
Reduce dependence on StaticData.
This commit is contained in:
parent
0db3e544fd
commit
c1a008bf6d
@ -210,10 +210,14 @@ void ChartParser::Create(const Range &range, ChartParserCallback &to)
|
||||
ruleLookupManager.GetChartRuleCollection(inputPath, last, to);
|
||||
}
|
||||
}
|
||||
|
||||
if (range.GetNumWordsCovered() == 1 && range.GetStartPos() != 0 && range.GetStartPos() != m_source.GetSize()-1) {
|
||||
bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
|
||||
if (to.Empty() || alwaysCreateDirectTranslationOption) {
|
||||
|
||||
if (range.GetNumWordsCovered() == 1
|
||||
&& range.GetStartPos() != 0
|
||||
&& range.GetStartPos() != m_source.GetSize()-1) {
|
||||
bool always = m_ttask.lock()->options().unk.always_create_direct_transopt;
|
||||
// bool alwaysCreateDirectTranslationOption
|
||||
// = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
|
||||
if (to.Empty() || always) {
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
const Word &sourceWord = m_source.GetWord(range.GetStartPos());
|
||||
m_unknown.Process(sourceWord, range, to);
|
||||
|
@ -25,7 +25,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "HypothesisStackNormal.h"
|
||||
#include "TypeDef.h"
|
||||
#include "Util.h"
|
||||
#include "StaticData.h"
|
||||
#include "Manager.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
@ -76,7 +75,7 @@ pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesi
|
||||
size_t toleratedSize = 2*m_maxHypoStackSize-1;
|
||||
// add in room for stack diversity
|
||||
if (m_minHypoStackDiversity)
|
||||
toleratedSize += m_minHypoStackDiversity << StaticData::Instance().GetMaxDistortion();
|
||||
toleratedSize += m_minHypoStackDiversity << m_manager.options().reordering.max_distortion;
|
||||
if (m_hypos.size() > toleratedSize) {
|
||||
PruneToSize(m_maxHypoStackSize);
|
||||
} else {
|
||||
@ -97,8 +96,8 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
|
||||
}
|
||||
|
||||
// too bad for stack. don't bother adding hypo into collection
|
||||
if (!StaticData::Instance().GetDisableDiscarding() &&
|
||||
hypo->GetFutureScore() < m_worstScore
|
||||
if (m_manager.options().search.disable_discarding == false
|
||||
&& hypo->GetFutureScore() < m_worstScore
|
||||
&& ! ( m_minHypoStackDiversity > 0
|
||||
&& hypo->GetFutureScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) ) {
|
||||
m_manager.GetSentenceStats().AddDiscarded();
|
||||
|
@ -63,7 +63,7 @@ StaticData StaticData::s_instance;
|
||||
StaticData::StaticData()
|
||||
: m_sourceStartPosMattersForRecombination(false)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_isAlwaysCreateDirectTranslationOption(false)
|
||||
// , m_isAlwaysCreateDirectTranslationOption(false)
|
||||
, m_currentWeightSetting("default")
|
||||
, m_treeStructure(NULL)
|
||||
{
|
||||
@ -132,9 +132,6 @@ StaticData
|
||||
m_parameter->SetParameter(m_continuePartialTranslation,
|
||||
"continue-partial-translation", false );
|
||||
|
||||
// use of xml in input
|
||||
// m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
params = m_parameter->GetParam("xml-brackets");
|
||||
if (params && params->size()) {
|
||||
@ -170,45 +167,18 @@ StaticData
|
||||
m_parameter->SetParameter<string>(m_outputUnknownsFile,
|
||||
"output-unknowns", "");
|
||||
|
||||
//Print Translation Options
|
||||
// m_parameter->SetParameter(m_printTranslationOptions,
|
||||
// "print-translation-option", false );
|
||||
|
||||
//Print All Derivations
|
||||
// m_parameter->SetParameter(m_printAllDerivations ,
|
||||
// "print-all-derivations", false );
|
||||
|
||||
m_parameter->SetParameter<long>(m_startTranslationId,
|
||||
"start-translation-id", 0);
|
||||
|
||||
//lattice samples
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData::
|
||||
ini_compact_table_options()
|
||||
{
|
||||
// Compact phrase table and reordering model
|
||||
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
||||
m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
|
||||
}
|
||||
|
||||
void
|
||||
StaticData::
|
||||
ini_lm_options()
|
||||
{
|
||||
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
|
||||
}
|
||||
|
||||
// threads, timeouts, etc.
|
||||
bool
|
||||
StaticData
|
||||
::ini_performance_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
|
||||
// m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
|
||||
|
||||
m_threadCount = 1;
|
||||
params = m_parameter->GetParam("threads");
|
||||
@ -242,62 +212,6 @@ StaticData
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData::
|
||||
ini_factor_maps()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// factor delimiter
|
||||
m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
|
||||
if (m_factorDelimiter == "none") {
|
||||
m_factorDelimiter = "";
|
||||
}
|
||||
|
||||
// //input factors
|
||||
// params = m_parameter->GetParam("input-factors");
|
||||
// if (params) {
|
||||
// m_inputFactorOrder = Scan<FactorType>(*params);
|
||||
// }
|
||||
// if(m_inputFactorOrder.empty()) {
|
||||
// m_inputFactorOrder.push_back(0);
|
||||
// }
|
||||
|
||||
//output factors
|
||||
// params = m_parameter->GetParam("output-factors");
|
||||
// if (params) {
|
||||
// m_outputFactorOrder = Scan<FactorType>(*params);
|
||||
// }
|
||||
// if(m_outputFactorOrder.empty()) {
|
||||
// // default. output factor 0
|
||||
// m_outputFactorOrder.push_back(0);
|
||||
// }
|
||||
}
|
||||
|
||||
void
|
||||
StaticData::
|
||||
ini_oov_options()
|
||||
{
|
||||
// unknown word processing
|
||||
// m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
|
||||
// m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
|
||||
// m_parameter->SetParameter<string>(m_unknownWordPrefix, "unknown-word-prefix", "UNK" );
|
||||
// m_parameter->SetParameter<string>(m_unknownWordSuffix, "unknown-word-suffix", "" );
|
||||
|
||||
//source word deletion
|
||||
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
|
||||
|
||||
m_parameter->SetParameter(m_isAlwaysCreateDirectTranslationOption, "always-create-direct-transopt", false );
|
||||
}
|
||||
|
||||
void
|
||||
StaticData::
|
||||
ini_zombie_options()
|
||||
{
|
||||
//Disable discarding
|
||||
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
|
||||
|
||||
}
|
||||
|
||||
bool StaticData::LoadData(Parameter *parameter)
|
||||
{
|
||||
m_parameter = parameter;
|
||||
@ -311,7 +225,10 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
// ORDER HERE MATTERS, SO DON'T CHANGE IT UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
// input, output
|
||||
ini_factor_maps();
|
||||
|
||||
m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
|
||||
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
|
||||
|
||||
ini_input_options();
|
||||
m_bookkeeping_options.init(*parameter);
|
||||
if (!ini_output_options()) return false;
|
||||
@ -319,21 +236,11 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// threading etc.
|
||||
if (!ini_performance_options()) return false;
|
||||
|
||||
// model loading
|
||||
ini_compact_table_options();
|
||||
|
||||
// search
|
||||
ini_oov_options();
|
||||
// Compact phrase table and reordering model
|
||||
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
||||
m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
|
||||
RecursiveCYKPlus);
|
||||
|
||||
|
||||
ini_zombie_options(); // probably dead, or maybe not
|
||||
|
||||
// m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor",
|
||||
// NOT_FOUND);
|
||||
|
||||
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
||||
initialize_features();
|
||||
|
@ -101,7 +101,7 @@ protected:
|
||||
// std::string m_unknownWordSuffix;
|
||||
bool m_wordDeletionEnabled;
|
||||
|
||||
bool m_disableDiscarding;
|
||||
// bool m_disableDiscarding;
|
||||
bool m_printAllDerivations;
|
||||
bool m_printTranslationOptions;
|
||||
|
||||
@ -117,7 +117,7 @@ protected:
|
||||
|
||||
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
|
||||
|
||||
bool m_isAlwaysCreateDirectTranslationOption;
|
||||
// bool m_isAlwaysCreateDirectTranslationOption;
|
||||
//! constructor. only the 1 static variable can be created
|
||||
|
||||
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
|
||||
@ -174,29 +174,17 @@ protected:
|
||||
|
||||
const StatefulFeatureFunction* m_treeStructure;
|
||||
|
||||
void ini_compact_table_options();
|
||||
void ini_consensus_decoding_options();
|
||||
void ini_cube_pruning_options();
|
||||
void ini_distortion_options();
|
||||
void ini_factor_maps();
|
||||
void ini_input_options();
|
||||
void ini_lm_options();
|
||||
void ini_lmbr_options();
|
||||
void ini_mbr_options();
|
||||
void ini_mira_options();
|
||||
void ini_oov_options();
|
||||
bool ini_output_options();
|
||||
bool ini_performance_options();
|
||||
void ini_phrase_lookup_options();
|
||||
bool ini_stack_decoding_options();
|
||||
void ini_zombie_options();
|
||||
|
||||
void initialize_features();
|
||||
public:
|
||||
|
||||
bool IsAlwaysCreateDirectTranslationOption() const {
|
||||
return m_isAlwaysCreateDirectTranslationOption;
|
||||
}
|
||||
// bool IsAlwaysCreateDirectTranslationOption() const {
|
||||
// return m_isAlwaysCreateDirectTranslationOption;
|
||||
// }
|
||||
//! destructor
|
||||
~StaticData();
|
||||
|
||||
@ -241,42 +229,15 @@ public:
|
||||
return m_options;
|
||||
}
|
||||
|
||||
// const std::vector<FactorType> &GetInputFactorOrder() const {
|
||||
// return m_options.input.factor_order;
|
||||
// }
|
||||
|
||||
// const std::vector<FactorType> &GetOutputFactorOrder() const {
|
||||
// return m_options.output.factor_order;
|
||||
// }
|
||||
|
||||
inline bool
|
||||
GetSourceStartPosMattersForRecombination() const {
|
||||
return m_sourceStartPosMattersForRecombination;
|
||||
}
|
||||
|
||||
inline bool
|
||||
GetDisableDiscarding() const {
|
||||
return m_disableDiscarding;
|
||||
}
|
||||
|
||||
// inline size_t
|
||||
// GetMaxNoTransOptPerCoverage() const {
|
||||
// return m_options.search.max_trans_opt_per_cov;
|
||||
// }
|
||||
|
||||
// inline size_t
|
||||
// GetMaxNoPartTransOpt() const {
|
||||
// return m_options.search.max_partial_trans_opt;
|
||||
// }
|
||||
|
||||
// inline size_t
|
||||
// GetMaxPhraseLength() const {
|
||||
// return m_options.search.max_phrase_length;
|
||||
// }
|
||||
|
||||
bool
|
||||
IsWordDeletionEnabled() const {
|
||||
return m_wordDeletionEnabled;
|
||||
// return m_wordDeletionEnabled;
|
||||
return m_options.unk.word_deletion_enabled;
|
||||
}
|
||||
|
||||
int
|
||||
@ -564,9 +525,9 @@ public:
|
||||
return m_defaultNonTermOnlyForEmptyRange;
|
||||
}
|
||||
|
||||
S2TParsingAlgorithm GetS2TParsingAlgorithm() const {
|
||||
return m_s2tParsingAlgorithm;
|
||||
}
|
||||
// S2TParsingAlgorithm GetS2TParsingAlgorithm() const {
|
||||
// return m_s2tParsingAlgorithm;
|
||||
// }
|
||||
|
||||
bool RequireSortingAfterSourceContext() const {
|
||||
return m_requireSortingAfterSourceContext;
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "moses/ChartTranslationOptionList.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/Syntax/RuleTableFF.h"
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
#include "util/file_piece.hh"
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/tokenize_piece.hh"
|
||||
@ -32,12 +33,14 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
namespace Syntax
|
||||
{
|
||||
namespace F2S
|
||||
{
|
||||
|
||||
bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
|
||||
bool HyperTreeLoader::Load(AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &ff,
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class AllOptions;
|
||||
namespace Syntax
|
||||
{
|
||||
namespace F2S
|
||||
@ -22,7 +23,8 @@ namespace F2S
|
||||
class HyperTreeLoader : public HyperTreeCreator
|
||||
{
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
bool Load(AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &,
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include "RuleTableFF.h"
|
||||
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
#include "moses/Syntax/F2S/HyperTree.h"
|
||||
#include "moses/Syntax/F2S/HyperTreeLoader.h"
|
||||
#include "moses/Syntax/S2T/RuleTrieCYKPlus.h"
|
||||
@ -8,7 +7,6 @@
|
||||
#include "moses/Syntax/S2T/RuleTrieScope3.h"
|
||||
#include "moses/Syntax/T2S/RuleTrie.h"
|
||||
#include "moses/Syntax/T2S/RuleTrieLoader.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
namespace Syntax
|
||||
@ -26,37 +24,34 @@ RuleTableFF::RuleTableFF(const std::string &line)
|
||||
s_instances.push_back(this);
|
||||
}
|
||||
|
||||
void RuleTableFF::Load()
|
||||
void RuleTableFF::Load(Moses::AllOptions const& opts)
|
||||
{
|
||||
SetFeaturesToApply();
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (staticData.options().search.algo == SyntaxF2S ||
|
||||
staticData.options().search.algo == SyntaxT2S) {
|
||||
if (opts.search.algo == SyntaxF2S || opts.search.algo == SyntaxT2S) {
|
||||
F2S::HyperTree *trie = new F2S::HyperTree(this);
|
||||
F2S::HyperTreeLoader loader;
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie,
|
||||
m_sourceTerminalSet);
|
||||
loader.Load(opts, m_input, m_output, m_filePath, *this, *trie, m_sourceTerminalSet);
|
||||
m_table = trie;
|
||||
} else if (staticData.options().search.algo == SyntaxS2T) {
|
||||
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
|
||||
} else if (opts.search.algo == SyntaxS2T) {
|
||||
S2TParsingAlgorithm algorithm = opts.syntax.s2t_parsing_algo; // staticData.GetS2TParsingAlgorithm();
|
||||
if (algorithm == RecursiveCYKPlus) {
|
||||
S2T::RuleTrieCYKPlus *trie = new S2T::RuleTrieCYKPlus(this);
|
||||
S2T::RuleTrieLoader loader;
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie);
|
||||
loader.Load(opts,m_input, m_output, m_filePath, *this, *trie);
|
||||
m_table = trie;
|
||||
} else if (algorithm == Scope3) {
|
||||
S2T::RuleTrieScope3 *trie = new S2T::RuleTrieScope3(this);
|
||||
S2T::RuleTrieLoader loader;
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie);
|
||||
loader.Load(opts, m_input, m_output, m_filePath, *this, *trie);
|
||||
m_table = trie;
|
||||
} else {
|
||||
UTIL_THROW2("ERROR: unhandled S2T parsing algorithm");
|
||||
}
|
||||
} else if (staticData.options().search.algo == SyntaxT2S_SCFG) {
|
||||
} else if (opts.search.algo == SyntaxT2S_SCFG) {
|
||||
T2S::RuleTrie *trie = new T2S::RuleTrie(this);
|
||||
T2S::RuleTrieLoader loader;
|
||||
loader.Load(m_input, m_output, m_filePath, *this, *trie);
|
||||
loader.Load(opts, m_input, m_output, m_filePath, *this, *trie);
|
||||
m_table = trie;
|
||||
} else {
|
||||
UTIL_THROW2(
|
||||
|
@ -9,7 +9,7 @@ namespace Moses
|
||||
|
||||
class ChartParser;
|
||||
class ChartCellCollectionBase;
|
||||
|
||||
class AllOptions;
|
||||
namespace Syntax
|
||||
{
|
||||
|
||||
@ -27,7 +27,7 @@ public:
|
||||
// FIXME Delete m_table?
|
||||
~RuleTableFF() {}
|
||||
|
||||
void Load();
|
||||
void Load(AllOptions const& opts);
|
||||
|
||||
const RuleTable *GetTable() const {
|
||||
return m_table;
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include "RuleTrie.h"
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -35,7 +36,8 @@ namespace Syntax
|
||||
namespace S2T
|
||||
{
|
||||
|
||||
bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
bool RuleTrieLoader::Load(Moses::AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &ff,
|
||||
@ -43,7 +45,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
{
|
||||
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
std::size_t count = 0;
|
||||
|
||||
@ -76,7 +78,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
}
|
||||
|
||||
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
|
||||
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
|
||||
if (isLHSEmpty && !opts.unk.word_deletion_enabled) { // staticData.IsWordDeletionEnabled()) {
|
||||
TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
|
||||
continue;
|
||||
}
|
||||
|
@ -11,6 +11,8 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class AllOptions;
|
||||
|
||||
namespace Syntax
|
||||
{
|
||||
namespace S2T
|
||||
@ -19,7 +21,8 @@ namespace S2T
|
||||
class RuleTrieLoader : public RuleTrieCreator
|
||||
{
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
bool Load(Moses::AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &,
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include "RuleTrie.h"
|
||||
#include "moses/parameters/AllOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -34,7 +35,8 @@ namespace Syntax
|
||||
namespace T2S
|
||||
{
|
||||
|
||||
bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
bool RuleTrieLoader::Load(Moses::AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &ff,
|
||||
@ -42,7 +44,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
{
|
||||
PrintUserTime(std::string("Start loading text phrase table. Moses format"));
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
// const StaticData &staticData = StaticData::Instance();
|
||||
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
|
||||
|
||||
std::size_t count = 0;
|
||||
@ -80,7 +82,7 @@ bool RuleTrieLoader::Load(const std::vector<FactorType> &input,
|
||||
++pipes; // counts
|
||||
|
||||
bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == std::string::npos);
|
||||
if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
|
||||
if (isLHSEmpty && !opts.unk.word_deletion_enabled) { // staticData.IsWordDeletionEnabled()) {
|
||||
TRACE_ERR( ff.GetFilePath() << ":" << count << ": pt entry contains empty target, skipping\n");
|
||||
continue;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class AllOptions;
|
||||
namespace Syntax
|
||||
{
|
||||
namespace T2S
|
||||
@ -19,7 +20,8 @@ namespace T2S
|
||||
class RuleTrieLoader : public RuleTrieCreator
|
||||
{
|
||||
public:
|
||||
bool Load(const std::vector<FactorType> &input,
|
||||
bool Load(Moses::AllOptions const& opts,
|
||||
const std::vector<FactorType> &input,
|
||||
const std::vector<FactorType> &output,
|
||||
const std::string &inFile,
|
||||
const RuleTableFF &,
|
||||
|
@ -145,12 +145,14 @@ ProcessUnknownWord()
|
||||
}
|
||||
}
|
||||
|
||||
bool alwaysCreateDirectTranslationOption
|
||||
= StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
|
||||
// bool alwaysCreateDirectTranslationOption
|
||||
// = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
|
||||
bool always = m_ttask.lock()->options().unk.always_create_direct_transopt;
|
||||
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
for (size_t pos = 0 ; pos < size ; ++pos) {
|
||||
TranslationOptionList* fullList = GetTranslationOptionList(pos, pos);
|
||||
if (!fullList || fullList->size() == 0 || alwaysCreateDirectTranslationOption)
|
||||
if (!fullList || fullList->size() == 0 || always)
|
||||
ProcessUnknownWord(pos);
|
||||
}
|
||||
}
|
||||
|
@ -104,7 +104,7 @@ TranslationTask
|
||||
|
||||
else if (algo == SyntaxS2T) {
|
||||
// new-style string-to-tree decoding (ask Phil Williams)
|
||||
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();
|
||||
S2TParsingAlgorithm algorithm = options().syntax.s2t_parsing_algo; // staticData.GetS2TParsingAlgorithm();
|
||||
if (algorithm == RecursiveCYKPlus) {
|
||||
typedef Syntax::S2T::EagerParserCallback Callback;
|
||||
typedef Syntax::S2T::RecursiveCYKPlusParser<Callback> Parser;
|
||||
|
@ -23,6 +23,7 @@ namespace Moses
|
||||
if (!lmbr.init(param)) return false;
|
||||
if (!output.init(param)) return false;
|
||||
if (!unk.init(param)) return false;
|
||||
if (!syntax.init(param)) return false;
|
||||
|
||||
param.SetParameter(mira, "mira", false);
|
||||
|
||||
@ -91,6 +92,7 @@ namespace Moses
|
||||
if (!lmbr.update(param)) return false;
|
||||
if (!output.update(param)) return false;
|
||||
if (!unk.update(param)) return false;
|
||||
if (!syntax.update(param)) return false;
|
||||
return sanity_check();
|
||||
}
|
||||
#endif
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "LMBR_Options.h"
|
||||
#include "ReportingOptions.h"
|
||||
#include "OOVHandlingOptions.h"
|
||||
#include "SyntaxOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -29,6 +30,7 @@ namespace Moses
|
||||
LMBR_Options lmbr;
|
||||
ReportingOptions output;
|
||||
OOVHandlingOptions unk;
|
||||
SyntaxOptions syntax;
|
||||
bool mira;
|
||||
|
||||
// StackOptions stack;
|
||||
|
@ -2,7 +2,7 @@
|
||||
#include "InputOptions.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/StaticData.h"
|
||||
// #include "moses/StaticData.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
namespace Moses {
|
||||
@ -20,6 +20,7 @@ namespace Moses {
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(input_type, "inputtype", SentenceInput);
|
||||
#if 0
|
||||
if (input_type == SentenceInput)
|
||||
{ VERBOSE(2, "input type is: text input"); }
|
||||
else if (input_type == ConfusionNetworkInput)
|
||||
@ -32,7 +33,7 @@ namespace Moses {
|
||||
{ VERBOSE(2, "input type is: tabbed sentence"); }
|
||||
else if (input_type == ForestInputType)
|
||||
{ VERBOSE(2, "input type is: forest"); }
|
||||
|
||||
#endif
|
||||
param.SetParameter(continue_partial_translation,
|
||||
"continue-partial-translation", false);
|
||||
param.SetParameter(default_non_term_only_for_empty_range,
|
||||
@ -59,9 +60,11 @@ namespace Moses {
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
|
||||
#if 0
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< xml_brackets.first << " and "
|
||||
<< xml_brackets.second << std::endl);
|
||||
#endif
|
||||
}
|
||||
|
||||
pspec = param.GetParam("input-factors");
|
||||
|
105
moses/parameters/InputOptions.cpp.orig
Normal file
105
moses/parameters/InputOptions.cpp.orig
Normal file
@ -0,0 +1,105 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "InputOptions.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
InputOptions::
|
||||
InputOptions()
|
||||
{
|
||||
xml_brackets.first = "<";
|
||||
xml_brackets.second = ">";
|
||||
input_type = SentenceInput;
|
||||
}
|
||||
|
||||
bool
|
||||
InputOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(input_type, "inputtype", SentenceInput);
|
||||
if (input_type == SentenceInput)
|
||||
{ VERBOSE(2, "input type is: text input"); }
|
||||
else if (input_type == ConfusionNetworkInput)
|
||||
{ VERBOSE(2, "input type is: confusion net"); }
|
||||
else if (input_type == WordLatticeInput)
|
||||
{ VERBOSE(2, "input type is: word lattice"); }
|
||||
else if (input_type == TreeInputType)
|
||||
{ VERBOSE(2, "input type is: tree"); }
|
||||
else if (input_type == TabbedSentenceInput)
|
||||
{ VERBOSE(2, "input type is: tabbed sentence"); }
|
||||
else if (input_type == ForestInputType)
|
||||
{ VERBOSE(2, "input type is: forest"); }
|
||||
|
||||
param.SetParameter(continue_partial_translation,
|
||||
"continue-partial-translation", false);
|
||||
param.SetParameter(default_non_term_only_for_empty_range,
|
||||
"default-non-term-for-empty-range-only", false);
|
||||
|
||||
|
||||
param.SetParameter<XmlInputType>(xml_policy, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
// Do we really want this to be configurable???? UG
|
||||
const PARAM_VEC *pspec;
|
||||
pspec = param.GetParam("xml-brackets");
|
||||
if (pspec && pspec->size())
|
||||
{
|
||||
std::vector<std::string> brackets = Tokenize(pspec->at(0));
|
||||
if(brackets.size()!=2)
|
||||
{
|
||||
std::cerr << "invalid xml-brackets value, "
|
||||
<< "must specify exactly 2 blank-delimited strings "
|
||||
<<<<<<< HEAD
|
||||
<< "for XML tags opening and closing brackets" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
=======
|
||||
<< "for XML tags opening and closing brackets"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
|
||||
>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< xml_brackets.first << " and "
|
||||
<< xml_brackets.second << std::endl);
|
||||
}
|
||||
|
||||
<<<<<<< HEAD
|
||||
param.SetParameter(placeholder_factor, "placeholder-factor", NOT_FOUND);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
|
||||
bool
|
||||
InputOptions::
|
||||
update(std::map<std::string,xmlrpc_c::value>const& param)
|
||||
{
|
||||
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||
params_t::const_iterator si = param.find("xml-input");
|
||||
if (si != param.end())
|
||||
xml_policy = Scan<XmlInputType>(xmlrpc_c::value_string(si->second));
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
=======
|
||||
pspec = param.GetParam("input-factors");
|
||||
if (pspec) factor_order = Scan<FactorType>(*pspec);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d
|
||||
}
|
38
moses/parameters/InputOptions.h.orig
Normal file
38
moses/parameters/InputOptions.h.orig
Normal file
@ -0,0 +1,38 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "moses/Parameter.h"
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
struct
|
||||
InputOptions : public OptionsBaseClass
|
||||
{
|
||||
bool continue_partial_translation;
|
||||
bool default_non_term_only_for_empty_range; // whatever that means
|
||||
InputTypeEnum input_type;
|
||||
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
||||
<<<<<<< HEAD
|
||||
|
||||
FactorType placeholder_factor; // where to store original text for placeholders
|
||||
|
||||
|
||||
=======
|
||||
std::vector<FactorType> factor_order; // input factor order
|
||||
|
||||
>>>>>>> b733804fdcf20a5a9e822861471248c8fdbc0e2d
|
||||
std::pair<std::string,std::string> xml_brackets;
|
||||
// strings to use as XML tags' opening and closing brackets.
|
||||
// Default are "<" and ">"
|
||||
|
||||
InputOptions();
|
||||
|
||||
bool init(Parameter const& param);
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -14,6 +14,8 @@ namespace Moses {
|
||||
mark = false;
|
||||
prefix = "UNK";
|
||||
suffix = "";
|
||||
word_deletion_enabled = false;
|
||||
always_create_direct_transopt = false;
|
||||
}
|
||||
|
||||
bool
|
||||
@ -22,6 +24,8 @@ namespace Moses {
|
||||
{
|
||||
param.SetParameter(drop,"drop-unknown",false);
|
||||
param.SetParameter(mark,"mark-unknown",false);
|
||||
param.SetParameter(word_deletion_enabled, "phrase-drop-allowed", false);
|
||||
param.SetParameter(always_create_direct_transopt, "always-create-direct-transopt", false);
|
||||
param.SetParameter<std::string>(prefix,"unknown-word-prefix","UNK");
|
||||
param.SetParameter<std::string>(suffix,"unknown-word-suffix","");
|
||||
return true;
|
||||
|
@ -15,7 +15,8 @@ namespace Moses
|
||||
std::string prefix;
|
||||
std::string suffix;
|
||||
|
||||
|
||||
bool word_deletion_enabled;
|
||||
bool always_create_direct_transopt;
|
||||
OOVHandlingOptions();
|
||||
|
||||
bool init(Parameter const& param);
|
||||
|
@ -31,7 +31,8 @@ namespace Moses
|
||||
DEFAULT_MAX_PART_TRANS_OPT_SIZE);
|
||||
|
||||
param.SetParameter(consensus, "consensus-decoding", false);
|
||||
|
||||
param.SetParameter(disable_discarding, "disable-discarding", false);
|
||||
|
||||
// transformation to log of a few scores
|
||||
beam_width = TransformScore(beam_width);
|
||||
trans_opt_threshold = TransformScore(trans_opt_threshold);
|
||||
|
@ -14,9 +14,10 @@ namespace Moses
|
||||
SearchAlgorithm algo;
|
||||
|
||||
// stack decoding
|
||||
size_t stack_size; // maxHypoStackSize;
|
||||
size_t stack_diversity; // minHypoStackDiversity;
|
||||
|
||||
size_t stack_size; // maxHypoStackSize;
|
||||
size_t stack_diversity; // minHypoStackDiversity;
|
||||
bool disable_discarding;
|
||||
// Disable discarding of bad hypotheses from HypothesisStackNormal
|
||||
size_t max_phrase_length;
|
||||
size_t max_trans_opt_per_cov;
|
||||
size_t max_partial_trans_opt;
|
||||
|
37
moses/parameters/SyntaxOptions.cpp
Normal file
37
moses/parameters/SyntaxOptions.cpp
Normal file
@ -0,0 +1,37 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#include "SyntaxOptions.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
namespace Moses {
|
||||
|
||||
// Default-construct with the same algorithm that init() passes as its
// default (RecursiveCYKPlus), so s2t_parsing_algo never holds an
// indeterminate value if an instance is read before init() has run.
SyntaxOptions::
SyntaxOptions()
  : s2t_parsing_algo(RecursiveCYKPlus)
{
}
|
||||
|
||||
bool
|
||||
SyntaxOptions::
|
||||
init(Parameter const& param)
|
||||
{
|
||||
param.SetParameter(s2t_parsing_algo, "s2t-parsing-algorithm", RecursiveCYKPlus);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#ifdef HAVE_XMLRPC_C
// Only compiled when HAVE_XMLRPC_C is defined. Receives a map of XML-RPC
// values keyed by name; none of its entries are consulted for the syntax
// options at present, so the argument is deliberately left unnamed and the
// function simply reports success.
bool
SyntaxOptions::
update(std::map<std::string,xmlrpc_c::value>const& /*param*/)
{
  return true;
}
#endif
|
||||
|
||||
}
|
23
moses/parameters/SyntaxOptions.h
Normal file
23
moses/parameters/SyntaxOptions.h
Normal file
@ -0,0 +1,23 @@
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "moses/Parameter.h"
|
||||
#include <string>
|
||||
#include "OptionsBaseClass.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
struct
|
||||
SyntaxOptions : public OptionsBaseClass
|
||||
{
|
||||
S2TParsingAlgorithm s2t_parsing_algo;
|
||||
|
||||
SyntaxOptions();
|
||||
|
||||
bool init(Parameter const& param);
|
||||
bool update(std::map<std::string,xmlrpc_c::value>const& param);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user