mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
Code reorganization.
This commit is contained in:
parent
37bb1de9ed
commit
ba6e17ce26
@ -99,25 +99,55 @@ bool StaticData::LoadDataStatic(Parameter *parameter, const std::string &execPat
|
||||
return s_instance.LoadData(parameter);
|
||||
}
|
||||
|
||||
bool StaticData::LoadData(Parameter *parameter)
|
||||
void
|
||||
StaticData
|
||||
::initialize_features()
|
||||
{
|
||||
ResetUserTime();
|
||||
m_parameter = parameter;
|
||||
std::map<std::string, std::string> featureNameOverride = OverrideFeatureNames();
|
||||
// all features
|
||||
map<string, int> featureIndexMap;
|
||||
|
||||
const PARAM_VEC* params = m_parameter->GetParam("feature");
|
||||
for (size_t i = 0; params && i < params->size(); ++i) {
|
||||
const string &line = Trim(params->at(i));
|
||||
VERBOSE(1,"line=" << line << endl);
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
vector<string> toks = Tokenize(line);
|
||||
|
||||
string &feature = toks[0];
|
||||
std::map<std::string, std::string>::const_iterator iter
|
||||
= featureNameOverride.find(feature);
|
||||
if (iter == featureNameOverride.end()) {
|
||||
// feature name not override
|
||||
m_registry.Construct(feature, line);
|
||||
} else {
|
||||
// replace feature name with new name
|
||||
string newName = iter->second;
|
||||
feature = newName;
|
||||
string newLine = Join(" ", toks);
|
||||
m_registry.Construct(newName, newLine);
|
||||
}
|
||||
}
|
||||
|
||||
NoCache();
|
||||
OverrideFeatures();
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_input_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// verbose level
|
||||
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
||||
|
||||
// to cube or not to cube
|
||||
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
|
||||
|
||||
if (IsSyntax())
|
||||
LoadChartDecodingParameters();
|
||||
|
||||
// input type has to be specified BEFORE loading the phrase tables!
|
||||
m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
|
||||
|
||||
m_parameter->SetParameter(m_continuePartialTranslation,
|
||||
"continue-partial-translation", false );
|
||||
|
||||
std::string s_it = "text input";
|
||||
if (m_inputType == 1) {
|
||||
s_it = "confusion net";
|
||||
@ -130,31 +160,55 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
}
|
||||
VERBOSE(2,"input type is: "<<s_it<<"\n");
|
||||
|
||||
// use of xml in input
|
||||
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
params = m_parameter->GetParam("xml-brackets");
|
||||
if (params && params->size()) {
|
||||
std::vector<std::string> brackets = Tokenize(params->at(0));
|
||||
if(brackets.size()!=2) {
|
||||
cerr << "invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets" << endl;
|
||||
exit(1);
|
||||
}
|
||||
m_xmlBrackets.first= brackets[0];
|
||||
m_xmlBrackets.second=brackets[1];
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
|
||||
}
|
||||
|
||||
m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange,
|
||||
"default-non-term-for-empty-range-only", false );
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
StaticData
|
||||
::ini_output_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// verbose level
|
||||
m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
|
||||
|
||||
|
||||
m_parameter->SetParameter(m_recoverPath, "recover-input-path", false);
|
||||
if (m_recoverPath && m_inputType == SentenceInput) {
|
||||
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
|
||||
m_recoverPath = false;
|
||||
}
|
||||
|
||||
// factor delimiter
|
||||
m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
|
||||
if (m_factorDelimiter == "none") {
|
||||
m_factorDelimiter = "";
|
||||
}
|
||||
|
||||
m_parameter->SetParameter( m_continuePartialTranslation, "continue-partial-translation", false );
|
||||
m_parameter->SetParameter( m_outputHypoScore, "output-hypo-score", false );
|
||||
m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false );
|
||||
|
||||
//word-to-word alignment
|
||||
// alignments
|
||||
m_parameter->SetParameter( m_PrintAlignmentInfo, "print-alignment-info", false );
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
|
||||
if (m_PrintAlignmentInfo) {
|
||||
m_needAlignmentInfo = true;
|
||||
}
|
||||
|
||||
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
|
||||
|
||||
m_parameter->SetParameter( m_PrintAlignmentInfoNbest, "print-alignment-info-in-n-best", false );
|
||||
if (m_PrintAlignmentInfoNbest) {
|
||||
m_needAlignmentInfo = true;
|
||||
}
|
||||
@ -169,37 +223,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
|
||||
m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
|
||||
|
||||
// n-best
|
||||
params = m_parameter->GetParam("n-best-list");
|
||||
if (params) {
|
||||
if (params->size() >= 2) {
|
||||
m_nBestFilePath = params->at(0);
|
||||
m_nBestSize = Scan<size_t>( params->at(1) );
|
||||
m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
|
||||
} else {
|
||||
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_nBestSize = 0;
|
||||
}
|
||||
|
||||
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
|
||||
|
||||
//lattice samples
|
||||
params = m_parameter->GetParam("lattice-samples");
|
||||
if (params) {
|
||||
if (params->size() ==2 ) {
|
||||
m_latticeSamplesFilePath = params->at(0);
|
||||
m_latticeSamplesSize = Scan<size_t>(params->at(1));
|
||||
} else {
|
||||
std::cerr <<"wrong format for switch -lattice-samples file size";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_latticeSamplesSize = 0;
|
||||
}
|
||||
|
||||
// word graph
|
||||
params = m_parameter->GetParam("output-word-graph");
|
||||
if (params && params->size() == 2)
|
||||
@ -260,12 +283,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
m_parameter->SetParameter<string>(m_outputUnknownsFile, "output-unknowns", "");
|
||||
|
||||
// include feature names in the n-best list
|
||||
m_parameter->SetParameter( m_labeledNBestList, "labeled-n-best-list", true );
|
||||
|
||||
// include word alignment in the n-best list
|
||||
m_parameter->SetParameter( m_nBestIncludesSegmentation, "include-segmentation-in-n-best", false );
|
||||
|
||||
// printing source phrase spans
|
||||
m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false );
|
||||
m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false );
|
||||
@ -273,34 +290,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false );
|
||||
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter( m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
|
||||
|
||||
//input factors
|
||||
params = m_parameter->GetParam("input-factors");
|
||||
if (params) {
|
||||
m_inputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_inputFactorOrder.empty()) {
|
||||
m_inputFactorOrder.push_back(0);
|
||||
}
|
||||
|
||||
//output factors
|
||||
params = m_parameter->GetParam("output-factors");
|
||||
if (params) {
|
||||
m_outputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_outputFactorOrder.empty()) {
|
||||
// default. output factor 0
|
||||
m_outputFactorOrder.push_back(0);
|
||||
}
|
||||
|
||||
//source word deletion
|
||||
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
|
||||
|
||||
//Disable discarding
|
||||
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
|
||||
|
||||
//Print Translation Options
|
||||
m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false );
|
||||
|
||||
@ -314,106 +303,92 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
//DIMw
|
||||
m_parameter->SetParameter<string>(m_detailedAllTranslationReportingFilePath, "translation-all-details", "");
|
||||
|
||||
// reordering constraints
|
||||
m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);
|
||||
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
|
||||
|
||||
m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );
|
||||
|
||||
// settings for pruning
|
||||
m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
|
||||
|
||||
m_minHypoStackDiversity = 0;
|
||||
params = m_parameter->GetParam("stack-diversity");
|
||||
if (params && params->size()) {
|
||||
if (m_maxDistortion > 15) {
|
||||
std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
|
||||
return false;
|
||||
}
|
||||
if (m_inputType == WordLatticeInput) {
|
||||
std::cerr << "stack diversity > 0 is not allowed for lattice input";
|
||||
return false;
|
||||
}
|
||||
m_minHypoStackDiversity = Scan<size_t>(params->at(0));
|
||||
}
|
||||
|
||||
m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
|
||||
m_beamWidth = TransformScore(m_beamWidth);
|
||||
|
||||
m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
|
||||
m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
|
||||
|
||||
m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
|
||||
m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
|
||||
|
||||
m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
|
||||
m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
|
||||
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit", DEFAULT_CUBE_PRUNING_POP_LIMIT);
|
||||
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity", DEFAULT_CUBE_PRUNING_DIVERSITY);
|
||||
|
||||
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
|
||||
|
||||
// early distortion cost
|
||||
m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
|
||||
|
||||
// unknown word processing
|
||||
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
|
||||
m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
|
||||
|
||||
m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
|
||||
|
||||
// minimum Bayes risk decoding
|
||||
m_parameter->SetParameter(m_mbr, "minimum-bayes-risk", false );
|
||||
m_parameter->SetParameter<size_t>(m_mbrSize, "mbr-size", 200);
|
||||
m_parameter->SetParameter(m_mbrScale, "mbr-scale", 1.0f);
|
||||
|
||||
//lattice mbr
|
||||
m_parameter->SetParameter(m_useLatticeMBR, "lminimum-bayes-risk", false );
|
||||
if (m_useLatticeMBR && m_mbr) {
|
||||
cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//mira training
|
||||
m_parameter->SetParameter(m_mira, "mira", false );
|
||||
|
||||
// lattice MBR
|
||||
if (m_useLatticeMBR) m_mbr = true;
|
||||
|
||||
m_parameter->SetParameter<size_t>(m_lmbrPruning, "lmbr-pruning-factor", 30);
|
||||
m_parameter->SetParameter(m_lmbrPrecision, "lmbr-p", 0.8f);
|
||||
m_parameter->SetParameter(m_lmbrPRatio, "lmbr-r", 0.6f);
|
||||
m_parameter->SetParameter(m_lmbrMapWeight, "lmbr-map-weight", 0.0f);
|
||||
m_parameter->SetParameter(m_useLatticeHypSetForLatticeMBR, "lattice-hypo-set", false );
|
||||
|
||||
params = m_parameter->GetParam("lmbr-thetas");
|
||||
//lattice samples
|
||||
params = m_parameter->GetParam("lattice-samples");
|
||||
if (params) {
|
||||
m_lmbrThetas = Scan<float>(*params);
|
||||
if (params->size() ==2 ) {
|
||||
m_latticeSamplesFilePath = params->at(0);
|
||||
m_latticeSamplesSize = Scan<size_t>(params->at(1));
|
||||
} else {
|
||||
std::cerr <<"wrong format for switch -lattice-samples file size";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_latticeSamplesSize = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
StaticData
|
||||
::ini_nbest_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// n-best
|
||||
params = m_parameter->GetParam("n-best-list");
|
||||
if (params) {
|
||||
if (params->size() >= 2) {
|
||||
m_nBestFilePath = params->at(0);
|
||||
m_nBestSize = Scan<size_t>( params->at(1) );
|
||||
m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
|
||||
} else {
|
||||
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_nBestSize = 0;
|
||||
}
|
||||
|
||||
//consensus decoding
|
||||
m_parameter->SetParameter(m_useConsensusDecoding, "consensus-decoding", false );
|
||||
if (m_useConsensusDecoding && m_mbr) {
|
||||
cerr<< "Error: Cannot use consensus decoding together with mbr" << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (m_useConsensusDecoding) m_mbr=true;
|
||||
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
|
||||
|
||||
m_parameter->SetParameter(m_defaultNonTermOnlyForEmptyRange, "default-non-term-for-empty-range-only", false );
|
||||
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
|
||||
"print-alignment-info-in-n-best", false );
|
||||
|
||||
// include feature names in the n-best list
|
||||
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
|
||||
|
||||
// include word alignment in the n-best list
|
||||
m_parameter->SetParameter(m_nBestIncludesSegmentation,
|
||||
"include-segmentation-in-n-best", false );
|
||||
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter(m_reportAllFactorsNBest,
|
||||
"report-all-factors-in-n-best", false );
|
||||
|
||||
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
|
||||
return true;
|
||||
}
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", RecursiveCYKPlus);
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_compact_table_options()
|
||||
{
|
||||
// Compact phrase table and reordering model
|
||||
m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false );
|
||||
m_parameter->SetParameter(m_minlexrMemory, "minlexr-memory", false );
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_lm_options()
|
||||
{
|
||||
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
|
||||
}
|
||||
|
||||
// threads, timeouts, etc.
|
||||
bool
|
||||
StaticData
|
||||
::ini_performance_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
m_parameter->SetParameter<size_t>(m_timeout_threshold, "time-out", -1);
|
||||
m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
|
||||
|
||||
m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
|
||||
|
||||
m_threadCount = 1;
|
||||
params = m_parameter->GetParam("threads");
|
||||
if (params && params->size()) {
|
||||
@ -436,80 +411,260 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
}
|
||||
#ifndef WITH_THREADS
|
||||
if (m_threadCount > 1) {
|
||||
std::cerr << "Error: Thread count of " << params->at(0) << " but moses not built with thread support";
|
||||
std::cerr << "Error: Thread count of " << params->at(0)
|
||||
<< " but moses not built with thread support";
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
m_parameter->SetParameter<long>(m_startTranslationId, "start-translation-id", 0);
|
||||
void
|
||||
StaticData
|
||||
::ini_cube_pruning_options()
|
||||
{
|
||||
m_parameter->SetParameter(m_cubePruningPopLimit, "cube-pruning-pop-limit",
|
||||
DEFAULT_CUBE_PRUNING_POP_LIMIT);
|
||||
m_parameter->SetParameter(m_cubePruningDiversity, "cube-pruning-diversity",
|
||||
DEFAULT_CUBE_PRUNING_DIVERSITY);
|
||||
m_parameter->SetParameter(m_cubePruningLazyScoring, "cube-pruning-lazy-scoring",
|
||||
false);
|
||||
}
|
||||
|
||||
// use of xml in input
|
||||
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
params = m_parameter->GetParam("xml-brackets");
|
||||
if (params && params->size()) {
|
||||
std::vector<std::string> brackets = Tokenize(params->at(0));
|
||||
if(brackets.size()!=2) {
|
||||
cerr << "invalid xml-brackets value, must specify exactly 2 blank-delimited strings for XML tags opening and closing brackets" << endl;
|
||||
exit(1);
|
||||
}
|
||||
m_xmlBrackets.first= brackets[0];
|
||||
m_xmlBrackets.second=brackets[1];
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl);
|
||||
void
|
||||
StaticData
|
||||
::ini_factor_maps()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// factor delimiter
|
||||
m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
|
||||
if (m_factorDelimiter == "none") {
|
||||
m_factorDelimiter = "";
|
||||
}
|
||||
|
||||
//input factors
|
||||
params = m_parameter->GetParam("input-factors");
|
||||
if (params) {
|
||||
m_inputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_inputFactorOrder.empty()) {
|
||||
m_inputFactorOrder.push_back(0);
|
||||
}
|
||||
|
||||
//output factors
|
||||
params = m_parameter->GetParam("output-factors");
|
||||
if (params) {
|
||||
m_outputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_outputFactorOrder.empty()) {
|
||||
// default. output factor 0
|
||||
m_outputFactorOrder.push_back(0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_oov_options()
|
||||
{
|
||||
// unknown word processing
|
||||
m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false );
|
||||
m_parameter->SetParameter(m_markUnknown, "mark-unknown", false );
|
||||
|
||||
m_parameter->SetParameter(m_lmEnableOOVFeature, "lmodel-oov-feature", false);
|
||||
|
||||
//source word deletion
|
||||
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_distortion_options()
|
||||
{
|
||||
// reordering constraints
|
||||
m_parameter->SetParameter(m_maxDistortion, "distortion-limit", -1);
|
||||
|
||||
m_parameter->SetParameter(m_reorderingConstraint, "monotone-at-punctuation", false );
|
||||
|
||||
// early distortion cost
|
||||
m_parameter->SetParameter(m_useEarlyDistortionCost, "early-distortion-cost", false );
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
bool
|
||||
StaticData
|
||||
::ini_stack_decoding_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// settings for pruning
|
||||
m_parameter->SetParameter(m_maxHypoStackSize, "stack", DEFAULT_MAX_HYPOSTACK_SIZE);
|
||||
|
||||
m_minHypoStackDiversity = 0;
|
||||
params = m_parameter->GetParam("stack-diversity");
|
||||
if (params && params->size()) {
|
||||
if (m_maxDistortion > 15) {
|
||||
std::cerr << "stack diversity > 0 is not allowed for distortion limits larger than 15";
|
||||
return false;
|
||||
}
|
||||
if (m_inputType == WordLatticeInput) {
|
||||
std::cerr << "stack diversity > 0 is not allowed for lattice input";
|
||||
return false;
|
||||
}
|
||||
m_minHypoStackDiversity = Scan<size_t>(params->at(0));
|
||||
}
|
||||
|
||||
m_parameter->SetParameter(m_beamWidth, "beam-threshold", DEFAULT_BEAM_WIDTH);
|
||||
m_beamWidth = TransformScore(m_beamWidth);
|
||||
|
||||
m_parameter->SetParameter(m_earlyDiscardingThreshold, "early-discarding-threshold", DEFAULT_EARLY_DISCARDING_THRESHOLD);
|
||||
m_earlyDiscardingThreshold = TransformScore(m_earlyDiscardingThreshold);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_phrase_lookup_options()
|
||||
{
|
||||
m_parameter->SetParameter(m_translationOptionThreshold, "translation-option-threshold", DEFAULT_TRANSLATION_OPTION_THRESHOLD);
|
||||
m_translationOptionThreshold = TransformScore(m_translationOptionThreshold);
|
||||
|
||||
m_parameter->SetParameter(m_maxNoTransOptPerCoverage, "max-trans-opt-per-coverage", DEFAULT_MAX_TRANS_OPT_SIZE);
|
||||
m_parameter->SetParameter(m_maxNoPartTransOpt, "max-partial-trans-opt", DEFAULT_MAX_PART_TRANS_OPT_SIZE);
|
||||
m_parameter->SetParameter(m_maxPhraseLength, "max-phrase-length", DEFAULT_MAX_PHRASE_LENGTH);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_zombie_options()
|
||||
{
|
||||
//Disable discarding
|
||||
m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_mbr_options()
|
||||
{
|
||||
// minimum Bayes risk decoding
|
||||
m_parameter->SetParameter(m_mbr, "minimum-bayes-risk", false );
|
||||
m_parameter->SetParameter<size_t>(m_mbrSize, "mbr-size", 200);
|
||||
m_parameter->SetParameter(m_mbrScale, "mbr-scale", 1.0f);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_lmbr_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
//lattice mbr
|
||||
m_parameter->SetParameter(m_useLatticeMBR, "lminimum-bayes-risk", false );
|
||||
if (m_useLatticeMBR && m_mbr) {
|
||||
cerr << "Error: Cannot use both n-best mbr and lattice mbr together" << endl;
|
||||
exit(1);
|
||||
}
|
||||
// lattice MBR
|
||||
if (m_useLatticeMBR) m_mbr = true;
|
||||
|
||||
m_parameter->SetParameter<size_t>(m_lmbrPruning, "lmbr-pruning-factor", 30);
|
||||
m_parameter->SetParameter(m_lmbrPrecision, "lmbr-p", 0.8f);
|
||||
m_parameter->SetParameter(m_lmbrPRatio, "lmbr-r", 0.6f);
|
||||
m_parameter->SetParameter(m_lmbrMapWeight, "lmbr-map-weight", 0.0f);
|
||||
m_parameter->SetParameter(m_useLatticeHypSetForLatticeMBR, "lattice-hypo-set", false );
|
||||
|
||||
params = m_parameter->GetParam("lmbr-thetas");
|
||||
if (params) {
|
||||
m_lmbrThetas = Scan<float>(*params);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_consensus_decoding_options()
|
||||
{
|
||||
//consensus decoding
|
||||
m_parameter->SetParameter(m_useConsensusDecoding, "consensus-decoding", false );
|
||||
if (m_useConsensusDecoding && m_mbr) {
|
||||
cerr<< "Error: Cannot use consensus decoding together with mbr" << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (m_useConsensusDecoding) m_mbr=true;
|
||||
}
|
||||
|
||||
void
|
||||
StaticData
|
||||
::ini_mira_options()
|
||||
{
|
||||
//mira training
|
||||
m_parameter->SetParameter(m_mira, "mira", false );
|
||||
}
|
||||
|
||||
bool StaticData::LoadData(Parameter *parameter)
|
||||
{
|
||||
ResetUserTime();
|
||||
m_parameter = parameter;
|
||||
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// to cube or not to cube
|
||||
m_parameter->SetParameter(m_searchAlgorithm, "search-algorithm", Normal);
|
||||
|
||||
if (IsSyntax())
|
||||
LoadChartDecodingParameters();
|
||||
|
||||
// ORDER HERE MATTERS, SO DON'T CHANGE IT UNLESS YOU KNOW WHAT YOU ARE DOING!
|
||||
// input, output
|
||||
ini_factor_maps();
|
||||
ini_input_options();
|
||||
if (!ini_output_options()) return false;
|
||||
if (!ini_nbest_options()) return false;
|
||||
|
||||
// threading etc.
|
||||
if (!ini_performance_options()) return false;
|
||||
|
||||
// model loading
|
||||
ini_compact_table_options();
|
||||
|
||||
// search
|
||||
ini_distortion_options();
|
||||
if (!ini_stack_decoding_options()) return false;
|
||||
ini_phrase_lookup_options();
|
||||
ini_cube_pruning_options();
|
||||
|
||||
ini_oov_options();
|
||||
ini_mbr_options();
|
||||
ini_lmbr_options();
|
||||
ini_consensus_decoding_options();
|
||||
|
||||
ini_mira_options();
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
|
||||
RecursiveCYKPlus);
|
||||
|
||||
|
||||
ini_zombie_options(); // probably dead, or maybe not
|
||||
|
||||
m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND);
|
||||
|
||||
std::map<std::string, std::string> featureNameOverride = OverrideFeatureNames();
|
||||
|
||||
|
||||
// FEATURE FUNCTION INITIALIZATION HAPPENS HERE ===============================
|
||||
// all features
|
||||
map<string, int> featureIndexMap;
|
||||
initialize_features();
|
||||
|
||||
params = m_parameter->GetParam("feature");
|
||||
for (size_t i = 0; params && i < params->size(); ++i) {
|
||||
const string &line = Trim(params->at(i));
|
||||
VERBOSE(1,"line=" << line << endl);
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
vector<string> toks = Tokenize(line);
|
||||
|
||||
string &feature = toks[0];
|
||||
std::map<std::string, std::string>::const_iterator iter = featureNameOverride.find(feature);
|
||||
if (iter == featureNameOverride.end()) {
|
||||
// feature name not override
|
||||
m_registry.Construct(feature, line);
|
||||
} else {
|
||||
// replace feature name with new name
|
||||
string newName = iter->second;
|
||||
feature = newName;
|
||||
string newLine = Join(" ", toks);
|
||||
m_registry.Construct(newName, newLine);
|
||||
}
|
||||
}
|
||||
|
||||
NoCache();
|
||||
OverrideFeatures();
|
||||
|
||||
if (m_parameter->GetParam("show-weights") == NULL) {
|
||||
if (m_parameter->GetParam("show-weights") == NULL)
|
||||
LoadFeatureFunctions();
|
||||
}
|
||||
|
||||
LoadDecodeGraphs();
|
||||
|
||||
|
||||
if (!CheckWeights()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//Add any other features here.
|
||||
|
||||
// sanity check that there are no weights without an associated FF
|
||||
if (!CheckWeights()) return false;
|
||||
|
||||
//Load extra feature weights
|
||||
string weightFile;
|
||||
m_parameter->SetParameter<string>(weightFile, "weight-file", "");
|
||||
@ -525,13 +680,21 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
//Load sparse features from config (overrules weight file)
|
||||
LoadSparseWeightsFromConfig();
|
||||
|
||||
// alternate weight settings
|
||||
// load alternate weight settings
|
||||
//
|
||||
// When and where are these used??? [UG]
|
||||
//
|
||||
// Update: Just checked the manual. The config file is NOT the right
|
||||
// place to do this. [UG]
|
||||
//
|
||||
// <TODO>
|
||||
// * Eliminate alternate-weight-setting. Alternate weight settings should
|
||||
// be provided with the input, not in the config file.
|
||||
// </TODO>
|
||||
params = m_parameter->GetParam("alternate-weight-setting");
|
||||
if (params && params->size()) {
|
||||
if (!LoadAlternateWeightSettings()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (params && params->size() && !LoadAlternateWeightSettings())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -852,7 +1015,8 @@ void
|
||||
StaticData
|
||||
::InitializeForInput(ttasksptr const& ttask) const
|
||||
{
|
||||
const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
|
||||
const std::vector<FeatureFunction*> &producers
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
for(size_t i=0; i<producers.size(); ++i) {
|
||||
FeatureFunction &ff = *producers[i];
|
||||
if (! IsFeatureFunctionIgnored(ff)) {
|
||||
@ -946,14 +1110,15 @@ bool StaticData::CheckWeights() const
|
||||
}
|
||||
|
||||
if (!weightNames.empty()) {
|
||||
cerr << "The following weights have no feature function. Maybe incorrectly spelt weights: ";
|
||||
cerr << "The following weights have no feature function. "
|
||||
<< "Maybe incorrectly spelt weights: ";
|
||||
set<string>::iterator iter;
|
||||
for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
|
||||
cerr << *iter << ",";
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -236,6 +236,26 @@ protected:
|
||||
// number of nonterminal labels
|
||||
// size_t m_nonTerminalSize;
|
||||
|
||||
|
||||
void ini_compact_table_options();
|
||||
void ini_consensus_decoding_options();
|
||||
void ini_cube_pruning_options();
|
||||
void ini_distortion_options();
|
||||
void ini_factor_maps();
|
||||
void ini_input_options();
|
||||
void ini_lm_options();
|
||||
void ini_lmbr_options();
|
||||
void ini_mbr_options();
|
||||
void ini_mira_options();
|
||||
bool ini_nbest_options();
|
||||
void ini_oov_options();
|
||||
bool ini_output_options();
|
||||
bool ini_performance_options();
|
||||
void ini_phrase_lookup_options();
|
||||
bool ini_stack_decoding_options();
|
||||
void ini_zombie_options();
|
||||
|
||||
void initialize_features();
|
||||
public:
|
||||
|
||||
bool IsAlwaysCreateDirectTranslationOption() const {
|
||||
|
Loading…
Reference in New Issue
Block a user