Option bundling.

This commit is contained in:
Ulrich Germann 2015-05-27 20:45:55 +01:00
parent c086a8ee50
commit 7ff1f9c063
6 changed files with 179 additions and 81 deletions

View File

@ -63,8 +63,8 @@ StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
, m_onlyDistinctNBest(false)
, m_needAlignmentInfo(false)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
@ -203,25 +203,26 @@ StaticData
//word-to-word alignment
// alignments
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
if (m_PrintAlignmentInfo) {
m_needAlignmentInfo = true;
}
// if (m_PrintAlignmentInfo) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
if (m_PrintAlignmentInfoNbest) {
m_needAlignmentInfo = true;
}
// if (m_PrintAlignmentInfoNbest) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
m_needAlignmentInfo = true;
// m_needAlignmentInfo = true; // => now in BookkeepingOptions::init()
}
m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
// m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false ); // => now in BookkeepingOptions::init()
// word graph
params = m_parameter->GetParam("output-word-graph");
@ -327,41 +328,7 @@ bool
StaticData
::ini_nbest_options()
{
const PARAM_VEC *params;
// n-best
params = m_parameter->GetParam("n-best-list");
if (params) {
if (params->size() >= 2) {
m_nBestFilePath = params->at(0);
m_nBestSize = Scan<size_t>( params->at(1) );
m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
} else {
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
return false;
}
} else {
m_nBestSize = 0;
}
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
"print-alignment-info-in-n-best", false );
// include feature names in the n-best list
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list
m_parameter->SetParameter(m_nBestIncludesSegmentation,
"include-segmentation-in-n-best", false );
// print all factors of output translations
m_parameter->SetParameter(m_reportAllFactorsNBest,
"report-all-factors-in-n-best", false );
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
return true;
return m_nbest_options.init(*m_parameter);
}
void
@ -625,8 +592,9 @@ bool StaticData::LoadData(Parameter *parameter)
// input, output
ini_factor_maps();
ini_input_options();
m_bookkeeping_options.init(*parameter);
m_nbest_options.init(*parameter); // if (!ini_nbest_options()) return false;
if (!ini_output_options()) return false;
if (!ini_nbest_options()) return false;
// threading etc.
if (!ini_performance_options()) return false;
@ -647,6 +615,17 @@ bool StaticData::LoadData(Parameter *parameter)
ini_mira_options();
// set m_nbest_options.enabled = true if necessary:
if (m_mbr || m_useLatticeMBR || m_outputSearchGraph || m_outputSearchGraphSLF
|| m_mira || m_outputSearchGraphHypergraph || m_useConsensusDecoding
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size())
{
m_nbest_options.enabled = true;
}
// S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus);

View File

@ -45,6 +45,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/PP/Factory.h"
#include "moses/parameters/ContextParameters.h"
#include "moses/parameters/NBestOptions.h"
#include "moses/parameters/BookkeepingOptions.h"
namespace Moses
{
@ -95,18 +97,21 @@ protected:
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
bool m_useEarlyDistortionCost;
size_t
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
, m_nBestSize
, m_latticeSamplesSize
, m_nBestFactor
, m_maxNoTransOptPerCoverage
, m_maxNoPartTransOpt
, m_maxPhraseLength;
size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
NBestOptions m_nbest_options;
BookkeepingOptions m_bookkeeping_options;
// size_t m_nBestSize;
// size_t m_nBestFactor;
size_t m_latticeSamplesSize;
size_t m_maxNoTransOptPerCoverage;
size_t m_maxNoPartTransOpt;
size_t m_maxPhraseLength;
std::string m_nBestFilePath, m_latticeSamplesFilePath;
bool m_labeledNBestList,m_nBestIncludesSegmentation;
// std::string m_nBestFilePath;
std::string m_latticeSamplesFilePath;
// bool m_labeledNBestList,m_nBestIncludesSegmentation;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
bool m_wordDeletionEnabled;
@ -128,21 +133,21 @@ protected:
bool m_reportSegmentation;
bool m_reportSegmentationEnriched;
bool m_reportAllFactors;
bool m_reportAllFactorsNBest;
// bool m_reportAllFactorsNBest;
std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
//DIMw
std::string m_detailedAllTranslationReportingFilePath;
bool m_onlyDistinctNBest;
// bool m_onlyDistinctNBest;
bool m_PrintAlignmentInfo;
bool m_needAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
// bool m_needAlignmentInfo; // => BookkeepingOptions
// bool m_PrintAlignmentInfoNbest;
bool m_PrintID;
bool m_PrintPassthroughInformation;
bool m_PrintPassthroughInformationInNBest;
// bool m_PrintPassthroughInformationInNBest;
std::string m_alignmentOutputFile;
@ -214,7 +219,7 @@ protected:
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
S2TParsingAlgorithm m_s2tParsingAlgorithm;
bool m_printNBestTrees;
// bool m_printNBestTrees;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
@ -361,7 +366,8 @@ public:
return m_PrintPassthroughInformation;
}
bool IsPassthroughInNBestEnabled() const {
return m_PrintPassthroughInformationInNBest;
return m_nbest_options.include_passthrough;
// return m_PrintPassthroughInformationInNBest;
}
int GetMaxDistortion() const {
return m_maxDistortion;
@ -410,7 +416,8 @@ public:
return m_reportAllFactors;
}
bool GetReportAllFactorsNBest() const {
return m_reportAllFactorsNBest;
return m_nbest_options.include_all_factors;
// return m_reportAllFactorsNBest;
}
bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
@ -430,7 +437,8 @@ public:
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
bool IsLabeledNBestList() const {
return m_labeledNBestList;
return m_nbest_options.include_feature_labels;
// return m_labeledNBestList;
}
bool UseMinphrInMemory() const {
@ -443,21 +451,24 @@ public:
// for mert
size_t GetNBestSize() const {
return m_nBestSize;
return m_nbest_options.nbest_size;
// return m_nBestSize;
}
const std::string &GetNBestFilePath() const {
return m_nBestFilePath;
return m_nbest_options.output_file_path;
// return m_nBestFilePath;
}
bool IsNBestEnabled() const {
return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
m_outputSearchGraph || m_outputSearchGraphSLF ||
m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
#ifdef HAVE_PROTOBUF
m_outputSearchGraphPB ||
#endif
!m_latticeSamplesFilePath.empty());
return m_nbest_options.enabled;
// return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
// m_outputSearchGraph || m_outputSearchGraphSLF ||
// m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
// #ifdef HAVE_PROTOBUF
// m_outputSearchGraphPB ||
// #endif
// !m_latticeSamplesFilePath.empty());
}
size_t GetLatticeSamplesSize() const {
@ -469,7 +480,8 @@ public:
}
size_t GetNBestFactor() const {
return m_nBestFactor;
return m_nbest_options.factor;
// return m_nBestFactor;
}
bool GetOutputWordGraph() const {
return m_outputWordGraph;
@ -527,7 +539,8 @@ public:
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
bool GetDistinctNBest() const {
return m_onlyDistinctNBest;
return m_nbest_options.only_distinct;
// return m_onlyDistinctNBest;
}
const std::string& GetFactorDelimiter() const {
return m_factorDelimiter;
@ -692,7 +705,8 @@ public:
const std::string &GetBinDirectory() const;
bool NeedAlignmentInfo() const {
return m_needAlignmentInfo;
return m_bookkeeping_options.need_alignment_info;
// return m_needAlignmentInfo;
}
const std::string &GetAlignmentOutputFile() const {
return m_alignmentOutputFile;
@ -701,14 +715,16 @@ public:
return m_PrintAlignmentInfo;
}
bool PrintAlignmentInfoInNbest() const {
return m_PrintAlignmentInfoNbest;
return m_nbest_options.include_alignment_info;
// return m_PrintAlignmentInfoNbest;
}
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;
}
bool NBestIncludesSegmentation() const {
return m_nBestIncludesSegmentation;
return m_nbest_options.include_segmentation;
// return m_nBestIncludesSegmentation;
}
bool GetHasAlternateWeightSettings() const {
@ -849,7 +865,8 @@ public:
}
bool PrintNBestTrees() const {
return m_printNBestTrees;
return m_nbest_options.print_trees;
// return m_printNBestTrees;
}
bool RequireSortingAfterSourceContext() const {

View File

@ -0,0 +1,18 @@
#include "BookkeepingOptions.h"
namespace Moses {
bool
BookkeepingOptions::
init(Parameter const& P)
{
  // Alignment information has to be kept as soon as any one of the
  // alignment-related outputs is requested. Probe the switches in order
  // and stop looking once one of them is set.
  bool wanted = false;
  P.SetParameter(wanted, "print-alignment-info", false);
  if (!wanted)
    {
      P.SetParameter(wanted, "print-alignment-info-in-n-best", false);
    }
  if (!wanted)
    {
      // A non-empty alignment output file specification also counts.
      PARAM_VEC const* files = P.GetParam("alignment-output-file");
      wanted = files && !files->empty();
    }
  need_alignment_info = wanted;

  // Nothing in here can fail at the moment.
  return true;
}
}

View File

@ -0,0 +1,15 @@
// -*- mode: c++; cc-style: gnu -*-
#ifndef moses_parameters_BookkeepingOptions_h
#define moses_parameters_BookkeepingOptions_h

#include "moses/Parameter.h"
// #include <string>

namespace Moses {

  /// Flags derived from the command line / config parameters that tell the
  /// decoder which extra information it has to keep track of.
  struct BookkeepingOptions
  {
    /// Set by init(): true if "print-alignment-info" or
    /// "print-alignment-info-in-n-best" is switched on, or if a non-empty
    /// "alignment-output-file" was given. The struct has no constructor, so
    /// the value is not assigned until init() has been called.
    bool need_alignment_info;

    /// Derive the flags from the parsed parameters in @p param.
    /// @return true on success (the current implementation always succeeds).
    bool init(Parameter const& param);
  };

}

#endif // moses_parameters_BookkeepingOptions_h

View File

@ -0,0 +1,40 @@
// -*- mode: c++; cc-style: gnu -*-
#include "moses/Parameter.h"
#include "NBestOptions.h"
namespace Moses {
/// Populate the n-best options from the parsed parameters.
///
/// Recognised switches: "n-best-list" (file, size, optional "distinct"),
/// "n-best-factor", "print-alignment-info-in-n-best", "labeled-n-best-list",
/// "include-segmentation-in-n-best", "print-passthrough-in-n-best",
/// "report-all-factors-in-n-best" and "n-best-trees".
///
/// Every member is assigned on every call, including when "n-best-list" is
/// absent or malformed, so no field is ever read uninitialized or left over
/// from a previous call.
///
/// @param P parsed command line / config parameters
/// @return false if "n-best-list" was given with fewer than two arguments,
///         true otherwise
bool
NBestOptions::
init(Parameter const& P)
{
  // Independent switches first, so they are set even if parsing of
  // -n-best-list fails further down.
  P.SetParameter<size_t>(factor, "n-best-factor", 20);
  P.SetParameter(include_alignment_info, "print-alignment-info-in-n-best", false );
  P.SetParameter(include_feature_labels, "labeled-n-best-list", true );
  P.SetParameter(include_segmentation, "include-segmentation-in-n-best", false );
  P.SetParameter(include_passthrough, "print-passthrough-in-n-best", false );
  P.SetParameter(include_all_factors, "report-all-factors-in-n-best", false );
  P.SetParameter(print_trees, "n-best-trees", false );

  // Defaults for everything that is derived from -n-best-list. Without
  // these, only_distinct would never be assigned when the switch is absent,
  // and a stale output path would survive a repeated init().
  output_file_path.clear();
  nbest_size = 0;
  only_distinct = false;
  enabled = false;

  const PARAM_VEC *params = P.GetParam("n-best-list");
  if (params)
    {
      if (params->size() < 2)
        {
          std::cerr << "wrong format for switch -n-best-list file size [distinct]\n";
          return false;
        }
      output_file_path = params->at(0);
      nbest_size = Scan<size_t>( params->at(1) );
      only_distinct = (params->size()>2 && params->at(2)=="distinct");
    }

  // N-best generation is on whenever an output file was requested; callers
  // may still switch it on afterwards for other reasons.
  enabled = !output_file_path.empty();
  return true;
}
} // namespace Moses

View File

@ -0,0 +1,29 @@
// -*- mode: c++; cc-style: gnu -*-
#ifndef moses_parameters_NBestOptions_h
#define moses_parameters_NBestOptions_h

#include <cstddef>
#include <string>

namespace Moses {

  // Only used by reference below, so a forward declaration is enough and
  // keeps this header usable without including moses/Parameter.h first.
  class Parameter;

  /// Bundle of all options that control n-best list generation and output.
  /// The struct has no constructor; members are assigned by init().
  struct NBestOptions
  {
    std::size_t nbest_size; ///< second argument of "n-best-list"; 0 if the switch is absent
    std::size_t factor;     ///< value of "n-best-factor" (default 20)
    bool enabled;           ///< init() sets this to "an output file was given"; callers may override
    bool print_trees;       ///< "n-best-trees"
    bool only_distinct;     ///< third argument of "n-best-list" equals "distinct"
    bool include_alignment_info; ///< "print-alignment-info-in-n-best"
    bool include_segmentation;   ///< "include-segmentation-in-n-best"
    bool include_feature_labels; ///< "labeled-n-best-list" (default true)
    bool include_passthrough;    ///< "print-passthrough-in-n-best"
    bool include_all_factors;    ///< "report-all-factors-in-n-best"

    std::string output_file_path; ///< first argument of "n-best-list"

    /// Fill the members from the parsed parameters in @p param.
    /// @return false if "n-best-list" is malformed, true otherwise.
    bool init(Parameter const& param);
  };

}

#endif // moses_parameters_NBestOptions_h