From d773ad428ec65a8496dee51a3f398843fa90cf39 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Tue, 3 Nov 2015 19:36:43 +0000 Subject: [PATCH] More options cleanup. --- moses/SearchNormal.cpp | 2 +- moses/Sentence.cpp | 16 +- moses/Sentence.h | 4 +- moses/StaticData.cpp | 38 ++--- moses/StaticData.h | 157 ++---------------- moses/TranslationOptionCollection.cpp | 18 +- ...ranslationOptionCollectionConfusionNet.cpp | 2 +- moses/TreeInput.cpp | 10 +- moses/TreeInput.h | 6 +- moses/TrellisPath.cpp | 12 +- moses/TrellisPath.h | 25 ++- moses/XmlOption.cpp | 16 +- moses/XmlOption.h | 8 +- moses/parameters/InputOptions.cpp | 36 ++-- moses/parameters/InputOptions.h | 3 +- moses/parameters/ReportingOptions.cpp | 5 + moses/parameters/ReportingOptions.h | 2 + 17 files changed, 128 insertions(+), 232 deletions(-) diff --git a/moses/SearchNormal.cpp b/moses/SearchNormal.cpp index 62bf420fc..07862df43 100644 --- a/moses/SearchNormal.cpp +++ b/moses/SearchNormal.cpp @@ -319,7 +319,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id ); allowedScore = std::min( allowedScore, allowedScoreForBitmap ); } - allowedScore += staticData.GetEarlyDiscardingThreshold(); + allowedScore += m_options.search.early_discarding_threshold; // add expected score of translation option expectedScore += transOpt.GetFutureScore(); diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index 842feed2b..bc3282dd0 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -145,7 +145,7 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG void Sentence:: -aux_interpret_xml(std::string& line, std::vector & xmlWalls, +aux_interpret_xml(AllOptions const& opts, std::string& line, std::vector & xmlWalls, std::vector >& placeholders) { // parse XML markup in translation line @@ -153,9 +153,9 @@ aux_interpret_xml(std::string& line, std::vector & xmlWalls, const StaticData &SD = StaticData::Instance(); using namespace std; - if (SD.GetXmlInputType() != XmlPassThrough) { + if (opts.input.xml_policy != XmlPassThrough) { int offset = SD.IsSyntax() ? 1 : 0; - bool OK = ProcessAndStripXMLTags(line, m_xmlOptions, + bool OK = ProcessAndStripXMLTags(opts, line, m_xmlOptions, m_reorderingConstraint, xmlWalls, placeholders, offset, SD.GetXmlBrackets().first, @@ -191,7 +191,7 @@ init(string line, std::vector const& factorOrder, vector xmlWalls; vector >placeholders; - aux_interpret_xml(line, xmlWalls, placeholders); + aux_interpret_xml(opts, line, xmlWalls, placeholders); Phrase::CreateFromString(Input, factorOrder, line, NULL); @@ -204,7 +204,7 @@ init(string line, std::vector const& factorOrder, // our XmlOptions and create TranslationOptions // only fill the vector if we are parsing XML - if (SD.GetXmlInputType() != XmlPassThrough) { + if (opts.input.xml_policy != XmlPassThrough) { m_xmlCoverageMap.assign(GetSize(), false); BOOST_FOREACH(XmlOption* o, m_xmlOptions) { Range const& r = o->range; @@ -320,7 +320,9 @@ void Sentence::GetXmlTranslationOptions(std::vector &list, } } -std::vector Sentence::GetXmlChartTranslationOptions() const +std::vector +Sentence:: +GetXmlChartTranslationOptions(AllOptions const& opts) const { const StaticData &staticData = StaticData::Instance(); std::vector ret; @@ -329,7 +331,7 @@ std::vector Sentence::GetXmlChartTranslationOptions() // this code is a copy of the 1 in Sentence. //only fill the vector if we are parsing XML - if (staticData.GetXmlInputType() != XmlPassThrough ) { + if (opts.input.xml_policy != XmlPassThrough ) { //TODO: needed to handle exclusive //for (size_t i=0; i &list) const; void GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const; - std::vector GetXmlChartTranslationOptions() const; + std::vector GetXmlChartTranslationOptions(AllOptions const& opts) const; virtual int Read(std::istream& in, const std::vector& factorOrder, @@ -140,7 +140,7 @@ private: void aux_interpret_xml - (std::string& line, std::vector & xmlWalls, + (AllOptions const& opts, std::string& line, std::vector & xmlWalls, std::vector >& placeholders); void diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 3d5ffe9d7..c7a799570 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -63,7 +63,6 @@ StaticData StaticData::s_instance; StaticData::StaticData() : m_sourceStartPosMattersForRecombination(false) , m_requireSortingAfterSourceContext(false) - // , m_inputType(SentenceInput) , m_lmEnableOOVFeature(false) , m_isAlwaysCreateDirectTranslationOption(false) , m_currentWeightSetting("default") @@ -131,14 +130,11 @@ StaticData { const PARAM_VEC *params; - // input type has to be specified BEFORE loading the phrase tables! - // m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); - m_parameter->SetParameter(m_continuePartialTranslation, "continue-partial-translation", false ); // use of xml in input - m_parameter->SetParameter(m_xmlInputType, "xml-input", XmlPassThrough); + // m_parameter->SetParameter(m_xmlInputType, "xml-input", XmlPassThrough); // specify XML tags opening and closing brackets for XML option params = m_parameter->GetParam("xml-brackets"); @@ -258,24 +254,24 @@ ini_factor_maps() m_factorDelimiter = ""; } - //input factors - params = m_parameter->GetParam("input-factors"); - if (params) { - m_inputFactorOrder = Scan(*params); - } - if(m_inputFactorOrder.empty()) { - m_inputFactorOrder.push_back(0); - } + // //input factors + // params = m_parameter->GetParam("input-factors"); + // if (params) { + // m_inputFactorOrder = Scan(*params); + // } + // if(m_inputFactorOrder.empty()) { + // m_inputFactorOrder.push_back(0); + // } //output factors - params = m_parameter->GetParam("output-factors"); - if (params) { - m_outputFactorOrder = Scan(*params); - } - if(m_outputFactorOrder.empty()) { - // default. output factor 0 - m_outputFactorOrder.push_back(0); - } + // params = m_parameter->GetParam("output-factors"); + // if (params) { + // m_outputFactorOrder = Scan(*params); + // } + // if(m_outputFactorOrder.empty()) { + // // default. output factor 0 + // m_outputFactorOrder.push_back(0); + // } } void diff --git a/moses/StaticData.h b/moses/StaticData.h index e39466079..0ee8118e4 100644 --- a/moses/StaticData.h +++ b/moses/StaticData.h @@ -74,7 +74,6 @@ protected: Parameter *m_parameter; AllOptions m_options; - std::vector m_inputFactorOrder, m_outputFactorOrder; mutable ScoreComponentCollection m_allWeights; std::vector m_decodeGraphs; @@ -107,49 +106,21 @@ protected: bool m_printTranslationOptions; bool m_sourceStartPosMattersForRecombination; - // bool m_recoverPath; - // bool m_outputHypoScore; bool m_requireSortingAfterSourceContext; - // SearchAlgorithm m_searchAlgorithm; - // InputTypeEnum m_inputType; - mutable size_t m_verboseLevel; - // bool m_reportSegmentation; - // bool m_reportSegmentationEnriched; - // bool m_reportAllFactors; - // std::string m_detailedTranslationReportingFilePath; - // std::string m_detailedTreeFragmentsTranslationReportingFilePath; - // std::string m_detailedAllTranslationReportingFilePath; - // bool m_PrintAlignmentInfo; - // bool m_PrintID; - // bool m_PrintPassthroughInformation; - // std::string m_alignmentOutputFile; - std::string m_factorDelimiter; //! by default, |, but it can be changed - XmlInputType m_xmlInputType; //! method for handling sentence XML input + // XmlInputType m_xmlInputType; //! method for handling sentence XML input std::pair m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">" size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1) bool m_lmEnableOOVFeature; - // bool m_timeout; //! use timeout - // size_t m_timeout_threshold; //! seconds after which time out is activated - bool m_isAlwaysCreateDirectTranslationOption; //! constructor. only the 1 static variable can be created - // bool m_outputWordGraph; //! whether to output word graph - // bool m_outputSearchGraph; //! whether to output search graph - // bool m_outputSearchGraphExtended; //! ... in extended format - // bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) - // bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph -#ifdef HAVE_PROTOBUF - // bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf -#endif - // bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph std::string m_outputUnknownsFile; //! output unknowns in this file @@ -164,7 +135,6 @@ protected: Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal; SourceLabelOverlap m_sourceLabelOverlap; UnknownLHSList m_unknownLHS; - // WordAlignmentSort m_wordAlignmentSort; int m_threadCount; long m_startTranslationId; @@ -190,8 +160,10 @@ protected: //! load decoding steps void LoadDecodeGraphs(); - void LoadDecodeGraphsOld(const std::vector &mappingVector, const std::vector &maxChartSpans); - void LoadDecodeGraphsNew(const std::vector &mappingVector, const std::vector &maxChartSpans); + void LoadDecodeGraphsOld(const std::vector &mappingVector, + const std::vector &maxChartSpans); + void LoadDecodeGraphsNew(const std::vector &mappingVector, + const std::vector &maxChartSpans); void NoCache(); @@ -271,12 +243,13 @@ public: } const std::vector &GetInputFactorOrder() const { - return m_inputFactorOrder; + return m_options.input.factor_order; } + const std::vector &GetOutputFactorOrder() const { - return m_outputFactorOrder; + return m_options.output.factor_order; } - + inline bool GetSourceStartPosMattersForRecombination() const { return m_sourceStartPosMattersForRecombination; } @@ -314,12 +287,6 @@ public: bool UseReorderingConstraint() const { return m_reorderingConstraint; } - float GetBeamWidth() const { - return m_options.search.beam_width; - } - float GetEarlyDiscardingThreshold() const { - return m_options.search.early_discarding_threshold; - } bool UseEarlyDiscarding() const { return m_options.search.early_discarding_threshold @@ -391,68 +358,9 @@ public: //Weights for feature with fixed number of values void SetWeights(const FeatureFunction* sp, const std::vector& weights); - // bool GetDistinctNBest() const { - // return m_options.nbest.only_distinct; - // } const std::string& GetFactorDelimiter() const { return m_factorDelimiter; } - // bool UseMBR() const { - // return m_mbr; - // } - // bool UseLatticeMBR() const { - // return m_useLatticeMBR ; - // } - // bool UseConsensusDecoding() const { - // return m_useConsensusDecoding; - // } - // void SetUseLatticeMBR(bool flag) { - // m_useLatticeMBR = flag; - // } - // size_t GetMBRSize() const { - // return m_mbrSize; - // } - // float GetMBRScale() const { - // return m_mbrScale; - // } - // void SetMBRScale(float scale) { - // m_mbrScale = scale; - // } - // size_t GetLatticeMBRPruningFactor() const { - // return m_lmbrPruning; - // } - // void SetLatticeMBRPruningFactor(size_t prune) { - // m_lmbrPruning = prune; - // } - // const std::vector& GetLatticeMBRThetas() const { - // return m_lmbrThetas; - // } - // bool UseLatticeHypSetForLatticeMBR() const { - // return m_useLatticeHypSetForLatticeMBR; - // } - // float GetLatticeMBRPrecision() const { - // return m_lmbrPrecision; - // } - // void SetLatticeMBRPrecision(float p) { - // m_lmbrPrecision = p; - // } - // float GetLatticeMBRPRatio() const { - // return m_lmbrPRatio; - // } - // void SetLatticeMBRPRatio(float r) { - // m_lmbrPRatio = r; - // } - - // float GetLatticeMBRMapWeight() const { - // return m_lmbrMapWeight; - // } - - // bool UseTimeout() const { - // return m_timeout; - // } - // size_t GetTimeoutThreshold() const { - // return m_timeout_threshold; - // } size_t GetLMCacheCleanupThreshold() const { return m_lmcache_cleanup_threshold; @@ -462,44 +370,17 @@ public: return m_lmEnableOOVFeature; } - // bool GetOutputSearchGraph() const { - // return m_outputSearchGraph; - // } - - // void SetOutputSearchGraph(bool outputSearchGraph) { - // m_outputSearchGraph = outputSearchGraph; - // } - -// bool GetOutputSearchGraphExtended() const { -// return m_outputSearchGraphExtended; -// } -// GetOutputSearchGraphSLF() const { -// return m_outputSearchGraphSLF; -// } -// bool GetOutputSearchGraphHypergraph() const { -// return m_outputSearchGraphHypergraph; -// } - -// #ifdef HAVE_PROTOBUF -// bool GetOutputSearchGraphPB() const { -// return m_outputSearchGraphPB; -// } -// #endif const std::string& GetOutputUnknownsFile() const { return m_outputUnknownsFile; } - // bool GetUnprunedSearchGraph() const { - // return m_unprunedSearchGraph; - // } - bool GetIncludeLHSInSearchGraph() const { return m_includeLHSInSearchGraph; } - XmlInputType GetXmlInputType() const { - return m_xmlInputType; - } + // XmlInputType GetXmlInputType() const { + // return m_xmlInputType; + // } std::pair GetXmlBrackets() const { return m_xmlBrackets; @@ -528,9 +409,6 @@ public: return m_sourceLabelOverlap; } - // bool GetOutputHypoScore() const { - // return m_outputHypoScore; - // } size_t GetRuleLimit() const { return m_ruleLimit; } @@ -561,18 +439,7 @@ public: bool NeedAlignmentInfo() const { return m_bookkeeping_options.need_alignment_info; - // return m_needAlignmentInfo; } - // const std::string &GetAlignmentOutputFile() const { - // return m_alignmentOutputFile; - // } - // bool PrintAlignmentInfo() const { - // return m_PrintAlignmentInfo; - // } - - // WordAlignmentSort GetWordAlignmentSort() const { - // return m_wordAlignmentSort; - // } bool GetHasAlternateWeightSettings() const { return m_weightSetting.size() > 0; diff --git a/moses/TranslationOptionCollection.cpp b/moses/TranslationOptionCollection.cpp index 85ae4286a..7b1d02bce 100644 --- a/moses/TranslationOptionCollection.cpp +++ b/moses/TranslationOptionCollection.cpp @@ -38,6 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/FF/UnknownWordPenaltyProducer.h" #include "moses/FF/LexicalReordering/LexicalReordering.h" #include "moses/FF/InputFeature.h" +#include "TranslationTask.h" #include "util/exception.hh" #include @@ -46,12 +47,6 @@ using namespace std; namespace Moses { -/** helper for pruning */ -// bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b) -// { -// return a->GetFutureScore() > b->GetFutureScore(); -// } - /** constructor; since translation options are indexed by coverage span, the * corresponding data structure is initialized here This fn should be * called by inherited classe */ @@ -393,7 +388,8 @@ CreateTranslationOptionsForRange { typedef DecodeStepTranslation Tstep; typedef DecodeStepGeneration Gstep; - if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) + XmlInputType xml_policy = m_ttask.lock()->options().input.xml_policy; + if ((xml_policy != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos)) { // partial trans opt stored in here @@ -452,8 +448,8 @@ CreateTranslationOptionsForRange vector::const_iterator c; for (c = partTransOptList.begin() ; c != partTransOptList.end() ; ++c) { TranslationOption *transOpt = *c; - if (StaticData::Instance().GetXmlInputType() != XmlConstraint - || !ViolatesXmlOptionsConstraint(sPos,ePos,transOpt)) { + if (xml_policy != XmlConstraint || + !ViolatesXmlOptionsConstraint(sPos,ePos,transOpt)) { Add(transOpt); } } @@ -461,9 +457,9 @@ CreateTranslationOptionsForRange totalEarlyPruned += oldPtoc->GetPrunedCount(); delete oldPtoc; // TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl); - } // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos)) + } // if ((xml_policy != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos)) - if (gidx == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough + if (gidx == 0 && xml_policy != XmlPassThrough && HasXmlOptionsOverlappingRange(sPos,ePos)) { CreateXmlOptionsForRange(sPos, ePos); } diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 89c0a9363..dc8c66b9d 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -222,7 +222,7 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st size_t endPos, bool adhereTableLimit, size_t graphInd) { bool retval = true; - XmlInputType intype = StaticData::Instance().GetXmlInputType(); + XmlInputType intype = m_ttask.lock()->options().input.xml_policy; if ((intype != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) { InputPathList &inputPathList = GetInputPathList(startPos, endPos); diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp index 37cf76bf2..cbbdb0d45 100644 --- a/moses/TreeInput.cpp +++ b/moses/TreeInput.cpp @@ -22,7 +22,11 @@ namespace Moses * \param reorderingConstraint reordering constraint zones specified by xml * \param walls reordering constraint walls specified by xml */ -bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector &sourceLabels, std::vector &xmlOptions) +bool +TreeInput:: +ProcessAndStripXMLTags(AllOptions const& opts, string &line, + std::vector &sourceLabels, + std::vector &xmlOptions) { //parse XML markup in translation line @@ -172,7 +176,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector } // specified translations -> vector of phrases, separated by "||" - if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) { + if (translation.length() > 0 && opts.input.xml_policy != XmlIgnore) { vector altTexts = TokenizeMultiCharSeparator(translation, "||"); vector altLabel = TokenizeMultiCharSeparator(label, "||"); vector altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||"); @@ -251,7 +255,7 @@ Read(std::istream& in, const std::vector& factorOrder, //line = Trim(line); m_labelledSpans.clear(); - ProcessAndStripXMLTags(line, m_labelledSpans, m_xmlOptions); + ProcessAndStripXMLTags(opts, line, m_labelledSpans, m_xmlOptions); // do words 1st - hack stringstream strme; diff --git a/moses/TreeInput.h b/moses/TreeInput.h index a8c53493e..3bc5f171c 100644 --- a/moses/TreeInput.h +++ b/moses/TreeInput.h @@ -43,8 +43,10 @@ protected: return m_sourceChart[startPos][endPos - startPos]; } - bool ProcessAndStripXMLTags(std::string &line, std::vector &sourceLabels, std::vector &res); - + bool ProcessAndStripXMLTags(AllOptions const& opts, std::string &line, + std::vector &sourceLabels, + std::vector &res); + public: TreeInput() : Sentence() { } diff --git a/moses/TrellisPath.cpp b/moses/TrellisPath.cpp index 7b7baa815..924153d91 100644 --- a/moses/TrellisPath.cpp +++ b/moses/TrellisPath.cpp @@ -23,13 +23,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "TrellisPathList.h" #include "TrellisPathCollection.h" #include "StaticData.h" - +#include "Manager.h" using namespace std; namespace Moses { TrellisPath::TrellisPath(const Hypothesis *hypo) - : m_prevEdgeChanged(NOT_FOUND) + : m_prevEdgeChanged(NOT_FOUND) { m_totalScore = hypo->GetTotalScore(); @@ -208,9 +208,11 @@ Phrase TrellisPath::GetTargetPhrase() const Phrase TrellisPath::GetSurfacePhrase() const { - const std::vector &outputFactor = StaticData::Instance().GetOutputFactorOrder(); - Phrase targetPhrase = GetTargetPhrase() - ,ret(targetPhrase.GetSize()); + const std::vector &outputFactor + = manager().options().output.factor_order; + // = StaticData::Instance().GetOutputFactorOrder(); + Phrase targetPhrase = GetTargetPhrase(); + Phrase ret(targetPhrase.GetSize()); for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) { Word &newWord = ret.AddWord(); diff --git a/moses/TrellisPath.h b/moses/TrellisPath.h index 1e6914a5c..2ec50bc72 100644 --- a/moses/TrellisPath.h +++ b/moses/TrellisPath.h @@ -1,5 +1,4 @@ -// $Id$ - +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -34,10 +33,12 @@ namespace Moses class TrellisPathCollection; class TrellisPathList; -/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 phrase to all the source phrases - * to reach a final translation. For the best translation, this consist of all hypotheses, for the other - * n-best paths, the node on the path can consist of hypotheses or arcs. - * Used by phrase-based decoding +/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 + * phrase to all the source phrases to reach a final + * translation. For the best translation, this consist of all + * hypotheses, for the other n-best paths, the node on the path + * can consist of hypotheses or arcs. Used by phrase-based + * decoding */ class TrellisPath { @@ -46,9 +47,10 @@ class TrellisPath protected: std::vector m_path; //< list of hypotheses/arcs - size_t m_prevEdgeChanged; /**< the last node that was wiggled to create this path - , or NOT_FOUND if this path is the best trans so consist of only hypos - */ + size_t m_prevEdgeChanged; + /**< the last node that was wiggled to create this path + , or NOT_FOUND if this path is the best trans so consist of only hypos + */ float m_totalScore; mutable boost::shared_ptr m_scoreBreakdown; @@ -58,6 +60,11 @@ protected: void InitTotalScore(); + Manager const& manager() const { + UTIL_THROW_IF2(m_path.size() == 0, "zero-length trellis path"); + return m_path[0]->GetManager(); + } + public: TrellisPath(); // not implemented diff --git a/moses/XmlOption.cpp b/moses/XmlOption.cpp index 941eb2d24..f79ec32d6 100644 --- a/moses/XmlOption.cpp +++ b/moses/XmlOption.cpp @@ -1,6 +1,5 @@ -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- // vim:tabstop=2 - /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -159,10 +158,13 @@ vector TokenizeXml(const string& str, const std::string& lbrackStr, cons * \param lbrackStr xml tag's left bracket string, typically "<" * \param rbrackStr xml tag's right bracket string, typically ">" */ -bool ProcessAndStripXMLTags(string &line, vector &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls, - std::vector< std::pair > &placeholders, - int offset, - const std::string& lbrackStr, const std::string& rbrackStr) +bool +ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector &res, + ReorderingConstraint &reorderingConstraint, + vector< size_t > &walls, + std::vector< std::pair > &placeholders, + int offset, const std::string& lbrackStr, + const std::string& rbrackStr) { //parse XML markup in translation line @@ -440,7 +442,7 @@ bool ProcessAndStripXMLTags(string &line, vector &res, ReorderingCon } // store translation options into members - if (staticData.GetXmlInputType() != XmlIgnore) { + if (opts.input.xml_policy != XmlIgnore) { // only store options if we aren't ignoring them for (size_t i=0; i #include "Range.h" #include "TargetPhrase.h" - +#include "parameters/AllOptions.h" namespace Moses { @@ -29,8 +29,10 @@ std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attri std::string TrimXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">") ; bool isXmlTag(const std::string& tag, const std::string& lbrackStr="<", const std::string& rbrackStr=">"); std::vector TokenizeXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">"); - -bool ProcessAndStripXMLTags(std::string &line, std::vector &res, ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls, + + bool ProcessAndStripXMLTags(AllOptions const& opts, + std::string &line, std::vector &res, + ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls, std::vector< std::pair > &placeholders, int offset, const std::string& lbrackStr="<", const std::string& rbrackStr=">"); diff --git a/moses/parameters/InputOptions.cpp b/moses/parameters/InputOptions.cpp index 206be4660..eb1dc0a6f 100644 --- a/moses/parameters/InputOptions.cpp +++ b/moses/parameters/InputOptions.cpp @@ -45,21 +45,29 @@ namespace Moses { pspec = param.GetParam("xml-brackets"); if (pspec && pspec->size()) { - std::vector brackets = Tokenize(pspec->at(0)); - if(brackets.size()!=2) - { - std::cerr << "invalid xml-brackets value, " - << "must specify exactly 2 blank-delimited strings " - << "for XML tags opening and closing brackets" << std::endl; - exit(1); - } - xml_brackets.first= brackets[0]; - xml_brackets.second=brackets[1]; - VERBOSE(1,"XML tags opening and closing brackets for XML input are: " - << xml_brackets.first << " and " - << xml_brackets.second << std::endl); + std::vector brackets = Tokenize(pspec->at(0)); + if(brackets.size()!=2) + { + std::cerr << "invalid xml-brackets value, " + << "must specify exactly 2 blank-delimited strings " + << "for XML tags opening and closing brackets" + << std::endl; + exit(1); + } + + xml_brackets.first= brackets[0]; + xml_brackets.second=brackets[1]; + + VERBOSE(1,"XML tags opening and closing brackets for XML input are: " + << xml_brackets.first << " and " + << xml_brackets.second << std::endl); } + + pspec = param.GetParam("input-factors"); + if (pspec) factor_order = Scan(*pspec); + if (factor_order.empty()) factor_order.assign(1,0); + return true; } - + } diff --git a/moses/parameters/InputOptions.h b/moses/parameters/InputOptions.h index f81ea06eb..b96b5fac7 100644 --- a/moses/parameters/InputOptions.h +++ b/moses/parameters/InputOptions.h @@ -14,7 +14,8 @@ namespace Moses bool default_non_term_only_for_empty_range; // whatever that means InputTypeEnum input_type; XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive - + std::vector factor_order; // input factor order + std::pair xml_brackets; // strings to use as XML tags' opening and closing brackets. // Default are "<" and ">" diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index fa06eb06f..17680cfa6 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -62,6 +62,11 @@ namespace Moses { } else { lattice_sample_size = 0; } + + params= param.GetParam("output-factors"); + if (params) factor_order = Scan(*params); + if (factor_order.empty()) factor_order.assign(1,0); + return true; } diff --git a/moses/parameters/ReportingOptions.h b/moses/parameters/ReportingOptions.h index 0c4c2ac58..b2d5cf89e 100644 --- a/moses/parameters/ReportingOptions.h +++ b/moses/parameters/ReportingOptions.h @@ -10,6 +10,8 @@ namespace Moses struct ReportingOptions : public OptionsBaseClass { + std::vector factor_order; + bool ReportAllFactors; // m_reportAllFactors; int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched