mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
More options cleanup.
This commit is contained in:
parent
9dc31e197b
commit
d773ad428e
@ -319,7 +319,7 @@ void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
|
||||
float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
|
||||
allowedScore = std::min( allowedScore, allowedScoreForBitmap );
|
||||
}
|
||||
allowedScore += staticData.GetEarlyDiscardingThreshold();
|
||||
allowedScore += m_options.search.early_discarding_threshold;
|
||||
|
||||
// add expected score of translation option
|
||||
expectedScore += transOpt.GetFutureScore();
|
||||
|
@ -145,7 +145,7 @@ aux_interpret_dlt(string& line) // whatever DLT means ... --- UG
|
||||
|
||||
void
|
||||
Sentence::
|
||||
aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
aux_interpret_xml(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders)
|
||||
{
|
||||
// parse XML markup in translation line
|
||||
@ -153,9 +153,9 @@ aux_interpret_xml(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
const StaticData &SD = StaticData::Instance();
|
||||
|
||||
using namespace std;
|
||||
if (SD.GetXmlInputType() != XmlPassThrough) {
|
||||
if (opts.input.xml_policy != XmlPassThrough) {
|
||||
int offset = SD.IsSyntax() ? 1 : 0;
|
||||
bool OK = ProcessAndStripXMLTags(line, m_xmlOptions,
|
||||
bool OK = ProcessAndStripXMLTags(opts, line, m_xmlOptions,
|
||||
m_reorderingConstraint,
|
||||
xmlWalls, placeholders, offset,
|
||||
SD.GetXmlBrackets().first,
|
||||
@ -191,7 +191,7 @@ init(string line, std::vector<FactorType> const& factorOrder,
|
||||
|
||||
vector<size_t> xmlWalls;
|
||||
vector<pair<size_t, string> >placeholders;
|
||||
aux_interpret_xml(line, xmlWalls, placeholders);
|
||||
aux_interpret_xml(opts, line, xmlWalls, placeholders);
|
||||
|
||||
Phrase::CreateFromString(Input, factorOrder, line, NULL);
|
||||
|
||||
@ -204,7 +204,7 @@ init(string line, std::vector<FactorType> const& factorOrder,
|
||||
// our XmlOptions and create TranslationOptions
|
||||
|
||||
// only fill the vector if we are parsing XML
|
||||
if (SD.GetXmlInputType() != XmlPassThrough) {
|
||||
if (opts.input.xml_policy != XmlPassThrough) {
|
||||
m_xmlCoverageMap.assign(GetSize(), false);
|
||||
BOOST_FOREACH(XmlOption* o, m_xmlOptions) {
|
||||
Range const& r = o->range;
|
||||
@ -320,7 +320,9 @@ void Sentence::GetXmlTranslationOptions(std::vector <TranslationOption*> &list,
|
||||
}
|
||||
}
|
||||
|
||||
std::vector <ChartTranslationOptions*> Sentence::GetXmlChartTranslationOptions() const
|
||||
std::vector <ChartTranslationOptions*>
|
||||
Sentence::
|
||||
GetXmlChartTranslationOptions(AllOptions const& opts) const
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
std::vector <ChartTranslationOptions*> ret;
|
||||
@ -329,7 +331,7 @@ std::vector <ChartTranslationOptions*> Sentence::GetXmlChartTranslationOptions()
|
||||
// this code is a copy of the 1 in Sentence.
|
||||
|
||||
//only fill the vector if we are parsing XML
|
||||
if (staticData.GetXmlInputType() != XmlPassThrough ) {
|
||||
if (opts.input.xml_policy != XmlPassThrough ) {
|
||||
//TODO: needed to handle exclusive
|
||||
//for (size_t i=0; i<GetSize(); i++) {
|
||||
// m_xmlCoverageMap.push_back(false);
|
||||
|
@ -95,7 +95,7 @@ public:
|
||||
//! populates vector argument with XML force translation options for the specific range passed
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list) const;
|
||||
void GetXmlTranslationOptions(std::vector<TranslationOption*> &list, size_t startPos, size_t endPos) const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
|
||||
std::vector<ChartTranslationOptions*> GetXmlChartTranslationOptions(AllOptions const& opts) const;
|
||||
|
||||
virtual int
|
||||
Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
@ -140,7 +140,7 @@ private:
|
||||
|
||||
void
|
||||
aux_interpret_xml
|
||||
(std::string& line, std::vector<size_t> & xmlWalls,
|
||||
(AllOptions const& opts, std::string& line, std::vector<size_t> & xmlWalls,
|
||||
std::vector<std::pair<size_t, std::string> >& placeholders);
|
||||
|
||||
void
|
||||
|
@ -63,7 +63,6 @@ StaticData StaticData::s_instance;
|
||||
StaticData::StaticData()
|
||||
: m_sourceStartPosMattersForRecombination(false)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
// , m_inputType(SentenceInput)
|
||||
, m_lmEnableOOVFeature(false)
|
||||
, m_isAlwaysCreateDirectTranslationOption(false)
|
||||
, m_currentWeightSetting("default")
|
||||
@ -131,14 +130,11 @@ StaticData
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
|
||||
// input type has to be specified BEFORE loading the phrase tables!
|
||||
// m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput);
|
||||
|
||||
m_parameter->SetParameter(m_continuePartialTranslation,
|
||||
"continue-partial-translation", false );
|
||||
|
||||
// use of xml in input
|
||||
m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
// m_parameter->SetParameter<XmlInputType>(m_xmlInputType, "xml-input", XmlPassThrough);
|
||||
|
||||
// specify XML tags opening and closing brackets for XML option
|
||||
params = m_parameter->GetParam("xml-brackets");
|
||||
@ -258,24 +254,24 @@ ini_factor_maps()
|
||||
m_factorDelimiter = "";
|
||||
}
|
||||
|
||||
//input factors
|
||||
params = m_parameter->GetParam("input-factors");
|
||||
if (params) {
|
||||
m_inputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_inputFactorOrder.empty()) {
|
||||
m_inputFactorOrder.push_back(0);
|
||||
}
|
||||
// //input factors
|
||||
// params = m_parameter->GetParam("input-factors");
|
||||
// if (params) {
|
||||
// m_inputFactorOrder = Scan<FactorType>(*params);
|
||||
// }
|
||||
// if(m_inputFactorOrder.empty()) {
|
||||
// m_inputFactorOrder.push_back(0);
|
||||
// }
|
||||
|
||||
//output factors
|
||||
params = m_parameter->GetParam("output-factors");
|
||||
if (params) {
|
||||
m_outputFactorOrder = Scan<FactorType>(*params);
|
||||
}
|
||||
if(m_outputFactorOrder.empty()) {
|
||||
// default. output factor 0
|
||||
m_outputFactorOrder.push_back(0);
|
||||
}
|
||||
// params = m_parameter->GetParam("output-factors");
|
||||
// if (params) {
|
||||
// m_outputFactorOrder = Scan<FactorType>(*params);
|
||||
// }
|
||||
// if(m_outputFactorOrder.empty()) {
|
||||
// // default. output factor 0
|
||||
// m_outputFactorOrder.push_back(0);
|
||||
// }
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -74,7 +74,6 @@ protected:
|
||||
Parameter *m_parameter;
|
||||
AllOptions m_options;
|
||||
|
||||
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
|
||||
mutable ScoreComponentCollection m_allWeights;
|
||||
|
||||
std::vector<DecodeGraph*> m_decodeGraphs;
|
||||
@ -107,49 +106,21 @@ protected:
|
||||
bool m_printTranslationOptions;
|
||||
|
||||
bool m_sourceStartPosMattersForRecombination;
|
||||
// bool m_recoverPath;
|
||||
// bool m_outputHypoScore;
|
||||
bool m_requireSortingAfterSourceContext;
|
||||
|
||||
// SearchAlgorithm m_searchAlgorithm;
|
||||
// InputTypeEnum m_inputType;
|
||||
|
||||
mutable size_t m_verboseLevel;
|
||||
|
||||
// bool m_reportSegmentation;
|
||||
// bool m_reportSegmentationEnriched;
|
||||
// bool m_reportAllFactors;
|
||||
// std::string m_detailedTranslationReportingFilePath;
|
||||
// std::string m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
// std::string m_detailedAllTranslationReportingFilePath;
|
||||
// bool m_PrintAlignmentInfo;
|
||||
// bool m_PrintID;
|
||||
// bool m_PrintPassthroughInformation;
|
||||
// std::string m_alignmentOutputFile;
|
||||
|
||||
std::string m_factorDelimiter; //! by default, |, but it can be changed
|
||||
|
||||
XmlInputType m_xmlInputType; //! method for handling sentence XML input
|
||||
// XmlInputType m_xmlInputType; //! method for handling sentence XML input
|
||||
std::pair<std::string,std::string> m_xmlBrackets; //! strings to use as XML tags' opening and closing brackets. Default are "<" and ">"
|
||||
|
||||
size_t m_lmcache_cleanup_threshold; //! number of translations after which LM cleanup is performed (0=never, N=after N translations; default is 1)
|
||||
bool m_lmEnableOOVFeature;
|
||||
|
||||
// bool m_timeout; //! use timeout
|
||||
// size_t m_timeout_threshold; //! seconds after which time out is activated
|
||||
|
||||
bool m_isAlwaysCreateDirectTranslationOption;
|
||||
//! constructor. only the 1 static variable can be created
|
||||
|
||||
// bool m_outputWordGraph; //! whether to output word graph
|
||||
// bool m_outputSearchGraph; //! whether to output search graph
|
||||
// bool m_outputSearchGraphExtended; //! ... in extended format
|
||||
// bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF)
|
||||
// bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph
|
||||
#ifdef HAVE_PROTOBUF
|
||||
// bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
|
||||
#endif
|
||||
// bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
|
||||
bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
|
||||
std::string m_outputUnknownsFile; //! output unknowns in this file
|
||||
|
||||
@ -164,7 +135,6 @@ protected:
|
||||
Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
|
||||
SourceLabelOverlap m_sourceLabelOverlap;
|
||||
UnknownLHSList m_unknownLHS;
|
||||
// WordAlignmentSort m_wordAlignmentSort;
|
||||
|
||||
int m_threadCount;
|
||||
long m_startTranslationId;
|
||||
@ -190,8 +160,10 @@ protected:
|
||||
|
||||
//! load decoding steps
|
||||
void LoadDecodeGraphs();
|
||||
void LoadDecodeGraphsOld(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans);
|
||||
void LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans);
|
||||
void LoadDecodeGraphsOld(const std::vector<std::string> &mappingVector,
|
||||
const std::vector<size_t> &maxChartSpans);
|
||||
void LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector,
|
||||
const std::vector<size_t> &maxChartSpans);
|
||||
|
||||
void NoCache();
|
||||
|
||||
@ -271,12 +243,13 @@ public:
|
||||
}
|
||||
|
||||
const std::vector<FactorType> &GetInputFactorOrder() const {
|
||||
return m_inputFactorOrder;
|
||||
return m_options.input.factor_order;
|
||||
}
|
||||
|
||||
const std::vector<FactorType> &GetOutputFactorOrder() const {
|
||||
return m_outputFactorOrder;
|
||||
return m_options.output.factor_order;
|
||||
}
|
||||
|
||||
|
||||
inline bool GetSourceStartPosMattersForRecombination() const {
|
||||
return m_sourceStartPosMattersForRecombination;
|
||||
}
|
||||
@ -314,12 +287,6 @@ public:
|
||||
bool UseReorderingConstraint() const {
|
||||
return m_reorderingConstraint;
|
||||
}
|
||||
float GetBeamWidth() const {
|
||||
return m_options.search.beam_width;
|
||||
}
|
||||
float GetEarlyDiscardingThreshold() const {
|
||||
return m_options.search.early_discarding_threshold;
|
||||
}
|
||||
|
||||
bool UseEarlyDiscarding() const {
|
||||
return m_options.search.early_discarding_threshold
|
||||
@ -391,68 +358,9 @@ public:
|
||||
//Weights for feature with fixed number of values
|
||||
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
|
||||
|
||||
// bool GetDistinctNBest() const {
|
||||
// return m_options.nbest.only_distinct;
|
||||
// }
|
||||
const std::string& GetFactorDelimiter() const {
|
||||
return m_factorDelimiter;
|
||||
}
|
||||
// bool UseMBR() const {
|
||||
// return m_mbr;
|
||||
// }
|
||||
// bool UseLatticeMBR() const {
|
||||
// return m_useLatticeMBR ;
|
||||
// }
|
||||
// bool UseConsensusDecoding() const {
|
||||
// return m_useConsensusDecoding;
|
||||
// }
|
||||
// void SetUseLatticeMBR(bool flag) {
|
||||
// m_useLatticeMBR = flag;
|
||||
// }
|
||||
// size_t GetMBRSize() const {
|
||||
// return m_mbrSize;
|
||||
// }
|
||||
// float GetMBRScale() const {
|
||||
// return m_mbrScale;
|
||||
// }
|
||||
// void SetMBRScale(float scale) {
|
||||
// m_mbrScale = scale;
|
||||
// }
|
||||
// size_t GetLatticeMBRPruningFactor() const {
|
||||
// return m_lmbrPruning;
|
||||
// }
|
||||
// void SetLatticeMBRPruningFactor(size_t prune) {
|
||||
// m_lmbrPruning = prune;
|
||||
// }
|
||||
// const std::vector<float>& GetLatticeMBRThetas() const {
|
||||
// return m_lmbrThetas;
|
||||
// }
|
||||
// bool UseLatticeHypSetForLatticeMBR() const {
|
||||
// return m_useLatticeHypSetForLatticeMBR;
|
||||
// }
|
||||
// float GetLatticeMBRPrecision() const {
|
||||
// return m_lmbrPrecision;
|
||||
// }
|
||||
// void SetLatticeMBRPrecision(float p) {
|
||||
// m_lmbrPrecision = p;
|
||||
// }
|
||||
// float GetLatticeMBRPRatio() const {
|
||||
// return m_lmbrPRatio;
|
||||
// }
|
||||
// void SetLatticeMBRPRatio(float r) {
|
||||
// m_lmbrPRatio = r;
|
||||
// }
|
||||
|
||||
// float GetLatticeMBRMapWeight() const {
|
||||
// return m_lmbrMapWeight;
|
||||
// }
|
||||
|
||||
// bool UseTimeout() const {
|
||||
// return m_timeout;
|
||||
// }
|
||||
// size_t GetTimeoutThreshold() const {
|
||||
// return m_timeout_threshold;
|
||||
// }
|
||||
|
||||
size_t GetLMCacheCleanupThreshold() const {
|
||||
return m_lmcache_cleanup_threshold;
|
||||
@ -462,44 +370,17 @@ public:
|
||||
return m_lmEnableOOVFeature;
|
||||
}
|
||||
|
||||
// bool GetOutputSearchGraph() const {
|
||||
// return m_outputSearchGraph;
|
||||
// }
|
||||
|
||||
// void SetOutputSearchGraph(bool outputSearchGraph) {
|
||||
// m_outputSearchGraph = outputSearchGraph;
|
||||
// }
|
||||
|
||||
// bool GetOutputSearchGraphExtended() const {
|
||||
// return m_outputSearchGraphExtended;
|
||||
// }
|
||||
// GetOutputSearchGraphSLF() const {
|
||||
// return m_outputSearchGraphSLF;
|
||||
// }
|
||||
// bool GetOutputSearchGraphHypergraph() const {
|
||||
// return m_outputSearchGraphHypergraph;
|
||||
// }
|
||||
|
||||
// #ifdef HAVE_PROTOBUF
|
||||
// bool GetOutputSearchGraphPB() const {
|
||||
// return m_outputSearchGraphPB;
|
||||
// }
|
||||
// #endif
|
||||
const std::string& GetOutputUnknownsFile() const {
|
||||
return m_outputUnknownsFile;
|
||||
}
|
||||
|
||||
// bool GetUnprunedSearchGraph() const {
|
||||
// return m_unprunedSearchGraph;
|
||||
// }
|
||||
|
||||
bool GetIncludeLHSInSearchGraph() const {
|
||||
return m_includeLHSInSearchGraph;
|
||||
}
|
||||
|
||||
XmlInputType GetXmlInputType() const {
|
||||
return m_xmlInputType;
|
||||
}
|
||||
// XmlInputType GetXmlInputType() const {
|
||||
// return m_xmlInputType;
|
||||
// }
|
||||
|
||||
std::pair<std::string,std::string> GetXmlBrackets() const {
|
||||
return m_xmlBrackets;
|
||||
@ -528,9 +409,6 @@ public:
|
||||
return m_sourceLabelOverlap;
|
||||
}
|
||||
|
||||
// bool GetOutputHypoScore() const {
|
||||
// return m_outputHypoScore;
|
||||
// }
|
||||
size_t GetRuleLimit() const {
|
||||
return m_ruleLimit;
|
||||
}
|
||||
@ -561,18 +439,7 @@ public:
|
||||
|
||||
bool NeedAlignmentInfo() const {
|
||||
return m_bookkeeping_options.need_alignment_info;
|
||||
// return m_needAlignmentInfo;
|
||||
}
|
||||
// const std::string &GetAlignmentOutputFile() const {
|
||||
// return m_alignmentOutputFile;
|
||||
// }
|
||||
// bool PrintAlignmentInfo() const {
|
||||
// return m_PrintAlignmentInfo;
|
||||
// }
|
||||
|
||||
// WordAlignmentSort GetWordAlignmentSort() const {
|
||||
// return m_wordAlignmentSort;
|
||||
// }
|
||||
|
||||
bool GetHasAlternateWeightSettings() const {
|
||||
return m_weightSetting.size() > 0;
|
||||
|
@ -38,6 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/FF/UnknownWordPenaltyProducer.h"
|
||||
#include "moses/FF/LexicalReordering/LexicalReordering.h"
|
||||
#include "moses/FF/InputFeature.h"
|
||||
#include "TranslationTask.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include <boost/foreach.hpp>
|
||||
@ -46,12 +47,6 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** helper for pruning */
|
||||
// bool CompareTranslationOption(const TranslationOption *a, const TranslationOption *b)
|
||||
// {
|
||||
// return a->GetFutureScore() > b->GetFutureScore();
|
||||
// }
|
||||
|
||||
/** constructor; since translation options are indexed by coverage span, the
|
||||
* corresponding data structure is initialized here. This fn should be
|
||||
* called by inherited classes */
|
||||
@ -393,7 +388,8 @@ CreateTranslationOptionsForRange
|
||||
{
|
||||
typedef DecodeStepTranslation Tstep;
|
||||
typedef DecodeStepGeneration Gstep;
|
||||
if ((StaticData::Instance().GetXmlInputType() != XmlExclusive)
|
||||
XmlInputType xml_policy = m_ttask.lock()->options().input.xml_policy;
|
||||
if ((xml_policy != XmlExclusive)
|
||||
|| !HasXmlOptionsOverlappingRange(sPos,ePos)) {
|
||||
|
||||
// partial trans opt stored in here
|
||||
@ -452,8 +448,8 @@ CreateTranslationOptionsForRange
|
||||
vector<TranslationOption*>::const_iterator c;
|
||||
for (c = partTransOptList.begin() ; c != partTransOptList.end() ; ++c) {
|
||||
TranslationOption *transOpt = *c;
|
||||
if (StaticData::Instance().GetXmlInputType() != XmlConstraint
|
||||
|| !ViolatesXmlOptionsConstraint(sPos,ePos,transOpt)) {
|
||||
if (xml_policy != XmlConstraint ||
|
||||
!ViolatesXmlOptionsConstraint(sPos,ePos,transOpt)) {
|
||||
Add(transOpt);
|
||||
}
|
||||
}
|
||||
@ -461,9 +457,9 @@ CreateTranslationOptionsForRange
|
||||
totalEarlyPruned += oldPtoc->GetPrunedCount();
|
||||
delete oldPtoc;
|
||||
// TRACE_ERR( "Early translation options pruned: " << totalEarlyPruned << endl);
|
||||
} // if ((StaticData::Instance().GetXmlInputType() != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos))
|
||||
} // if ((xml_policy != XmlExclusive) || !HasXmlOptionsOverlappingRange(sPos,ePos))
|
||||
|
||||
if (gidx == 0 && StaticData::Instance().GetXmlInputType() != XmlPassThrough
|
||||
if (gidx == 0 && xml_policy != XmlPassThrough
|
||||
&& HasXmlOptionsOverlappingRange(sPos,ePos)) {
|
||||
CreateXmlOptionsForRange(sPos, ePos);
|
||||
}
|
||||
|
@ -222,7 +222,7 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st
|
||||
size_t endPos, bool adhereTableLimit, size_t graphInd)
|
||||
{
|
||||
bool retval = true;
|
||||
XmlInputType intype = StaticData::Instance().GetXmlInputType();
|
||||
XmlInputType intype = m_ttask.lock()->options().input.xml_policy;
|
||||
if ((intype != XmlExclusive) || !HasXmlOptionsOverlappingRange(startPos,endPos)) {
|
||||
InputPathList &inputPathList = GetInputPathList(startPos, endPos);
|
||||
|
||||
|
@ -22,7 +22,11 @@ namespace Moses
|
||||
* \param reorderingConstraint reordering constraint zones specified by xml
|
||||
* \param walls reordering constraint walls specified by xml
|
||||
*/
|
||||
bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput> &sourceLabels, std::vector<XmlOption*> &xmlOptions)
|
||||
bool
|
||||
TreeInput::
|
||||
ProcessAndStripXMLTags(AllOptions const& opts, string &line,
|
||||
std::vector<XMLParseOutput> &sourceLabels,
|
||||
std::vector<XmlOption*> &xmlOptions)
|
||||
{
|
||||
//parse XML markup in translation line
|
||||
|
||||
@ -172,7 +176,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
|
||||
}
|
||||
|
||||
// specified translations -> vector of phrases, separated by "||"
|
||||
if (translation.length() > 0 && StaticData::Instance().GetXmlInputType() != XmlIgnore) {
|
||||
if (translation.length() > 0 && opts.input.xml_policy != XmlIgnore) {
|
||||
vector<string> altTexts = TokenizeMultiCharSeparator(translation, "||");
|
||||
vector<string> altLabel = TokenizeMultiCharSeparator(label, "||");
|
||||
vector<string> altProbs = TokenizeMultiCharSeparator(ParseXmlTagAttribute(tagContent,"prob"), "||");
|
||||
@ -251,7 +255,7 @@ Read(std::istream& in, const std::vector<FactorType>& factorOrder,
|
||||
//line = Trim(line);
|
||||
|
||||
m_labelledSpans.clear();
|
||||
ProcessAndStripXMLTags(line, m_labelledSpans, m_xmlOptions);
|
||||
ProcessAndStripXMLTags(opts, line, m_labelledSpans, m_xmlOptions);
|
||||
|
||||
// do words 1st - hack
|
||||
stringstream strme;
|
||||
|
@ -43,8 +43,10 @@ protected:
|
||||
return m_sourceChart[startPos][endPos - startPos];
|
||||
}
|
||||
|
||||
bool ProcessAndStripXMLTags(std::string &line, std::vector<XMLParseOutput> &sourceLabels, std::vector<XmlOption*> &res);
|
||||
|
||||
bool ProcessAndStripXMLTags(AllOptions const& opts, std::string &line,
|
||||
std::vector<XMLParseOutput> &sourceLabels,
|
||||
std::vector<XmlOption*> &res);
|
||||
|
||||
public:
|
||||
TreeInput() : Sentence() { }
|
||||
|
||||
|
@ -23,13 +23,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "TrellisPathList.h"
|
||||
#include "TrellisPathCollection.h"
|
||||
#include "StaticData.h"
|
||||
|
||||
#include "Manager.h"
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
TrellisPath::TrellisPath(const Hypothesis *hypo)
|
||||
: m_prevEdgeChanged(NOT_FOUND)
|
||||
: m_prevEdgeChanged(NOT_FOUND)
|
||||
{
|
||||
m_totalScore = hypo->GetTotalScore();
|
||||
|
||||
@ -208,9 +208,11 @@ Phrase TrellisPath::GetTargetPhrase() const
|
||||
|
||||
Phrase TrellisPath::GetSurfacePhrase() const
|
||||
{
|
||||
const std::vector<FactorType> &outputFactor = StaticData::Instance().GetOutputFactorOrder();
|
||||
Phrase targetPhrase = GetTargetPhrase()
|
||||
,ret(targetPhrase.GetSize());
|
||||
const std::vector<FactorType> &outputFactor
|
||||
= manager().options().output.factor_order;
|
||||
// = StaticData::Instance().GetOutputFactorOrder();
|
||||
Phrase targetPhrase = GetTargetPhrase();
|
||||
Phrase ret(targetPhrase.GetSize());
|
||||
|
||||
for (size_t pos = 0 ; pos < targetPhrase.GetSize() ; ++pos) {
|
||||
Word &newWord = ret.AddWord();
|
||||
|
@ -1,5 +1,4 @@
|
||||
// $Id$
|
||||
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -34,10 +33,12 @@ namespace Moses
|
||||
class TrellisPathCollection;
|
||||
class TrellisPathList;
|
||||
|
||||
/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 phrase to all the source phrases
|
||||
* to reach a final translation. For the best translation, this consist of all hypotheses, for the other
|
||||
* n-best paths, the node on the path can consist of hypotheses or arcs.
|
||||
* Used by phrase-based decoding
|
||||
/** Encapsulate the set of hypotheses/arcs that goes from decoding 1
|
||||
* phrase to all the source phrases to reach a final
|
||||
* translation. For the best translation, this consists of all
|
||||
* hypotheses, for the other n-best paths, the node on the path
|
||||
* can consist of hypotheses or arcs. Used by phrase-based
|
||||
* decoding
|
||||
*/
|
||||
class TrellisPath
|
||||
{
|
||||
@ -46,9 +47,10 @@ class TrellisPath
|
||||
|
||||
protected:
|
||||
std::vector<const Hypothesis *> m_path; //< list of hypotheses/arcs
|
||||
size_t m_prevEdgeChanged; /**< the last node that was wiggled to create this path
|
||||
, or NOT_FOUND if this path is the best trans so consist of only hypos
|
||||
*/
|
||||
size_t m_prevEdgeChanged;
|
||||
/**< the last node that was wiggled to create this path
|
||||
, or NOT_FOUND if this path is the best trans so consist of only hypos
|
||||
*/
|
||||
|
||||
float m_totalScore;
|
||||
mutable boost::shared_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
||||
@ -58,6 +60,11 @@ protected:
|
||||
|
||||
void InitTotalScore();
|
||||
|
||||
Manager const& manager() const {
|
||||
UTIL_THROW_IF2(m_path.size() == 0, "zero-length trellis path");
|
||||
return m_path[0]->GetManager();
|
||||
}
|
||||
|
||||
public:
|
||||
TrellisPath(); // not implemented
|
||||
|
||||
|
@ -1,6 +1,5 @@
|
||||
// $Id$
|
||||
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
@ -159,10 +158,13 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
|
||||
* \param lbrackStr xml tag's left bracket string, typically "<"
|
||||
* \param rbrackStr xml tag's right bracket string, typically ">"
|
||||
*/
|
||||
bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders,
|
||||
int offset,
|
||||
const std::string& lbrackStr, const std::string& rbrackStr)
|
||||
bool
|
||||
ProcessAndStripXMLTags(AllOptions const& opts, string &line, vector<XmlOption*> &res,
|
||||
ReorderingConstraint &reorderingConstraint,
|
||||
vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders,
|
||||
int offset, const std::string& lbrackStr,
|
||||
const std::string& rbrackStr)
|
||||
{
|
||||
//parse XML markup in translation line
|
||||
|
||||
@ -440,7 +442,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
|
||||
}
|
||||
|
||||
// store translation options into members
|
||||
if (staticData.GetXmlInputType() != XmlIgnore) {
|
||||
if (opts.input.xml_policy != XmlIgnore) {
|
||||
// only store options if we aren't ignoring them
|
||||
for (size_t i=0; i<altTexts.size(); ++i) {
|
||||
Phrase sourcePhrase; // TODO don't know what the source phrase is
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <string>
|
||||
#include "Range.h"
|
||||
#include "TargetPhrase.h"
|
||||
|
||||
#include "parameters/AllOptions.h"
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
@ -29,8 +29,10 @@ std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attri
|
||||
std::string TrimXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">") ;
|
||||
bool isXmlTag(const std::string& tag, const std::string& lbrackStr="<", const std::string& rbrackStr=">");
|
||||
std::vector<std::string> TokenizeXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">");
|
||||
|
||||
bool ProcessAndStripXMLTags(std::string &line, std::vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
|
||||
|
||||
bool ProcessAndStripXMLTags(AllOptions const& opts,
|
||||
std::string &line, std::vector<XmlOption*> &res,
|
||||
ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
|
||||
std::vector< std::pair<size_t, std::string> > &placeholders,
|
||||
int offset,
|
||||
const std::string& lbrackStr="<", const std::string& rbrackStr=">");
|
||||
|
@ -45,21 +45,29 @@ namespace Moses {
|
||||
pspec = param.GetParam("xml-brackets");
|
||||
if (pspec && pspec->size())
|
||||
{
|
||||
std::vector<std::string> brackets = Tokenize(pspec->at(0));
|
||||
if(brackets.size()!=2)
|
||||
{
|
||||
std::cerr << "invalid xml-brackets value, "
|
||||
<< "must specify exactly 2 blank-delimited strings "
|
||||
<< "for XML tags opening and closing brackets" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< xml_brackets.first << " and "
|
||||
<< xml_brackets.second << std::endl);
|
||||
std::vector<std::string> brackets = Tokenize(pspec->at(0));
|
||||
if(brackets.size()!=2)
|
||||
{
|
||||
std::cerr << "invalid xml-brackets value, "
|
||||
<< "must specify exactly 2 blank-delimited strings "
|
||||
<< "for XML tags opening and closing brackets"
|
||||
<< std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
xml_brackets.first= brackets[0];
|
||||
xml_brackets.second=brackets[1];
|
||||
|
||||
VERBOSE(1,"XML tags opening and closing brackets for XML input are: "
|
||||
<< xml_brackets.first << " and "
|
||||
<< xml_brackets.second << std::endl);
|
||||
}
|
||||
|
||||
pspec = param.GetParam("input-factors");
|
||||
if (pspec) factor_order = Scan<FactorType>(*pspec);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -14,7 +14,8 @@ namespace Moses
|
||||
bool default_non_term_only_for_empty_range; // whatever that means
|
||||
InputTypeEnum input_type;
|
||||
XmlInputType xml_policy; // pass through, ignore, exclusive, inclusive
|
||||
|
||||
std::vector<FactorType> factor_order; // input factor order
|
||||
|
||||
std::pair<std::string,std::string> xml_brackets;
|
||||
// strings to use as XML tags' opening and closing brackets.
|
||||
// Default are "<" and ">"
|
||||
|
@ -62,6 +62,11 @@ namespace Moses {
|
||||
} else {
|
||||
lattice_sample_size = 0;
|
||||
}
|
||||
|
||||
params= param.GetParam("output-factors");
|
||||
if (params) factor_order = Scan<FactorType>(*params);
|
||||
if (factor_order.empty()) factor_order.assign(1,0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -10,6 +10,8 @@ namespace Moses
|
||||
struct
|
||||
ReportingOptions : public OptionsBaseClass
|
||||
{
|
||||
std::vector<FactorType> factor_order;
|
||||
|
||||
bool ReportAllFactors; // m_reportAllFactors;
|
||||
|
||||
int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched
|
||||
|
Loading…
Reference in New Issue
Block a user