diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp index 9b34adb6c..79b48f54c 100644 --- a/contrib/server/mosesserver.cpp +++ b/contrib/server/mosesserver.cpp @@ -257,9 +257,9 @@ public: const StaticData &staticData = StaticData::Instance(); //Make sure alternative paths are retained, if necessary - if (addGraphInfo || nbest_size>0) { - (const_cast(staticData)).SetOutputSearchGraph(true); - } + // if (addGraphInfo || nbest_size>0) { + // (const_cast(staticData)).SetOutputSearchGraph(true); + // } stringstream out, graphInfo, transCollOpts; @@ -269,7 +269,7 @@ public: boost::shared_ptr tinput(new TreeInput); const vector& IFO = staticData.GetInputFactorOrder(); istringstream in(source + "\n"); - tinput->Read(in,IFO); + tinput->Read(in,IFO,staticData.options()); ttasksptr task = Moses::TranslationTask::create(tinput); ChartManager manager(task); manager.Decode(); @@ -285,7 +285,8 @@ public: else { // size_t lineNumber = 0; // TODO: Include sentence request number here? - boost::shared_ptr sentence(new Sentence(0,source)); + boost::shared_ptr sentence; + sentence.reset(new Sentence(0,source,staticData.options())); ttasksptr task = Moses::TranslationTask::create(sentence); Manager manager(task); manager.Decode(); @@ -320,7 +321,7 @@ public: outputNBest(manager, m_retData, nbest_size, nbest_distinct, reportAllFactors, addAlignInfo, addScoreBreakdown); } - (const_cast(staticData)).SetOutputSearchGraph(false); + // (const_cast(staticData)).SetOutputSearchGraph(false); } m_retData["text"] = value_string(out.str()); XVERBOSE(1,"Output: " << out.str() << endl); @@ -479,7 +480,9 @@ public: { // should the score breakdown be reported in a more structured manner? ostringstream buf; - path.GetScoreBreakdown()->OutputAllFeatureScores(buf); + bool with_labels + = StaticData::Instance().options().nbest.include_feature_labels; + path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str()); } diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp index a0c0a7852..356f90525 100644 --- a/moses-cmd/LatticeMBRGrid.cpp +++ b/moses-cmd/LatticeMBRGrid.cpp @@ -202,8 +202,9 @@ int main(int argc, char* argv[]) << " ||| "; vector mbrBestHypo = doLatticeMBR(manager,nBestList); manager.OutputBestHypo(mbrBestHypo, lineCount, - SD.GetReportSegmentation(), - SD.GetReportAllFactors(),cout); + manager.options().output.ReportSegmentation, + manager.options().output.ReportAllFactors, + cout); } } } diff --git a/moses/AlignmentInfo.cpp b/moses/AlignmentInfo.cpp index 97efc25eb..15e8b6d52 100644 --- a/moses/AlignmentInfo.cpp +++ b/moses/AlignmentInfo.cpp @@ -106,7 +106,9 @@ std::set AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const } -bool compare_target(const std::pair *a, const std::pair *b) +bool +compare_target(std::pair const* a, + std::pair const* b) { if(a->second < b->second) return true; if(a->second == b->second) return (a->first < b->first); @@ -114,29 +116,29 @@ bool compare_target(const std::pair *a, const std::pair* > AlignmentInfo::GetSortedAlignments() const +std::vector< const std::pair* > +AlignmentInfo:: +GetSortedAlignments(WordAlignmentSort SortOrder) const { std::vector< const std::pair* > ret; - + CollType::const_iterator iter; for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) { const std::pair &alignPair = *iter; ret.push_back(&alignPair); } - - const StaticData &staticData = StaticData::Instance(); - WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort(); - - switch (wordAlignmentSort) { + + switch (SortOrder) { case NoSort: break; - + case TargetOrder: std::sort(ret.begin(), ret.end(), compare_target); break; - + default: - UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort); + UTIL_THROW(util::Exception, "Unknown word alignment sort option: " + << SortOrder); } return ret; diff --git a/moses/AlignmentInfo.h b/moses/AlignmentInfo.h index c74ff340c..50a4bf550 100644 --- a/moses/AlignmentInfo.h +++ b/moses/AlignmentInfo.h @@ -26,7 +26,7 @@ #include #include - +#include "TypeDef.h" namespace Moses { @@ -83,7 +83,8 @@ public: return m_collection.size(); } - std::vector< const std::pair* > GetSortedAlignments() const; + std::vector< const std::pair* > + GetSortedAlignments(WordAlignmentSort SortOrder) const; std::vector GetSourceIndex2PosMap() const; diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp index f0a25986a..89108ebf9 100644 --- a/moses/ChartCell.cpp +++ b/moses/ChartCell.cpp @@ -27,7 +27,6 @@ #include "RuleCube.h" #include "Range.h" #include "Util.h" -#include "StaticData.h" #include "ChartTranslationOptions.h" #include "ChartTranslationOptionList.h" #include "ChartManager.h" @@ -52,8 +51,7 @@ ChartCellBase::~ChartCellBase() {} ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) : ChartCellBase(startPos, endPos), m_manager(manager) { - const StaticData &staticData = StaticData::Instance(); - m_nBestIsEnabled = staticData.options().nbest.enabled; + m_nBestIsEnabled = manager.options().nbest.enabled; } ChartCell::~ChartCell() {} @@ -66,7 +64,14 @@ ChartCell::~ChartCell() {} bool ChartCell::AddHypothesis(ChartHypothesis *hypo) { const Word &targetLHS = hypo->GetTargetLHS(); - return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager); + MapType::iterator m = m_hypoColl.find(targetLHS); + if (m == m_hypoColl.end()) + { + std::pair + e(targetLHS, ChartHypothesisCollection(m_manager.options())); + m = m_hypoColl.insert(e).first; + } + return m->second.AddHypothesis(hypo, m_manager); } /** Prune each collection in this cell to a particular size */ @@ -87,8 +92,6 @@ void ChartCell::PruneToSize() void ChartCell::Decode(const ChartTranslationOptionList &transOptList , const ChartCellCollection &allChartCells) { - const StaticData &staticData = StaticData::Instance(); - // priority queue for applicable rules with selected hypotheses RuleCubeQueue queue(m_manager); @@ -100,7 +103,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList } // pluck things out of queue and add to hypo collection - const size_t popLimit = staticData.options().cube.pop_limit; + const size_t popLimit = m_manager.options().cube.pop_limit; for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) { ChartHypothesis *hypo = queue.Pop(); AddHypothesis(hypo); diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index cf2904e47..7c9f17621 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -256,12 +256,13 @@ void ChartHypothesis::CleanupArcList() * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ + AllOptions const& opts = StaticData::Instance().options(); const StaticData &staticData = StaticData::Instance(); - size_t nBestSize = staticData.options().nbest.nbest_size; - bool distinctNBest = (staticData.options().nbest.only_distinct - || staticData.options().mbr.enabled - || staticData.GetOutputSearchGraph() - || staticData.GetOutputSearchGraphHypergraph()); + size_t nBestSize = opts.nbest.nbest_size; + bool distinctNBest = (opts.nbest.only_distinct + || opts.mbr.enabled + || opts.output.NeedSearchGraph() + || !opts.output.SearchGraphHG.empty()); if (!distinctNBest && m_arcList->size() > nBestSize) { // prune arc list only if there too many arcs diff --git a/moses/ChartHypothesisCollection.cpp b/moses/ChartHypothesisCollection.cpp index 068194287..f7002bfad 100644 --- a/moses/ChartHypothesisCollection.cpp +++ b/moses/ChartHypothesisCollection.cpp @@ -26,6 +26,7 @@ #include "ChartManager.h" #include "HypergraphOutput.h" #include "util/exception.hh" +#include "parameters/AllOptions.h" using namespace std; using namespace Moses; @@ -33,13 +34,13 @@ using namespace Moses; namespace Moses { -ChartHypothesisCollection::ChartHypothesisCollection() +ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts) { - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); - m_beamWidth = staticData.GetBeamWidth(); - m_maxHypoStackSize = staticData.options().search.stack_size; - m_nBestIsEnabled = staticData.options().nbest.enabled; + m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth(); + m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size; + m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled; m_bestScore = -std::numeric_limits::infinity(); } diff --git a/moses/ChartHypothesisCollection.h b/moses/ChartHypothesisCollection.h index 169e81f19..9d682d76d 100644 --- a/moses/ChartHypothesisCollection.h +++ b/moses/ChartHypothesisCollection.h @@ -29,6 +29,7 @@ namespace Moses { class ChartSearchGraphWriter; + class AllOptions; //! functor to compare (chart) hypotheses by (descending) score class ChartHypothesisScoreOrderer @@ -70,7 +71,7 @@ public: return m_hypos.end(); } - ChartHypothesisCollection(); + ChartHypothesisCollection(AllOptions const& opts); ~ChartHypothesisCollection(); bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager); diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index 9c672e00a..767c5b44b 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -371,7 +371,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector, OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; boost::shared_ptr scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation); - scoreBreakdown->OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + scoreBreakdown->OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; // optionally, print word alignments @@ -618,7 +619,7 @@ void ChartManager::OutputDetailedTranslationReport( //DIMw const StaticData &staticData = StaticData::Instance(); - if (staticData.IsDetailedAllTranslationReportingEnabled()) { + if (options().output.detailed_all_transrep_filepath.size()) { const Sentence &sentence = static_cast(m_source); size_t nBestSize = staticData.options().nbest.nbest_size; std::vector > nBestList; @@ -835,11 +836,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe Backtrack(hypo); VERBOSE(3,"0" << std::endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << hypo->GetTotalScore() << " "; } - if (StaticData::Instance().IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { out << "||| "; } Phrase outPhrase(ARRAY_SIZE_INCR); @@ -858,7 +859,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe } else { VERBOSE(1, "NO BEST TRANSLATION" << endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << "0 "; } diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index 897f1828f..19aeb8515 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -107,8 +107,13 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha targetPhrase->SetAlignmentInfo("0-0"); targetPhrase->EvaluateInIsolation(*unksrc); - if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) { - targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]"); + AllOptions const& opts = staticData.options(); + if (!opts.output.detailed_tree_transrep_filepath.empty() || + opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) { + std::string prop = "[ "; + prop += (*targetLHS)[0]->GetString().as_string() + " "; + prop += sourceWord[0]->GetString().as_string() + " ]"; + targetPhrase->SetProperty("Tree", prop); } // chart rule diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 41522268d..188c57438 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -110,29 +110,14 @@ ReadF(std::istream& in, const std::vector& factorOrder, int format) int ConfusionNet:: Read(std::istream& in, - const std::vector& factorOrder) + const std::vector& factorOrder, + AllOptions const& opts) { int rv=ReadF(in,factorOrder,0); if(rv) stats.collect(*this); return rv; } -#if 0 -// Deprecated due to code duplication; -// use Word::CreateFromString() instead -void -ConfusionNet:: -String2Word(const std::string& s,Word& w, - const std::vector& factorOrder) -{ - std::vector factorStrVector = Tokenize(s, "|"); - for(size_t i=0; i& factorOrder) @@ -161,7 +146,8 @@ ReadFormat0(std::istream& in, const std::vector& factorOrder) for(size_t i=0; i < numInputScores; i++) { double prob; if (!(is>>prob)) { - TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n"); + TRACE_ERR("ERROR: unable to parse CN input - bad link probability, " + << "or wrong number of scores\n"); return false; } if(prob<0.0) { @@ -174,7 +160,8 @@ ReadFormat0(std::istream& in, const std::vector& factorOrder) probs[i] = (std::max(static_cast(log(prob)),LOWEST_SCORE)); } - //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon + // store 'real' word count in last feature if we have one more + // weight than we do arc scores and not epsilon if (addRealWordCount && word!=EPSILON && word!="") probs.back() = -1.0; diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h index 85e5f338d..834148864 100644 --- a/moses/ConfusionNet.h +++ b/moses/ConfusionNet.h @@ -67,7 +67,8 @@ public: bool ReadF(std::istream&,const std::vector& factorOrder,int format=0); virtual void Print(std::ostream&) const; - int Read(std::istream& in,const std::vector& factorOrder); + int Read(std::istream& in,const std::vector& factorOrder, + AllOptions const& opts); Phrase GetSubString(const Range&) const; //TODO not defined std::string GetStringRep(const std::vector factorsToPrint) const; //TODO not defined diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp index c593f8ebc..25d160626 100644 --- a/moses/DecodeStepTranslation.cpp +++ b/moses/DecodeStepTranslation.cpp @@ -100,12 +100,14 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO } } -void DecodeStepTranslation::ProcessInitialTranslation( - const InputType &source - ,PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPath &inputPath - , TargetPhraseCollection::shared_ptr phraseColl) const +void +DecodeStepTranslation:: +ProcessInitialTranslation(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPath const& inputPath, + TargetPhraseCollection::shared_ptr phraseColl) const { const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const size_t tableLimit = phraseDictionary->GetTableLimit(); @@ -114,12 +116,13 @@ void DecodeStepTranslation::ProcessInitialTranslation( if (phraseColl != NULL) { IFVERBOSE(3) { - if(StaticData::Instance().GetInputType() == SentenceInput) - TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n"); + if(source.GetType() == SentenceInput) + TRACE_ERR("[" << source.GetSubString(range) << "; " + << startPos << "-" << endPos << "]\n"); else TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl); } - + TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit; @@ -137,11 +140,13 @@ void DecodeStepTranslation::ProcessInitialTranslation( } } -void DecodeStepTranslation::ProcessInitialTranslationLEGACY( - const InputType &source - ,PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPathList &inputPathList) const +void +DecodeStepTranslation:: +ProcessInitialTransLEGACY(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPathList const& inputPathList) const { const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const size_t tableLimit = phraseDictionary->GetTableLimit(); @@ -152,12 +157,13 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY( if (phraseColl != NULL) { IFVERBOSE(3) { - if(StaticData::Instance().GetInputType() == SentenceInput) - TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n"); + if(source.GetType() == SentenceInput) + TRACE_ERR("[" << source.GetSubString(range) << "; " + << startPos << "-" << endPos << "]\n"); else TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl); } - + const std::vector &sourcePhrases = phraseColl->GetSourcePhrases(); TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; diff --git a/moses/DecodeStepTranslation.h b/moses/DecodeStepTranslation.h index eceebb940..25af693eb 100644 --- a/moses/DecodeStepTranslation.h +++ b/moses/DecodeStepTranslation.h @@ -61,10 +61,13 @@ public: , TargetPhraseCollection::shared_ptr phraseColl) const; // legacy - void ProcessInitialTranslationLEGACY(const InputType &source - , PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPathList &inputPathList) const; + void + ProcessInitialTransLEGACY(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPathList const& inputPathList) const; + void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt , const DecodeStep &decodeStep , PartialTranslOptColl &outputPartialTranslOptColl diff --git a/moses/ForestInput.cpp b/moses/ForestInput.cpp index 2977c0636..57b8fa472 100644 --- a/moses/ForestInput.cpp +++ b/moses/ForestInput.cpp @@ -17,8 +17,10 @@ namespace Moses { //! populate this InputType with data from in stream -int ForestInput::Read(std::istream &in, - const std::vector& factorOrder) +int ForestInput:: +Read(std::istream &in, + std::vector const& factorOrder, + AllOptions const& opts) { using Syntax::F2S::Forest; @@ -56,7 +58,7 @@ int ForestInput::Read(std::istream &in, // not sure ForestInput needs to. std::stringstream strme; strme << " " << sentence << " " << std::endl; - Sentence::Read(strme, factorOrder); + Sentence::Read(strme, factorOrder, opts); // Find the maximum end position of any vertex (0 if forest is empty). std::size_t maxEnd = FindMaxEnd(*m_forest); diff --git a/moses/ForestInput.h b/moses/ForestInput.h index 3ad764402..61cb08d83 100644 --- a/moses/ForestInput.h +++ b/moses/ForestInput.h @@ -28,7 +28,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts); //! Output debugging info to stream out virtual void Print(std::ostream&) const; diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp index 830f97ba8..373c2109f 100644 --- a/moses/HypergraphOutput.cpp +++ b/moses/HypergraphOutput.cpp @@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { ChartHypothesis &mainHypo = **iter; - if (StaticData::Instance().GetUnprunedSearchGraph() || + if (StaticData::Instance().options().output.DontPruneSearchGraph || reachable.find(mainHypo.GetId()) != reachable.end()) { (*m_out) << m_lineNumber << " " << mainHypo << endl; } @@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { const ChartHypothesis* mainHypo = *iter; - if (!StaticData::Instance().GetUnprunedSearchGraph() && + if (!StaticData::Instance().options().output.DontPruneSearchGraph && reachable.find(mainHypo->GetId()) == reachable.end()) { //Ignore non reachable nodes continue; diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index c6a248419..c5745c5de 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -195,9 +195,8 @@ EvaluateWhenApplied(float futureScore) const StatefulFeatureFunction &ff = *ffs[i]; const StaticData &staticData = StaticData::Instance(); if (! staticData.IsFeatureFunctionIgnored(ff)) { - m_ffStates[i] = ff.EvaluateWhenApplied(*this, - m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL, - &m_currScoreBreakdown); + FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL; + m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown); } } @@ -276,15 +275,11 @@ CleanupArcList() * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ + const StaticData &staticData = StaticData::Instance(); - size_t nBestSize = staticData.options().nbest.nbest_size; - bool distinctNBest = (m_manager.options().nbest.only_distinct || - staticData.GetLatticeSamplesSize() || - m_manager.options().mbr.enabled || - staticData.GetOutputSearchGraph() || - staticData.GetOutputSearchGraphSLF() || - staticData.GetOutputSearchGraphHypergraph() || - m_manager.options().lmbr.enabled); + AllOptions const& opts = m_manager.options(); + size_t nBestSize = opts.nbest.nbest_size; + bool distinctNBest = opts.NBestDistinct(); if (!distinctNBest && m_arcList->size() > nBestSize * 5) { // prune arc list only if there too many arcs @@ -292,9 +287,8 @@ CleanupArcList() m_arcList->end(), CompareHypothesisTotalScore()); // delete bad ones - ArcList::iterator iter; - for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter) - delete *iter; + ArcList::iterator i = m_arcList->begin() + nBestSize; + while (i != m_arcList->end()) delete *i++; m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end()); } @@ -386,14 +380,16 @@ OutputAlignment(std::ostream &out) const edges.push_back(currentHypo); currentHypo = currentHypo->GetPrevHypo(); } - - OutputAlignment(out, edges); + + OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder); } void Hypothesis:: -OutputAlignment(ostream &out, const vector &edges) +OutputAlignment(ostream &out, + vector const& edges, + WordAlignmentSort waso) { size_t targetOffset = 0; @@ -402,7 +398,7 @@ OutputAlignment(ostream &out, const vector &edges) const TargetPhrase &tp = edge.GetCurrTargetPhrase(); size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); + OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso); targetOffset += tp.GetSize(); } @@ -412,15 +408,17 @@ OutputAlignment(ostream &out, const vector &edges) void Hypothesis:: OutputAlignment(ostream &out, const AlignmentInfo &ai, - size_t sourceOffset, size_t targetOffset) + size_t sourceOffset, size_t targetOffset, + WordAlignmentSort waso) { typedef std::vector< const std::pair* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(); + AlignVec alignments = ai.GetSortedAlignments(waso); AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { const std::pair &alignment = **it; - out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; + out << alignment.first + sourceOffset << "-" + << alignment.second + targetOffset << " "; } } @@ -526,15 +524,17 @@ OutputSurface(std::ostream &out, const Hypothesis &edge, const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { + WordAlignmentSort waso = m_manager.options().output.WA_SortOrder; out << ",wa="; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); - Hypothesis::OutputAlignment(out, ai, 0, 0); + Hypothesis::OutputAlignment(out, ai, 0, 0, waso); out << ",total="; out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = m_manager.options().nbest.include_feature_labels; + scoreBreakdown.OutputAllFeatureScores(out, with_labels); } out << "| "; } @@ -604,9 +604,10 @@ OutputLocalWordAlignment(vector& dest) const using namespace std; Range const& src = this->GetCurrSourceWordsRange(); Range const& trg = this->GetCurrTargetWordsRange(); - + + WordAlignmentSort waso = m_manager.options().output.WA_SortOrder; vector const* > a - = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(); + = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso); typedef pair item; map M; BOOST_FOREACH(item const* p, a) { diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h index 125aef530..c1d6c4598 100644 --- a/moses/Hypothesis.h +++ b/moses/Hypothesis.h @@ -251,9 +251,18 @@ public: return m_transOpt; } - void OutputAlignment(std::ostream &out) const; - static void OutputAlignment(std::ostream &out, const std::vector &edges); - static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset); + void + OutputAlignment(std::ostream &out) const; + + static void + OutputAlignment(std::ostream &out, + const std::vector &edges, + WordAlignmentSort waso); + + static void + OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, + size_t sourceOffset, size_t targetOffset, + WordAlignmentSort waso); void OutputInput(std::ostream& os) const; static void OutputInput(std::vector& map, const Hypothesis* hypo); diff --git a/moses/HypothesisStackNormal.cpp b/moses/HypothesisStackNormal.cpp index 9f13213d4..77347baaf 100644 --- a/moses/HypothesisStackNormal.cpp +++ b/moses/HypothesisStackNormal.cpp @@ -36,7 +36,7 @@ namespace Moses HypothesisStackNormal::HypothesisStackNormal(Manager& manager) : HypothesisStack(manager) { - m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled; + m_nBestIsEnabled = manager.options().nbest.enabled; m_bestScore = -std::numeric_limits::infinity(); m_worstScore = -std::numeric_limits::infinity(); } diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index 62c753269..8a83547e7 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -79,12 +79,6 @@ namespace Moses IOWrapper::IOWrapper() : m_nBestStream(NULL) - // , m_outputWordGraphStream(NULL) - // , m_outputSearchGraphStream(NULL) - // , m_detailedTranslationReportingStream(NULL) - // , m_unknownsStream(NULL) - // , m_alignmentInfoStream(NULL) - // , m_latticeSamplesStream(NULL) , m_surpressSingleBestOutput(false) , m_look_ahead(0) , m_look_back(0) @@ -100,8 +94,8 @@ IOWrapper::IOWrapper() m_look_ahead = staticData.options().context.look_ahead; m_look_back = staticData.options().context.look_back; - m_inputType = staticData.GetInputType(); - + m_inputType = staticData.options().input.input_type; + UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput, "Context-sensitive decoding currently works only with sentence input."); diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index c55793329..02c3470bb 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -216,6 +216,7 @@ boost::shared_ptr IOWrapper:: BufferInput() { + AllOptions const& opts = StaticData::Instance().options(); boost::shared_ptr source; boost::shared_ptr ret; if (m_future_input.size()) { @@ -224,13 +225,13 @@ BufferInput() m_buffered_ahead -= ret->GetSize(); } else { source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder)) + if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) return ret; ret = source; } while (m_buffered_ahead < m_look_ahead) { source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder)) + if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) break; m_future_input.push_back(source); m_buffered_ahead += source->GetSize(); diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 407cf0f9d..c004cda5d 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -320,10 +320,15 @@ void Manager::OutputNBest(OutputCollector *collector) const OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId()); } -void Manager::OutputNBestList(OutputCollector *collector, const std::vector &nbest, long translationId) const +void +Manager:: +OutputNBestList(OutputCollector *collector, + std::vector const& nbest, + long translationId) const { const StaticData &staticData = StaticData::Instance(); - const std::vector &outputFactorOrder = staticData.GetOutputFactorOrder(); + const std::vector &outputFactorOrder + = staticData.GetOutputFactorOrder(); std::ostringstream out; // wtf? copied from the original OutputNBestList @@ -332,18 +337,21 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector::const_iterator i = nbest.begin(); i != nbest.end(); ++i) { + for (std::vector::const_iterator i = nbest.begin(); + i != nbest.end(); ++i) { Incremental::PhraseAndFeatures(*i, outputPhrase, features); // and UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words " + << "(beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; - features.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + features.OutputAllFeatureScores(out, with_labels); out << " ||| " << i->GetScore() << '\n'; } out << std::flush; @@ -351,7 +359,9 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vectorWrite(translationId, out.str()); } -void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const +void +Manager:: +OutputDetailedTranslationReport(OutputCollector *collector) const { if (collector && !completed_nbest_->empty()) { const search::Applied &applied = completed_nbest_->at(0); @@ -498,7 +508,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied if (collector == NULL) return; std::ostringstream out; FixPrecision(out); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << applied.GetScore() << ' '; } Phrase outPhrase; @@ -515,10 +525,12 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl); } -void Manager::OutputBestNone(OutputCollector *collector, long translationId) const +void +Manager:: +OutputBestNone(OutputCollector *collector, long translationId) const { if (collector == NULL) return; - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { collector->Write(translationId, "0 \n"); } else { collector->Write(translationId, "\n"); diff --git a/moses/InputType.h b/moses/InputType.h index af0a73b0c..9f3777530 100644 --- a/moses/InputType.h +++ b/moses/InputType.h @@ -1,5 +1,4 @@ -// -*- c++ -*- -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- // vim:tabstop=2 /*********************************************************************** @@ -31,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "ReorderingConstraint.h" #include "NonTerminal.h" #include "Range.h" +#include "parameters/AllOptions.h" namespace Moses { @@ -184,7 +184,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder) =0; + virtual int + Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) =0; //! Output debugging info to stream out virtual void Print(std::ostream&) const =0; diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 71b384fd1..7e34302ba 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -1,6 +1,5 @@ -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- // vim:tabstop=2 - /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -49,6 +48,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/mbr.h" #include "moses/LatticeMBR.h" +#include + #ifdef HAVE_PROTOBUF #include "hypergraph.pb.h" #include "rule.pb.h" @@ -98,6 +99,10 @@ Manager::GetSource() const */ void Manager::Decode() { + + std::cerr << options().nbest.nbest_size << " " + << options().nbest.enabled << " " << std::endl; + // initialize statistics ResetSentenceStats(m_source); IFVERBOSE(2) { @@ -123,7 +128,8 @@ void Manager::Decode() // some reporting on how long this took IFVERBOSE(1) { GetSentenceStats().StopTimeCollectOpts(); - TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took " + TRACE_ERR("Line "<< m_source.GetTranslationId() + << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds at " << __FILE__ << ":" << __LINE__ << endl); } @@ -1112,11 +1118,13 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea } -void OutputSearchNode(long translationId, std::ostream &outputSearchGraphStream, - const SearchGraphNode& searchNode) +void +OutputSearchNode(AllOptions const& opts, long translationId, + std::ostream &outputSearchGraphStream, + SearchGraphNode const& searchNode) { const vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended(); + bool extendedFormat = opts.output.SearchGraphExtended.size(); outputSearchGraphStream << translationId; // special case: initial hypothesis @@ -1369,24 +1377,32 @@ void Manager::SerializeSearchGraphPB( } #endif -void Manager::OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const +void +Manager:: +OutputSearchGraph(long translationId, std::ostream &out) const { vector searchGraph; GetSearchGraph(searchGraph); for (size_t i = 0; i < searchGraph.size(); ++i) { - OutputSearchNode(translationId,outputSearchGraphStream,searchGraph[i]); + OutputSearchNode(options(),translationId,out,searchGraph[i]); } } -void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, - std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const +void +Manager:: +GetForwardBackwardSearchGraph +( std::map< int, bool >* pConnected, + std::vector* pConnectedList, + std::map >* pOutgoingHyps, + vector< float>* pFwdBwdScores) const { std::map < int, bool > &connected = *pConnected; std::vector< const Hypothesis *>& connectedList = *pConnectedList; std::map < int, int > forward; std::map < int, double > forwardScore; - std::map < const Hypothesis*, set > & outgoingHyps = *pOutgoingHyps; + std::map < const Hypothesis*, set > & outgoingHyps + = *pOutgoingHyps; vector< float> & estimatedScores = *pFwdBwdScores; // *** find connected hypotheses *** @@ -1395,7 +1411,8 @@ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, // ** compute best forward path for each hypothesis *** // // forward cost of hypotheses on final stack is 0 - const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks(); + const std::vector < HypothesisStack* > &hypoStackColl + = m_search->GetHypothesisStacks(); const HypothesisStack &finalStack = *hypoStackColl.back(); HypothesisStack::const_iterator iterHypo; for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) { @@ -1504,34 +1521,34 @@ void Manager::OutputBest(OutputCollector *collector) const if (!options().mbr.enabled) { bestHypo = GetBestHypothesis(); if (bestHypo) { - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << bestHypo->GetTotalScore() << ' '; } - if (staticData.IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { bestHypo->OutputInput(out); out << "||| "; } - const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); - if (params && params->size() && Scan(params->at(0)) ) { - out << translationId << " "; - } - - // VN : I put back the code for OutputPassthroughInformation - if (staticData.IsPassthroughEnabled()) { - OutputPassthroughInformation(out, bestHypo); + // const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); + if (options().output.PrintID) { + out << translationId << " "; + } + + // VN : I put back the code for OutputPassthroughInformation + if (options().output.PrintPassThrough) { + OutputPassthroughInformation(out, bestHypo); } // end of add back - if (staticData.GetReportSegmentation() == 2) { + if (options().output.ReportSegmentation == 2) { GetOutputLanguageModelOrder(out, bestHypo); } bestHypo->OutputBestSurface( out, staticData.GetOutputFactorOrder(), - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors()); - if (staticData.PrintAlignmentInfo()) { + options().output.ReportSegmentation, + options().output.ReportAllFactors); + if (options().output.PrintAlignmentInfo) { out << "||| "; bestHypo->OutputAlignment(out); } @@ -1572,8 +1589,9 @@ void Manager::OutputBest(OutputCollector *collector) const } else { //Lattice MBR decoding vector mbrBestHypo = doLatticeMBR(*this,nBestList); - OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + OutputBestHypo(mbrBestHypo, translationId, + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); IFVERBOSE(2) { PrintUserTime("finished Lattice MBR decoding"); } @@ -1584,8 +1602,8 @@ void Manager::OutputBest(OutputCollector *collector) const else if (options().search.consensus) { const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList); OutputBestHypo(conBestHypo, translationId, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); OutputAlignment(m_alignmentOut, conBestHypo); IFVERBOSE(2) { PrintUserTime("finished Consensus decoding"); @@ -1596,8 +1614,8 @@ void Manager::OutputBest(OutputCollector *collector) const else { const TrellisPath &mbrBestHypo = doMBR(nBestList); OutputBestHypo(mbrBestHypo, translationId, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); OutputAlignment(m_alignmentOut, mbrBestHypo); IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); @@ -1624,7 +1642,7 @@ void Manager::OutputNBest(OutputCollector *collector) const long translationId = m_source.GetTranslationId(); if (options().lmbr.enabled) { - if (staticData.options().nbest.enabled) { + if (options().nbest.enabled) { collector->Write(translationId, m_latticeNBestOut.str()); } } else { @@ -1632,22 +1650,24 @@ void Manager::OutputNBest(OutputCollector *collector) const ostringstream out; CalcNBest(options().nbest.nbest_size, nBestList, options().nbest.only_distinct); - OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), + OutputNBest(out, nBestList, + staticData.GetOutputFactorOrder(), m_source.GetTranslationId(), - staticData.GetReportSegmentation()); + options().output.ReportSegmentation); collector->Write(m_source.GetTranslationId(), out.str()); } } -void Manager::OutputNBest(std::ostream& out - , const Moses::TrellisPathList &nBestList - , const std::vector& outputFactorOrder - , long translationId - , char reportSegmentation) const +void +Manager:: +OutputNBest(std::ostream& out, + const Moses::TrellisPathList &nBestList, + const std::vector& outputFactorOrder, + long translationId, char reportSegmentation) const { const StaticData &staticData = StaticData::Instance(); - NBestOptions const& nbo = staticData.options().nbest; + NBestOptions const& nbo = options().nbest; bool reportAllFactors = nbo.include_all_factors; bool includeSegmentation = nbo.include_segmentation; bool includeWordAlignment = nbo.include_alignment_info; @@ -1661,12 +1681,14 @@ void Manager::OutputNBest(std::ostream& out out << translationId << " ||| "; for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { const Hypothesis &edge = *edges[currEdge]; - OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors); + OutputSurface(out, edge, outputFactorOrder, reportSegmentation, + reportAllFactors); } out << " |||"; // print scores with feature names - path.GetScoreBreakdown()->OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels); // total out << " ||| " << path.GetTotalScore(); @@ -1704,7 +1726,7 @@ void Manager::OutputNBest(std::ostream& out } } - if (StaticData::Instance().IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { out << " ||| "; OutputInput(out, edges[0]); } @@ -1719,8 +1741,11 @@ void Manager::OutputNBest(std::ostream& out /*** * print surface factor only for the given phrase */ -void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector &outputFactorOrder, - char reportSegmentation, bool reportAllFactors) const +void +Manager:: +OutputSurface(std::ostream &out, const Hypothesis &edge, + const std::vector &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const { UTIL_THROW_IF2(outputFactorOrder.size() == 0, "Must specific at least 1 output factor"); @@ -1788,26 +1813,33 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + scoreBreakdown.OutputAllFeatureScores(out, with_labels); } out << "| "; } } -void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const +void +Manager:: +OutputAlignment(ostream &out, const AlignmentInfo &ai, + size_t sourceOffset, size_t targetOffset) const { typedef std::vector< const std::pair* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(); + AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder); AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { const std::pair &alignment = **it; - out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; + out << alignment.first + sourceOffset << "-" + << alignment.second + targetOffset << " "; } - + } -void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const +void +Manager:: +OutputInput(std::ostream& os, const Hypothesis* hypo) const { size_t len = hypo->GetInput().GetSize(); std::vector inp_phrases(len, 0); @@ -1851,8 +1883,10 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const TrellisPathList latticeSamples; ostringstream out; CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); - OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(), - staticData.GetReportSegmentation()); + OutputNBest(out,latticeSamples, + staticData.GetOutputFactorOrder(), + m_source.GetTranslationId(), + options().output.ReportSegmentation); collector->Write(m_source.GetTranslationId(), out.str()); } @@ -1970,14 +2004,10 @@ void Manager::OutputSearchGraphSLF() const long translationId = m_source.GetTranslationId(); // Output search graph in HTK standard lattice format (SLF) - bool slf = staticData.GetOutputSearchGraphSLF(); - if (slf) { + std::string const& slf = options().output.SearchGraphSLF; + if (slf.size()) { util::StringStream fileName; - - string dir; - staticData.GetParameter().SetParameter(dir, "output-search-graph-slf", ""); - - fileName << dir << "/" << translationId << ".slf"; + fileName << slf << "/" << translationId << ".slf"; ofstream *file = new ofstream; file->open(fileName.str().c_str()); if (file->is_open() && file->good()) { @@ -2045,7 +2075,11 @@ void Manager::OutputBestHypo(const std::vector& mbrBestHypo, long /*trans out << endl; } -void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const +void +Manager:: +OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, + char reportSegmentation, bool reportAllFactors, + std::ostream &out) const { const std::vector &edges = path.GetEdges(); @@ -2056,9 +2090,12 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI out << endl; } -void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const +void +Manager:: +OutputAlignment(std::ostringstream &out, const TrellisPath &path) const { - Hypothesis::OutputAlignment(out, path.GetEdges()); + WordAlignmentSort waso = options().output.WA_SortOrder; + Hypothesis::OutputAlignment(out, path.GetEdges(), waso); // Used by --alignment-output-file so requires endl out << std::endl; } diff --git a/moses/Manager.h b/moses/Manager.h index dbc1bb738..720dee38b 100644 --- a/moses/Manager.h +++ b/moses/Manager.h @@ -131,7 +131,7 @@ protected: // nbest mutable std::ostringstream m_latticeNBestOut; mutable std::ostringstream m_alignmentOut; - + public: void OutputNBest(std::ostream& out , const Moses::TrellisPathList &nBestList , const std::vector& outputFactorOrder diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp index 11c5d0f84..12527aee9 100644 --- a/moses/MockHypothesis.cpp +++ b/moses/MockHypothesis.cpp @@ -39,16 +39,19 @@ MockHypothesisGuard { BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size()); std::vector factors(1,0); - m_sentence.reset(new Sentence(0, sourceSentence, &factors)); + AllOptions const& opts = StaticData::Instance().options(); + m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors)); m_ttask = TranslationTask::create(m_sentence); m_manager.reset(new Manager(m_ttask)); //Initial empty hypothesis - Bitmaps bitmaps(m_sentence.get()->GetSize(), m_sentence.get()->m_sourceCompleted); + Bitmaps bitmaps(m_sentence.get()->GetSize(), + m_sentence.get()->m_sourceCompleted); m_manager->ResetSentenceStats(*m_sentence); const Bitmap &initBitmap = bitmaps.GetInitialBitmap(); - m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, initBitmap); + m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, + initBitmap); //create the chain vector::const_iterator ai = alignments.begin(); @@ -56,7 +59,8 @@ MockHypothesisGuard for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) { Hypothesis* prevHypo = m_hypothesis; Range range(ai->first,ai->second); - const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range); + const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), + range); m_targetPhrases.push_back(TargetPhrase(NULL)); // m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL); diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index f6058cbd8..206f15e50 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -1620,6 +1620,13 @@ SetParameter(bool ¶meter, std::string const& parameterName, } } +void +Parameter:: +SetParameter(bool& var, std::string const& name) +{ + SetParameter(var,name,false); +} + } // namespace diff --git a/moses/Parameter.h b/moses/Parameter.h index f6e20efc2..5dbe5fd30 100644 --- a/moses/Parameter.h +++ b/moses/Parameter.h @@ -149,6 +149,20 @@ public: } } + void SetParameter(bool& var, std::string const& name); + + bool SetBooleanSwitch(bool& val, std::string const name) { + // issues a warning if format is wrong + const PARAM_VEC *params = GetParam(name); + val = (params && params->size()); + if (val && params->size() != 1) + { + TRACE_ERR("ERROR: wrong format for switch -" << name); + return false; + } + return true; + } + }; template<> diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index d9810224e..1a9af8e57 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -305,35 +305,38 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score } } -void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const +void +ScoreComponentCollection:: +OutputAllFeatureScores(std::ostream &out, bool with_labels) const { std::string lastName = ""; const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); for( size_t i=0; iIsTuneable()) { - OutputFeatureScores( out, ff, lastName ); + OutputFeatureScores(out, ff, lastName, with_labels); } } const vector& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions(); for( size_t i=0; iIsTuneable()) { - OutputFeatureScores( out, ff, lastName ); + OutputFeatureScores(out, ff, lastName, with_labels); } } } -void ScoreComponentCollection::OutputFeatureScores( std::ostream& out - , const FeatureFunction *ff - , std::string &lastName ) const +void +ScoreComponentCollection:: +OutputFeatureScores(std::ostream& out, FeatureFunction const* ff, + std::string &lastName, bool with_labels) const { - const StaticData &staticData = StaticData::Instance(); - bool labeledOutput = staticData.options().nbest.include_feature_labels; + // const StaticData &staticData = StaticData::Instance(); + // bool labeledOutput = staticData.options().nbest.include_feature_labels; // regular features (not sparse) if (ff->HasTuneableComponents()) { - if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) { + if( with_labels && lastName != ff->GetScoreProducerDescription() ) { lastName = ff->GetScoreProducerDescription(); out << " " << lastName << "="; } diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h index 696658c80..04da0db35 100644 --- a/moses/ScoreComponentCollection.h +++ b/moses/ScoreComponentCollection.h @@ -433,10 +433,9 @@ public: m_scores.merge(other.m_scores); } - void OutputAllFeatureScores(std::ostream &out) const; - void OutputFeatureScores( std::ostream& out - , const Moses::FeatureFunction *ff - , std::string &lastName ) const; + void OutputAllFeatureScores(std::ostream &out, bool with_labels) const; + void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff, + std::string &lastName, bool with_labels) const; #ifdef MPI_ENABLE public: diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index f7bc1aeda..4aaf3d069 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -166,7 +166,8 @@ aux_interpret_xml(std::string& line, std::vector & xmlWalls, void Sentence:: -init(string line, std::vector const& factorOrder) +init(string line, std::vector const& factorOrder, + AllOptions const& opts) { using namespace std; const StaticData &SD = StaticData::Instance(); @@ -182,7 +183,8 @@ init(string line, std::vector const& factorOrder) aux_interpret_dlt(line); // some poorly documented cache-based stuff // if sentences is specified as "" - if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) { + if (SD.options().output.PrintPassThrough || + SD.options().nbest.include_passthrough) { string pthru = PassthroughSGML(line,"passthrough"); this->SetPassthroughInformation(pthru); } @@ -230,12 +232,14 @@ init(string line, std::vector const& factorOrder) int Sentence:: -Read(std::istream& in,const std::vector& factorOrder) +Read(std::istream& in, + const std::vector& factorOrder, + AllOptions const& opts) { std::string line; if (getline(in, line, '\n').eof()) return 0; - init(line, factorOrder); + init(line, factorOrder, opts); return 1; } @@ -366,12 +370,14 @@ CreateFromString(vector const& FOrder, string const& phraseString) } Sentence:: -Sentence(size_t const transId, string const& stext, +Sentence(size_t const transId, + string const& stext, + AllOptions const& opts, vector const* IFO) : InputType(transId) { - if (IFO) init(stext, *IFO); - else init(stext, StaticData::Instance().GetInputFactorOrder()); + if (IFO) init(stext, *IFO, opts); + else init(stext, StaticData::Instance().GetInputFactorOrder(), opts); } } diff --git a/moses/Sentence.h b/moses/Sentence.h index 22ae81ec7..575ae26c1 100644 --- a/moses/Sentence.h +++ b/moses/Sentence.h @@ -1,6 +1,4 @@ -// -*- c++ -*- -// $Id$ - +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -28,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Word.h" #include "Phrase.h" #include "InputType.h" +#include "parameters/AllOptions.h" namespace Moses { @@ -66,7 +65,8 @@ protected: public: Sentence(); Sentence(size_t const transId, std::string const& stext, - std::vector const* IFO = NULL); + AllOptions const& opts, + std::vector const* IFO = NULL); // Sentence(size_t const transId, std::string const& stext); ~Sentence(); @@ -97,7 +97,10 @@ public: void GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const; std::vector GetXmlChartTranslationOptions() const; - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, const std::vector& factorOrder, + AllOptions const& opts); + void Print(std::ostream& out) const; TranslationOptionCollection* @@ -114,7 +117,8 @@ public: void - init(std::string line, std::vector const& factorOrder); + init(std::string line, std::vector const& factorOrder, + AllOptions const& opts); std::vector > const& GetDltMeta() const { diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 341e4d0cc..cc4b1ad35 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -63,7 +63,7 @@ StaticData StaticData::s_instance; StaticData::StaticData() : m_sourceStartPosMattersForRecombination(false) , m_requireSortingAfterSourceContext(false) - , m_inputType(SentenceInput) + // , m_inputType(SentenceInput) , m_lmEnableOOVFeature(false) , m_isAlwaysCreateDirectTranslationOption(false) , m_currentWeightSetting("default") @@ -132,23 +132,11 @@ StaticData const PARAM_VEC *params; // input type has to be specified BEFORE loading the phrase tables! - m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); + // m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); m_parameter->SetParameter(m_continuePartialTranslation, "continue-partial-translation", false ); - std::string s_it = "text input"; - if (m_inputType == 1) { - s_it = "confusion net"; - } - if (m_inputType == 2) { - s_it = "word lattice"; - } - if (m_inputType == 3) { - s_it = "tree"; - } - VERBOSE(2,"input type is: "<SetParameter(m_xmlInputType, "xml-input", XmlPassThrough); @@ -181,119 +169,30 @@ StaticData m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1); - m_parameter->SetParameter(m_recoverPath, "recover-input-path", false); - if (m_recoverPath && m_inputType == SentenceInput) { - TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n"); - m_recoverPath = false; - } + m_parameter->SetParameter(m_includeLHSInSearchGraph, + "include-lhs-in-search-graph", false ); - m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false ); - m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false ); - m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort); - params = m_parameter->GetParam("alignment-output-file"); - if (params && params->size()) { - m_alignmentOutputFile = Scan(params->at(0)); - } - - m_parameter->SetParameter( m_PrintID, "print-id", false ); - m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false ); - - params = m_parameter->GetParam("output-word-graph"); - m_outputWordGraph = (params && params->size() == 2); - - params = m_parameter->GetParam("output-search-graph"); - if (params && params->size()) { - if (params->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph file"; - return false; - } - m_outputSearchGraph = true; - } - // ... in extended format - else if (m_parameter->GetParam("output-search-graph-extended") && - m_parameter->GetParam("output-search-graph-extended")->size()) { - if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file"; - return false; - } - m_outputSearchGraph = true; - m_outputSearchGraphExtended = true; - } else { - m_outputSearchGraph = false; - } - - params = m_parameter->GetParam("output-search-graph-slf"); - if (params && params->size()) { - m_outputSearchGraphSLF = true; - } else { - m_outputSearchGraphSLF = false; - } - - params = m_parameter->GetParam("output-search-graph-hypergraph"); - if (params && params->size()) { - m_outputSearchGraphHypergraph = true; - } else { - m_outputSearchGraphHypergraph = false; - } - -#ifdef HAVE_PROTOBUF - params = m_parameter->GetParam("output-search-graph-pb"); - if (params && params->size()) { - if (params->size() != 1) { - cerr << "ERROR: wrong format for switch -output-search-graph-pb path"; - return false; - } - m_outputSearchGraphPB = true; - } else - m_outputSearchGraphPB = false; -#endif - - m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false ); - m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false ); - - m_parameter->SetParameter(m_outputUnknownsFile, "output-unknowns", ""); - - // printing source phrase spans - m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false ); - m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false ); - - // print all factors of output translations - m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false ); + m_parameter->SetParameter(m_outputUnknownsFile, + "output-unknowns", ""); //Print Translation Options - m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false ); - + m_parameter->SetParameter(m_printTranslationOptions, + "print-translation-option", false ); + //Print All Derivations - m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false ); - - // additional output - m_parameter->SetParameter(m_detailedTranslationReportingFilePath, - "translation-details", ""); - m_parameter->SetParameter(m_detailedTreeFragmentsTranslationReportingFilePath, - "tree-translation-details", ""); - m_parameter->SetParameter(m_detailedAllTranslationReportingFilePath, - "translation-all-details", ""); - m_parameter->SetParameter(m_startTranslationId, "start-translation-id", 0); + m_parameter->SetParameter(m_printAllDerivations , + "print-all-derivations", false ); + + m_parameter->SetParameter(m_startTranslationId, + "start-translation-id", 0); //lattice samples - params = m_parameter->GetParam("lattice-samples"); - if (params) { - if (params->size() ==2 ) { - m_latticeSamplesFilePath = params->at(0); - m_latticeSamplesSize = Scan(params->at(1)); - } else { - std::cerr <<"wrong format for switch -lattice-samples file size"; - return false; - } - } else { - m_latticeSamplesSize = 0; - } return true; } void -StaticData -::ini_compact_table_options() +StaticData:: +ini_compact_table_options() { // Compact phrase table and reordering model m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false ); @@ -301,8 +200,8 @@ StaticData } void -StaticData -::ini_lm_options() +StaticData:: +ini_lm_options() { m_parameter->SetParameter(m_lmcache_cleanup_threshold, "clean-lm-cache", 1); } @@ -349,8 +248,8 @@ StaticData } void -StaticData -::ini_factor_maps() +StaticData:: +ini_factor_maps() { const PARAM_VEC *params; // factor delimiter @@ -380,8 +279,8 @@ StaticData } void -StaticData -::ini_oov_options() +StaticData:: +ini_oov_options() { // unknown word processing m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); @@ -398,8 +297,8 @@ StaticData } void -StaticData -::ini_zombie_options() +StaticData:: +ini_zombie_options() { //Disable discarding m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false); @@ -434,20 +333,6 @@ bool StaticData::LoadData(Parameter *parameter) // search ini_oov_options(); - // set m_nbest_options.enabled = true if necessary: - if (m_options.mbr.enabled - || m_options.mira - || m_options.search.consensus - || m_outputSearchGraph - || m_outputSearchGraphSLF - || m_outputSearchGraphHypergraph -#ifdef HAVE_PROTOBUF - || m_outputSearchGraphPB -#endif - || m_latticeSamplesFilePath.size()) { - m_options.nbest.enabled = true; - } - // S2T decoder m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", RecursiveCYKPlus); @@ -455,8 +340,9 @@ bool StaticData::LoadData(Parameter *parameter) ini_zombie_options(); // probably dead, or maybe not - m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND); - + m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", + NOT_FOUND); + // FEATURE FUNCTION INITIALIZATION HAPPENS HERE =============================== initialize_features(); @@ -507,7 +393,8 @@ void StaticData::SetWeight(const FeatureFunction* sp, float weight) m_allWeights.Assign(sp,weight); } -void StaticData::SetWeights(const FeatureFunction* sp, const std::vector& weights) +void StaticData::SetWeights(const FeatureFunction* sp, + const std::vector& weights) { m_allWeights.Resize(); m_allWeights.Assign(sp,weights); @@ -557,8 +444,10 @@ void StaticData::LoadChartDecodingParameters() LoadNonTerminals(); // source label overlap - m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd); - m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE); + m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", + SourceLabelOverlapAdd); + m_parameter->SetParameter(m_ruleLimit, "rule-limit", + DEFAULT_MAX_TRANS_OPT_SIZE); } @@ -596,12 +485,16 @@ void StaticData::LoadDecodeGraphs() } } -void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const vector &maxChartSpans) +void +StaticData:: +LoadDecodeGraphsOld(const vector &mappingVector, + const vector &maxChartSpans) { const vector& pts = PhraseDictionary::GetColl(); const vector& gens = GenerationDictionary::GetColl(); - const std::vector *featuresRemaining = &FeatureFunction::GetFeatureFunctions(); + const std::vector *featuresRemaining + = &FeatureFunction::GetFeatureFunctions(); DecodeStep *prev = 0; size_t prevDecodeGraphInd = 0; @@ -620,7 +513,8 @@ void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const // For specifying multiple translation model decodeGraphInd = Scan(token[0]); //the vectorList index can only increment by one - UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, + UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd + && decodeGraphInd != prevDecodeGraphInd + 1, "Malformed mapping"); if (decodeGraphInd > prevDecodeGraphInd) { prev = NULL; @@ -707,7 +601,8 @@ void StaticData::LoadDecodeGraphsNew(const std::vector &mappingVect decodeGraphInd = Scan(token[0]); //the vectorList index can only increment by one - UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, + UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd + && decodeGraphInd != prevDecodeGraphInd + 1, "Malformed mapping"); if (decodeGraphInd > prevDecodeGraphInd) { prev = NULL; @@ -783,17 +678,6 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight) void StaticData::SetExecPath(const std::string &path) { - /* - namespace fs = boost::filesystem; - - fs::path full_path( fs::initial_path() ); - - full_path = fs::system_complete( fs::path( path ) ); - - //Without file name - m_binPath = full_path.parent_path().string(); - */ - // NOT TESTED size_t pos = path.rfind("/"); if (pos != string::npos) { @@ -810,34 +694,33 @@ const string &StaticData::GetBinDirectory() const float StaticData::GetWeightWordPenalty() const { float weightWP = GetWeight(&WordPenaltyProducer::Instance()); - //VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl); return weightWP; } void -StaticData -::InitializeForInput(ttasksptr const& ttask) const +StaticData:: +InitializeForInput(ttasksptr const& ttask) const { const std::vector &producers - = FeatureFunction::GetFeatureFunctions(); + = FeatureFunction::GetFeatureFunctions(); for(size_t i=0; i" - // bool m_mbr; //! use MBR decoder - // bool m_useLatticeMBR; //! use MBR decoder - // bool m_mira; // do mira training - // bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009) - // size_t m_mbrSize; //! number of translation candidates considered - // float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation - // size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice - // std::vector m_lmbrThetas; //! theta(s) for lattice mbr calculation - // bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR - // float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details - // float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details - // float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details - size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1) bool m_lmEnableOOVFeature; @@ -167,15 +141,15 @@ protected: bool m_isAlwaysCreateDirectTranslationOption; //! constructor. only the 1 static variable can be created - bool m_outputWordGraph; //! whether to output word graph - bool m_outputSearchGraph; //! whether to output search graph - bool m_outputSearchGraphExtended; //! ... in extended format - bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) - bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph + // bool m_outputWordGraph; //! whether to output word graph + // bool m_outputSearchGraph; //! whether to output search graph + // bool m_outputSearchGraphExtended; //! ... in extended format + // bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) + // bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph #ifdef HAVE_PROTOBUF - bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf + // bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf #endif - bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) + // bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph std::string m_outputUnknownsFile; //! output unknowns in this file @@ -190,7 +164,7 @@ protected: Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal; SourceLabelOverlap m_sourceLabelOverlap; UnknownLHSList m_unknownLHS; - WordAlignmentSort m_wordAlignmentSort; + // WordAlignmentSort m_wordAlignmentSort; int m_threadCount; long m_startTranslationId; @@ -229,10 +203,6 @@ protected: const StatefulFeatureFunction* m_treeStructure; - // number of nonterminal labels -// size_t m_nonTerminalSize; - - void ini_compact_table_options(); void ini_consensus_decoding_options(); void ini_cube_pruning_options(); @@ -278,7 +248,8 @@ public: } #endif - //! Load data into static instance. This function is required as LoadData() is not const + //! Load data into static instance. This function is required as + // LoadData() is not const static bool LoadDataStatic(Parameter *parameter, const std::string &execPath); //! Main function to load everything. Also initialize the Parameter object @@ -336,22 +307,6 @@ public: bool IsWordDeletionEnabled() const { return m_wordDeletionEnabled; } - // size_t GetMaxHypoStackSize() const { - // return m_options.search.stack_size; - // } - // size_t GetMinHypoStackDiversity() const { - // return m_options.search.stack_diversity; - // } - - size_t IsPathRecoveryEnabled() const { - return m_recoverPath; - } - bool IsIDEnabled() const { - return m_PrintID; - } - bool IsPassthroughEnabled() const { - return m_PrintPassthroughInformation; - } int GetMaxDistortion() const { return m_options.reordering.max_distortion; @@ -384,47 +339,6 @@ public: void SetVerboseLevel(int x) const { m_verboseLevel = x; } - char GetReportSegmentation() const { - if (m_reportSegmentation) return 1; - if (m_reportSegmentationEnriched) return 2; - return 0; - } - void SetReportSegmentation(const int &val) { - if (val == 0) - m_reportSegmentation = m_reportSegmentationEnriched = false; - else if (val == 1) - m_reportSegmentation = true; - else if (val == 2) - m_reportSegmentationEnriched = true; - else - std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring"; - } - - bool GetReportAllFactors() const { - return m_reportAllFactors; - } - - bool IsDetailedTranslationReportingEnabled() const { - return !m_detailedTranslationReportingFilePath.empty(); - } - - bool IsDetailedAllTranslationReportingEnabled() const { - return !m_detailedAllTranslationReportingFilePath.empty(); - } - - const std::string &GetDetailedTranslationReportingFilePath() const { - return m_detailedTranslationReportingFilePath; - } - bool IsDetailedTreeFragmentsTranslationReportingEnabled() const { - return !m_detailedTreeFragmentsTranslationReportingFilePath.empty(); - } - const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const { - return m_detailedTreeFragmentsTranslationReportingFilePath; - } - - // bool IsLabeledNBestList() const { - // return m_options.nbest.include_feature_labels; - // } bool UseMinphrInMemory() const { return m_minphrMemory; @@ -434,19 +348,6 @@ public: return m_minlexrMemory; } - // for mert - // size_t GetNBestSize() const { - // return m_options.nbest.nbest_size; - // } - - // const std::string &GetNBestFilePath() const { - // return m_options.nbest.output_file_path; - // } - - // bool IsNBestEnabled() const { - // return m_options.nbest.enabled; - // } - size_t GetLatticeSamplesSize() const { return m_latticeSamplesSize; } @@ -455,22 +356,6 @@ public: return m_latticeSamplesFilePath; } - // size_t GetNBestFactor() const { - // return m_options.nbest.factor; - // } - bool GetOutputWordGraph() const { - return m_outputWordGraph; - } - - //! Sets the global score vector weights for a given FeatureFunction. - InputTypeEnum GetInputType() const { - return m_inputType; - } - - // SearchAlgorithm GetSearchAlgorithm() const { - // return m_searchAlgorithm; - // } - bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const { if (algo == DefaultSearchAlgorithm) algo = m_options.search.algo; @@ -577,33 +462,36 @@ public: return m_lmEnableOOVFeature; } - bool GetOutputSearchGraph() const { - return m_outputSearchGraph; - } - void SetOutputSearchGraph(bool outputSearchGraph) { - m_outputSearchGraph = outputSearchGraph; - } - bool GetOutputSearchGraphExtended() const { - return m_outputSearchGraphExtended; - } - bool GetOutputSearchGraphSLF() const { - return m_outputSearchGraphSLF; - } - bool GetOutputSearchGraphHypergraph() const { - return m_outputSearchGraphHypergraph; - } -#ifdef HAVE_PROTOBUF - bool GetOutputSearchGraphPB() const { - return m_outputSearchGraphPB; - } -#endif + // bool GetOutputSearchGraph() const { + // return m_outputSearchGraph; + // } + + // void SetOutputSearchGraph(bool outputSearchGraph) { + // m_outputSearchGraph = outputSearchGraph; + // } + + // bool GetOutputSearchGraphExtended() const { + // return m_outputSearchGraphExtended; + // } + // GetOutputSearchGraphSLF() const { + // return m_outputSearchGraphSLF; + // } + // bool GetOutputSearchGraphHypergraph() const { + // return m_outputSearchGraphHypergraph; + // } + +// #ifdef HAVE_PROTOBUF +// bool GetOutputSearchGraphPB() const { +// return m_outputSearchGraphPB; +// } +// #endif const std::string& GetOutputUnknownsFile() const { return m_outputUnknownsFile; } - bool GetUnprunedSearchGraph() const { - return m_unprunedSearchGraph; - } + // bool GetUnprunedSearchGraph() const { + // return m_unprunedSearchGraph; + // } bool GetIncludeLHSInSearchGraph() const { return m_includeLHSInSearchGraph; @@ -640,9 +528,9 @@ public: return m_sourceLabelOverlap; } - bool GetOutputHypoScore() const { - return m_outputHypoScore; - } + // bool GetOutputHypoScore() const { + // return m_outputHypoScore; + // } size_t GetRuleLimit() const { return m_ruleLimit; } @@ -675,16 +563,16 @@ public: return m_bookkeeping_options.need_alignment_info; // return m_needAlignmentInfo; } - const std::string &GetAlignmentOutputFile() const { - return m_alignmentOutputFile; - } - bool PrintAlignmentInfo() const { - return m_PrintAlignmentInfo; - } + // const std::string &GetAlignmentOutputFile() const { + // return m_alignmentOutputFile; + // } + // bool PrintAlignmentInfo() const { + // return m_PrintAlignmentInfo; + // } - WordAlignmentSort GetWordAlignmentSort() const { - return m_wordAlignmentSort; - } + // WordAlignmentSort GetWordAlignmentSort() const { + // return m_wordAlignmentSort; + // } bool GetHasAlternateWeightSettings() const { return m_weightSetting.size() > 0; diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp index 10b0d25c9..9a22b593a 100644 --- a/moses/Syntax/Manager.cpp +++ b/moses/Syntax/Manager.cpp @@ -26,12 +26,12 @@ void Manager::OutputBest(OutputCollector *collector) const const SHyperedge *best = GetBestSHyperedge(); if (best == NULL) { VERBOSE(1, "NO BEST TRANSLATION" << std::endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << "0 "; } out << '\n'; } else { - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << best->label.score << " "; } Phrase yield = GetOneBestTargetYield(*best); @@ -49,12 +49,10 @@ void Manager::OutputBest(OutputCollector *collector) const void Manager::OutputNBest(OutputCollector *collector) const { if (collector) { - const StaticData &staticData = StaticData::Instance(); long translationId = m_source.GetTranslationId(); - KBestExtractor::KBestVec nBestList; - ExtractKBest(staticData.options().nbest.nbest_size, nBestList, - staticData.options().nbest.only_distinct); + ExtractKBest(options().nbest.nbest_size, nBestList, + options().nbest.only_distinct); OutputNBestList(collector, nBestList, translationId); } } @@ -111,7 +109,8 @@ void Manager::OutputNBestList(OutputCollector *collector, out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; - derivation.scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; // optionally, print word alignments diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h index 9f26563d5..3655a0155 100644 --- a/moses/Syntax/S2T/OovHandler-inl.h +++ b/moses/Syntax/S2T/OovHandler-inl.h @@ -66,7 +66,7 @@ template TargetPhrase *OovHandler::SynthesizeTargetPhrase( const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob) { - const StaticData &staticData = StaticData::Instance(); + const StaticData &SD = StaticData::Instance(); const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); @@ -82,8 +82,8 @@ TargetPhrase *OovHandler::SynthesizeTargetPhrase( targetPhrase->EvaluateInIsolation(srcPhrase); targetPhrase->SetTargetLHS(&targetLhs); targetPhrase->SetAlignmentInfo("0-0"); - if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || - staticData.GetTreeStructure() != NULL) { + if (!SD.options().output.detailed_tree_transrep_filepath.empty() || + SD.GetTreeStructure() != NULL) { std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " + oov[0]->GetString().as_string() + " ]"; targetPhrase->SetProperty("Tree", value); diff --git a/moses/TabbedSentence.cpp b/moses/TabbedSentence.cpp index ae0876595..74e3de8f2 100644 --- a/moses/TabbedSentence.cpp +++ b/moses/TabbedSentence.cpp @@ -45,7 +45,11 @@ void TabbedSentence::CreateFromString(const std::vector &factorOrder } } -int TabbedSentence::Read(std::istream& in, const std::vector& factorOrder) +int +TabbedSentence:: +Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) { TabbedColumns allColumns; @@ -58,14 +62,14 @@ int TabbedSentence::Read(std::istream& in, const std::vector& factor if(allColumns.size() < 2) { std::stringstream dummyStream; dummyStream << line << std::endl; - return Sentence::Read(dummyStream, factorOrder); + return Sentence::Read(dummyStream, factorOrder, opts); } else { m_columns.resize(allColumns.size() - 1); std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin()); std::stringstream dummyStream; dummyStream << allColumns[0] << std::endl; - return Sentence::Read(dummyStream, factorOrder); + return Sentence::Read(dummyStream, factorOrder, opts); } } diff --git a/moses/TabbedSentence.h b/moses/TabbedSentence.h index e481e6dac..de08afa0f 100644 --- a/moses/TabbedSentence.h +++ b/moses/TabbedSentence.h @@ -67,7 +67,9 @@ public: virtual void CreateFromString(const std::vector &factorOrder , const std::string &tabbedString); - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in,const std::vector& factorOrder, + AllOptions const& opts); const TabbedColumns& GetColumns() const { return m_columns; diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index e86767b46..1110bc16a 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -867,10 +867,10 @@ namespace Moses boost::unique_lock ctxlock(context->lock); if (localcache) std::cerr << "have local cache " << std::endl; - std::cerr << "BOO at " << HERE << std::endl; + // std::cerr << "BOO at " << HERE << std::endl; if (!localcache) { - std::cerr << "no local cache at " << HERE << std::endl; + // std::cerr << "no local cache at " << HERE << std::endl; setup_bias(ttask); if (context->bias) { diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc index d37097c97..d350d7a3e 100644 --- a/moses/TranslationModel/UG/ptable-lookup.cc +++ b/moses/TranslationModel/UG/ptable-lookup.cc @@ -69,7 +69,7 @@ int main(int argc, char* argv[]) while (true) { boost::shared_ptr phrase(new Sentence); - if (!phrase->Read(cin,ifo)) break; + if (!phrase->Read(cin,ifo, StaticData::Instance().options())) break; boost::shared_ptr ttask; ttask = TranslationTask::create(phrase); if (pdta) diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 761bc4137..397ca01a5 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -1,4 +1,4 @@ -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- #include #include @@ -12,7 +12,7 @@ #include "TranslationModel/PhraseDictionaryTreeAdaptor.h" #include "util/exception.hh" #include - +#include "TranslationTask.h" using namespace std; namespace Moses @@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, size_t inputSize = input.GetSize(); m_inputPathMatrix.resize(inputSize); - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + size_t maxSizePhrase = ttask->options().search.max_phrase_length; maxSizePhrase = std::min(inputSize, maxSizePhrase); // 1-word phrases @@ -234,8 +234,10 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st list ::const_iterator iterStep = decodeGraph.begin(); const DecodeStep &decodeStep = **iterStep; - static_cast(decodeStep).ProcessInitialTranslationLEGACY - (m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList); + DecodeStepTranslation const& dstep + = static_cast(decodeStep); + dstep.ProcessInitialTransLEGACY(m_source, *oldPtoc, startPos, endPos, + adhereTableLimit, inputPathList); // do rest of decode steps int indexStep = 0; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 122d8313b..5eab55c82 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -186,7 +186,8 @@ void TranslationTask::Run() // report thread number #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) - VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl); + VERBOSE(2, "Translating line " << translationId << " in thread id " + << pthread_self() << endl); #endif @@ -214,8 +215,8 @@ void TranslationTask::Run() OutputCollector* ocoll; Timer additionalReportingTime; additionalReportingTime.start(); - boost::shared_ptr const& io = m_ioWrapper; + manager->OutputBest(io->GetSingleBestOutputCollector()); // output word graph @@ -229,7 +230,7 @@ void TranslationTask::Run() // Output search graph in hypergraph format for Kenneth Heafield's // lazy hypergraph decoder; writes to stderr - if (StaticData::Instance().GetOutputSearchGraphHypergraph()) { + if (options().output.SearchGraphHG.size()) { size_t transId = manager->GetSource().GetTranslationId(); string fname = io->GetHypergraphOutputFileName(transId); manager->OutputSearchGraphAsHypergraph(fname, PRECISION); diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp index 89ce0ba65..9d384deb3 100644 --- a/moses/TreeInput.cpp +++ b/moses/TreeInput.cpp @@ -237,7 +237,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector } //! populate this InputType with data from in stream -int TreeInput::Read(std::istream& in,const std::vector& factorOrder) +int +TreeInput:: +Read(std::istream& in, const std::vector& factorOrder, + AllOptions const& opts) { const StaticData &staticData = StaticData::Instance(); @@ -254,8 +257,8 @@ int TreeInput::Read(std::istream& in,const std::vector& factorOrder) stringstream strme; strme << line << endl; - Sentence::Read(strme, factorOrder); - + Sentence::Read(strme, factorOrder, opts); + // size input chart size_t sourceSize = GetSize(); m_sourceChart.resize(sourceSize); diff --git a/moses/TreeInput.h b/moses/TreeInput.h index 2716831cc..2116334c1 100644 --- a/moses/TreeInput.h +++ b/moses/TreeInput.h @@ -53,7 +53,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, + const std::vector& factorOrder, + AllOptions const& opts); //! Output debugging info to stream out virtual void Print(std::ostream&) const; diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp index 51ef4be9e..e597affc5 100644 --- a/moses/WordLattice.cpp +++ b/moses/WordLattice.cpp @@ -147,7 +147,11 @@ InitializeFromPCNDataType return !cn.empty(); } -int WordLattice::Read(std::istream& in,const std::vector& factorOrder) +int +WordLattice:: +Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) { Clear(); std::string line; diff --git a/moses/WordLattice.h b/moses/WordLattice.h index 70b1602f2..992fb9498 100644 --- a/moses/WordLattice.h +++ b/moses/WordLattice.h @@ -43,8 +43,10 @@ public: int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector& factorOrder, const std::string& debug_line = ""); /** Read from PLF format (1 lattice per line) */ - int Read(std::istream& in,const std::vector& factorOrder); - + int Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts); + /** Convert internal representation into an edge matrix * @note edges[1][2] means there is an edge from 1 to 2 */ diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp index 32a2bcd36..3f481e77d 100644 --- a/moses/parameters/AllOptions.cpp +++ b/moses/parameters/AllOptions.cpp @@ -21,6 +21,7 @@ namespace Moses if (!input.init(param)) return false; if (!mbr.init(param)) return false; if (!lmbr.init(param)) return false; + if (!output.init(param)) return false; param.SetParameter(mira, "mira", false); @@ -45,12 +46,31 @@ namespace Moses { if (mbr.enabled) { - cerr << "Error: Cannot use consensus decoding together with mbr" << endl; + cerr << "Error: Cannot use consensus decoding together with mbr" + << endl; return false; } mbr.enabled = true; } + // RecoverPath should only be used with confusion net or word lattice input + if (output.RecoverPath && input.input_type == SentenceInput) + { + TRACE_ERR("--recover-input-path should only be used with " + <<"confusion net or word lattice input!\n"); + output.RecoverPath = false; + } + + // set m_nbest_options.enabled = true if necessary: + nbest.enabled = (nbest.enabled || mira || search.consensus + || nbest.nbest_size > 0 + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || !output.SearchGraphHG.empty() + || !output.SearchGraphPB.empty() + || output.lattice_sample_size != 0); + return true; } @@ -67,9 +87,24 @@ namespace Moses if (!input.update(param)) return false; if (!mbr.update(param)) return false; if (!lmbr.update(param)) return false; - return true; + if (!output.update(param)) return false; + return sanity_check(); } + #endif + bool + AllOptions:: + NBestDistinct() const + { + return (nbest.only_distinct + || mbr.enabled || lmbr.enabled + || output.lattice_sample_size + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || !output.SearchGraphHG.empty()); + } + } diff --git a/moses/parameters/AllOptions.h b/moses/parameters/AllOptions.h index 5f9949a76..c5f6e44e2 100644 --- a/moses/parameters/AllOptions.h +++ b/moses/parameters/AllOptions.h @@ -11,6 +11,7 @@ #include "InputOptions.h" #include "MBR_Options.h" #include "LMBR_Options.h" +#include "ReportingOptions.h" namespace Moses { struct @@ -24,7 +25,7 @@ namespace Moses InputOptions input; MBR_Options mbr; LMBR_Options lmbr; - + ReportingOptions output; bool mira; // StackOptions stack; @@ -38,6 +39,8 @@ namespace Moses bool update(std::mapconst& param); #endif + bool NBestDistinct() const; + }; } diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp index d61a67c2f..e916c3437 100644 --- a/moses/parameters/NBestOptions.cpp +++ b/moses/parameters/NBestOptions.cpp @@ -1,4 +1,4 @@ -// -*- mode: c++; cc-style: gnu -*- +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #include "moses/Parameter.h" #include "NBestOptions.h" @@ -33,4 +33,21 @@ init(Parameter const& P) enabled = output_file_path.size(); return true; } + +#ifdef HAVE_XMLRPC_C +bool +NBestOptions:: +update(std::mapconst& param) +{ + typedef std::map params_t; + params_t::const_iterator si = param.find("nbest"); + if (si != param.end()) + nbest_size = xmlrpc_c::value_int(si->second); + only_distinct = check(param, "nbest-distinct"); + enabled = (nbest_size > 0); + return true; +} +#endif + + } // namespace Moses diff --git a/moses/parameters/NBestOptions.h b/moses/parameters/NBestOptions.h index 61e3c9806..05da48508 100644 --- a/moses/parameters/NBestOptions.h +++ b/moses/parameters/NBestOptions.h @@ -24,6 +24,10 @@ struct NBestOptions : public OptionsBaseClass bool init(Parameter const& param); +#ifdef HAVE_XMLRPC_C + bool update(std::mapconst& param); +#endif + }; } diff --git a/moses/parameters/OptionsBaseClass.cpp b/moses/parameters/OptionsBaseClass.cpp index e0b23babf..0bb914417 100644 --- a/moses/parameters/OptionsBaseClass.cpp +++ b/moses/parameters/OptionsBaseClass.cpp @@ -10,6 +10,16 @@ namespace Moses { return true; } + + bool + OptionsBaseClass:: + check(std::map const& param, + std::string const key) + { + std::map::const_iterator m; + return (param.find(key) != param.end()); + } + #endif } diff --git a/moses/parameters/OptionsBaseClass.h b/moses/parameters/OptionsBaseClass.h index cb62467cf..55713a174 100644 --- a/moses/parameters/OptionsBaseClass.h +++ b/moses/parameters/OptionsBaseClass.h @@ -12,6 +12,10 @@ namespace Moses #ifdef HAVE_XMLRPC_C virtual bool update(std::mapconst& params); + + bool + check(std::map const& param, + std::string const key); #endif }; } diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index 25ae2f779..fa06eb06f 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -1,5 +1,4 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- -#if 0 #include "ReportingOptions.h" #include "moses/Parameter.h" @@ -9,82 +8,70 @@ namespace Moses { ReportingOptions:: init(Parameter const& param) { + // including factors in the output + param.SetParameter(ReportAllFactors, "report-all-factors", false); + + // segmentation reporting + ReportSegmentation = (param.GetParam("report-segmentation-enriched") + ? 2 : param.GetParam("report-segmentation") + ? 1 : 0); + + // word alignment reporting + param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false); + param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort); + std::string e; // hack to save us param.SetParameter(...) + param.SetParameter(AlignmentOutputFile,"alignment-output-file", e); + + // output a word graph PARAM_VEC const* params; - - param.SetParameter(segmentation, "report-segmentation", false ); - param.SetParameter(segmentation_enriched, "report-segmentation-enriched", false); - param.SetParameter(all_factors, "report-all-factors", false ); - - // print ... - param.SetParameter(id, "print-id", false ); - param.SetParameter(aln_info, "print-alignment-info", false); - param.SetParameter(passthrough, "print-passthrough", false ); - - param.SetParameter(detailed_transrep_filepath, "translation-details", ""); - param.SetParameter(detailed_tree_transrep_filepath, - "tree-translation-details", ""); - param.SetParameter(detailed_all_transrep_filepath, - "translation-all-details", ""); - - // output search graph - param.SetParameter(output, - "translation-all-details", ""); - - - - param.SetParameter(sort_word_alignment, "sort-word-alignment", NoSort); - - - // Is there a reason why we can't use SetParameter here? [UG] - = param.GetParam("alignment-output-file"); - if (params && params->size()) { - m_alignmentOutputFile = Scan(params->at(0)); - } - params = param.GetParam("output-word-graph"); - output_word_graph = (params && params->size() == 2); - - // bizarre code ahead! Why do we need to do the checks here? - // as adapted from StaticData.cpp - params = param.GetParam("output-search-graph"); - if (params && params->size()) { - if (params->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph file"; - return false; - } - output_search_graph = true; - } - else if (m_parameter->GetParam("output-search-graph-extended") && - m_parameter->GetParam("output-search-graph-extended")->size()) { - if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file"; - return false; - } - output_search_graph = true; - m_outputSearchGraphExtended = true; - } else { - m_outputSearchGraph = false; - } - - params = m_parameter->GetParam("output-search-graph-slf"); - output_search_graph_slf = params && params->size(); - params = m_parameter->GetParam("output-search-graph-hypergraph"); - output_search_graph_hypergraph = params && params->size(); + WordGraph = (params && params->size() == 2); // what are the two options? + // dump the search graph + param.SetParameter(SearchGraph, "output-search-graph", e); + param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e); + param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e); + param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e); #ifdef HAVE_PROTOBUF - params = m_parameter->GetParam("output-search-graph-pb"); - if (params && params->size()) { - if (params->size() != 1) { - cerr << "ERROR: wrong format for switch -output-search-graph-pb path"; - return false; - } - m_outputSearchGraphPB = true; - } else - m_outputSearchGraphPB = false; + param.SetParameter(SearchGraphPB, "output-search-graph-pb", e); #endif + param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); + + + // miscellaneous + param.SetParameter(RecoverPath, "recover-input-path",false); + param.SetParameter(ReportHypoScore, "output-hypo-score",false); + param.SetParameter(PrintID, "print-id",false); + param.SetParameter(PrintPassThrough, "print-passthrough",false); + param.SetParameter(detailed_all_transrep_filepath, + "translation-all-details", e); + param.SetParameter(detailed_transrep_filepath, "translation-details", e); + param.SetParameter(detailed_tree_transrep_filepath, + "tree-translation-details", e); + params = param.GetParam("lattice-samples"); + if (params) { + if (params->size() ==2 ) { + lattice_sample_filepath = params->at(0); + lattice_sample_size = Scan(params->at(1)); + } else { + std::cerr <<"wrong format for switch -lattice-samples file size"; + return false; + } + } else { + lattice_sample_size = 0; + } + return true; + } + +#ifdef HAVE_XMLRPC_C + bool + ReportingOptions:: + update(std::mapconst& param) + { + ReportAllFactors = check(param, "report-all-factors"); return true; } -} #endif +} diff --git a/moses/parameters/ReportingOptions.h b/moses/parameters/ReportingOptions.h index 343bd58ec..0c4c2ac58 100644 --- a/moses/parameters/ReportingOptions.h +++ b/moses/parameters/ReportingOptions.h @@ -2,40 +2,59 @@ #pragma once #include #include "moses/Parameter.h" +#include "OptionsBaseClass.h" + namespace Moses { struct - ReportingOptions + ReportingOptions : public OptionsBaseClass { + bool ReportAllFactors; // m_reportAllFactors; - WordAlignmentSort sort_word_alignment; // 0: no, 1: target order + int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched - - bool segmentation; // m_reportSegmentation; - bool segmentation_enriched; // m_reportSegmentationEnriched; - bool all_factors; // m_reportAllFactors; + bool PrintAlignmentInfo; // m_PrintAlignmentInfo + WordAlignmentSort WA_SortOrder; // 0: no, 1: target order + std::string AlignmentOutputFile; - bool output_word_graph; - bool output_search_graph; - bool output_search_graph_extended; - bool output_search_graph_slf; - bool output_search_graph_hypergraph; - bool output_search_graph_protobuf; + bool WordGraph; + + std::string SearchGraph; + std::string SearchGraphExtended; + std::string SearchGraphSLF; + std::string SearchGraphHG; + std::string SearchGraphPB; + bool DontPruneSearchGraph; + + bool RecoverPath; // recover input path? + bool ReportHypoScore; + + bool PrintID; + bool PrintPassThrough; // print .. bool aln_info; // m_PrintAlignmentInfo; - bool id; // m_PrintID; - bool passthrough; // m_PrintPassthroughInformation; // transrep = translation reporting std::string detailed_transrep_filepath; std::string detailed_tree_transrep_filepath; std::string detailed_all_transrep_filepath; - - std::string aln_output_file; // m_alignmentOutputFile; + + std::string lattice_sample_filepath; + size_t lattice_sample_size; bool init(Parameter const& param); + + /// do we need to keep the search graph from decoding? + bool NeedSearchGraph() const { + return !(SearchGraph.empty() && SearchGraphExtended.empty()); + } + +#ifdef HAVE_XMLRPC_C + bool update(std::mapconst& param); +#endif + }; } diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp index 6636ba744..39ac64515 100644 --- a/moses/parameters/SearchOptions.cpp +++ b/moses/parameters/SearchOptions.cpp @@ -36,6 +36,7 @@ namespace Moses beam_width = TransformScore(beam_width); trans_opt_threshold = TransformScore(trans_opt_threshold); early_discarding_threshold = TransformScore(early_discarding_threshold); + return true; } diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 09d21c322..085d16622 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -25,8 +25,7 @@ using Moses::Sentence; boost::shared_ptr TranslationRequest:: create(Translator* translator, xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut) + boost::condition_variable& cond, boost::mutex& mut) { boost::shared_ptr ret; ret.reset(new TranslationRequest(paramList, cond, mut)); @@ -60,10 +59,9 @@ Run() Moses::StaticData const& SD = Moses::StaticData::Instance(); //Make sure alternative paths are retained, if necessary - if (m_withGraphInfo || m_nbestSize>0) - // why on earth is this a global variable? Is this even thread-safe???? UG - (const_cast(SD)).SetOutputSearchGraph(true); - + // if (m_withGraphInfo || m_nbestSize>0) + // why on earth is this a global variable? Is this even thread-safe???? UG + // (const_cast(SD)).SetOutputSearchGraph(true); // std::stringstream out, graphInfo, transCollOpts; if (SD.IsSyntax()) @@ -170,7 +168,14 @@ outputNBest(const Manager& manager, map& retData) { TrellisPathList nBestList; vector nBestXml; - manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); + manager.CalcNBest(m_options.nbest.nbest_size, nBestList, + m_options.nbest.only_distinct); + + StaticData const& SD = StaticData::Instance(); + manager.OutputNBest(cout, nBestList, + SD.GetOutputFactorOrder(), + m_source->GetTranslationId(), + options().output.ReportSegmentation); BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) { vector const& E = path->GetEdges(); @@ -180,7 +185,8 @@ outputNBest(const Manager& manager, map& retData) if (m_withScoreBreakdown) { // should the score breakdown be reported in a more structured manner? ostringstream buf; - path->GetScoreBreakdown()->OutputAllFeatureScores(buf); + bool with_labels = m_options.nbest.include_feature_labels; + path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); } @@ -228,23 +234,23 @@ insertTranslationOptions(Moses::Manager& manager, retData["topt"] = xmlrpc_c::value_array(toptsXml); } -bool -check(std::map const& params, std::string const key) -{ - std::map::const_iterator m; - return (params.find(key) != params.end()); -} - TranslationRequest:: TranslationRequest(xmlrpc_c::paramList const& paramList, boost::condition_variable& cond, boost::mutex& mut) : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) - , m_nbestSize(0) + // , m_nbestSize(0) , m_session_id(0) { m_options = StaticData::Instance().options(); } +bool +check(std::map const& param, + std::string const key) +{ + std::map::const_iterator m; + return (param.find(key) != param.end()); +} void TranslationRequest:: @@ -274,10 +280,9 @@ parse_request(std::map const& params) m_withWordAlignInfo = check(params, "word-align"); m_withGraphInfo = check(params, "sg"); m_withTopts = check(params, "topt"); - m_reportAllFactors = check(params, "report-all-factors"); - m_nbestDistinct = check(params, "nbest-distinct"); + // m_reportAllFactors = check(params, "report-all-factors"); + // m_nbestDistinct = check(params, "nbest-distinct"); m_withScoreBreakdown = check(params, "add-score-breakdown"); - m_source.reset(new Sentence(0,m_source_string)); si = params.find("lambda"); if (si != params.end()) { @@ -298,9 +303,9 @@ parse_request(std::map const& params) } } - si = params.find("nbest"); - if (si != params.end()) - m_nbestSize = xmlrpc_c::value_int(si->second); + // si = params.find("nbest"); + // if (si != params.end()) + // m_nbestSize = xmlrpc_c::value_int(si->second); si = params.find("context"); if (si != params.end()) @@ -309,6 +314,8 @@ parse_request(std::map const& params) VERBOSE(1,"CONTEXT " << context); m_context.reset(new std::vector(1,context)); } + + // // biased sampling for suffix-array-based sampling phrase table? // if ((si = params.find("bias")) != params.end()) // { @@ -317,6 +324,7 @@ parse_request(std::map const& params) // for (size_t i = 1; i < tmp.size(); i += 2) // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]); // } + m_source.reset(new Sentence(0,m_source_string,m_options)); } // end of Translationtask::parse_request() @@ -326,8 +334,8 @@ run_chart_decoder() { Moses::TreeInput tinput; istringstream buf(m_source_string + "\n"); - tinput.Read(buf, StaticData::Instance().GetInputFactorOrder()); - + tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options); + Moses::ChartManager manager(this->self()); manager.Decode(); @@ -393,8 +401,13 @@ void TranslationRequest:: run_phrase_decoder() { + if (m_withGraphInfo || m_options.nbest.nbest_size>0) + m_options.output.SearchGraph = "true"; + Manager manager(this->self()); // if (m_bias.size()) manager.SetBias(&m_bias); + + manager.Decode(); pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); @@ -403,10 +416,10 @@ run_phrase_decoder() if (m_withGraphInfo) insertGraphInfo(manager,m_retData); if (m_withTopts) insertTranslationOptions(manager,m_retData); - if (m_nbestSize) outputNBest(manager, m_retData); + if (m_options.nbest.nbest_size) outputNBest(manager, m_retData); - (const_cast(Moses::StaticData::Instance())) - .SetOutputSearchGraph(false); + // (const_cast(Moses::StaticData::Instance())) + // .SetOutputSearchGraph(false); // WTF? one more reason not to have this as global variable! --- UG } diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 866eca20e..9736a7a8f 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -43,9 +43,9 @@ TranslationRequest : public virtual Moses::TranslationTask bool m_withGraphInfo; bool m_withTopts; bool m_reportAllFactors; - bool m_nbestDistinct; + // bool m_nbestDistinct; bool m_withScoreBreakdown; - size_t m_nbestSize; + // size_t m_nbestSize; uint64_t m_session_id; // 0 means none, 1 means new