From c013dbbabe25fef60ff4ff5c1507b0240ceda496 Mon Sep 17 00:00:00 2001 From: nicolabertoldi Date: Wed, 28 Oct 2015 22:26:18 +0100 Subject: [PATCH 1/5] changes to resolve compilation issue with g++.4.4.7 --- moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp | 2 +- .../TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp index 4b354d346..2992cccdc 100644 --- a/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp +++ b/moses/TranslationModel/CompactPT/PhraseDictionaryCompact.cpp @@ -44,7 +44,7 @@ using namespace boost::algorithm; namespace Moses { -typename PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache; +PhraseDictionaryCompact::SentenceCache PhraseDictionaryCompact::m_sentenceCache; PhraseDictionaryCompact::PhraseDictionaryCompact(const std::string &line) :PhraseDictionary(line, true) diff --git a/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp index 964ab4528..e837a6610 100644 --- a/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp +++ b/moses/TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp @@ -25,7 +25,7 @@ namespace Moses { -boost::thread_specific_ptr +boost::thread_specific_ptr TargetPhraseCollectionCache::m_phraseCache; } From c8cc11dbd3f8815c9c985bde66afa4d76d7b624f Mon Sep 17 00:00:00 2001 From: nicolabertoldi Date: Wed, 28 Oct 2015 23:22:08 +0100 Subject: [PATCH 2/5] restored the not-adaptive version of moses --- moses/LM/IRST.cpp | 217 ++++++++++++---------------------------------- moses/LM/IRST.h | 14 ++- 2 files changed, 62 insertions(+), 169 deletions(-) diff --git a/moses/LM/IRST.cpp b/moses/LM/IRST.cpp index 0cc62b63c..be0213c97 100644 --- a/moses/LM/IRST.cpp +++ b/moses/LM/IRST.cpp @@ -1,4 +1,5 @@ -// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- +// $Id$ + /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -25,14 +26,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "n_gram.h" #include "lmContainer.h" -// should be defined in lmContainer.h, if the version of IRSTLM used provides -// context-dependent functionality -#ifndef _IRSTLM_LMCONTEXTDEPENDENT -#define _IRSTLM_LMCONTEXTDEPENDENT 5 -#else -#define IRSTLM_CONTEXT_DEPENDENT -#endif - using namespace irstlm; #include "IRST.h" @@ -67,10 +60,9 @@ public: } }; -LanguageModelIRST:: -LanguageModelIRST(const std::string &line) - : LanguageModelSingleFactor(line) - , m_lmtb_dub(0), m_lmtb_size(0) +LanguageModelIRST::LanguageModelIRST(const std::string &line) + :LanguageModelSingleFactor(line) + ,m_lmtb_dub(0), m_lmtb_size(0) { const StaticData &staticData = StaticData::Instance(); int threadCount = staticData.ThreadCount(); @@ -86,9 +78,9 @@ LanguageModelIRST(const std::string &line) VERBOSE(4, GetScoreProducerDescription() << " LanguageModelIRST::LanguageModelIRST() m_lmtb_size:|" << m_lmtb_size << "|" << std::endl); } -LanguageModelIRST:: -~LanguageModelIRST() +LanguageModelIRST::~LanguageModelIRST() { + #ifndef WIN32 TRACE_ERR( "reset mmap\n"); if (m_lmtb) m_lmtb->reset_mmap(); @@ -98,17 +90,13 @@ LanguageModelIRST:: } -bool -LanguageModelIRST:: -IsUseable(const FactorMask &mask) const +bool LanguageModelIRST::IsUseable(const FactorMask &mask) const { bool ret = mask[m_factorType]; return ret; } -void -LanguageModelIRST:: -Load() +void LanguageModelIRST::Load() { FactorCollection &factorCollection = FactorCollection::Instance(); @@ -135,9 +123,7 @@ Load() if (m_lmtb_dub > 0) m_lmtb->setlogOOVpenalty(m_lmtb_dub); } -void -LanguageModelIRST:: -CreateFactors(FactorCollection &factorCollection) +void LanguageModelIRST::CreateFactors(FactorCollection &factorCollection) { // add factors which have srilm id // code copied & paste from SRI LM class. should do template function @@ -179,23 +165,17 @@ CreateFactors(FactorCollection &factorCollection) } } -int -LanguageModelIRST:: -GetLmID( const std::string &str ) const +int LanguageModelIRST::GetLmID( const std::string &str ) const { return d->encode( str.c_str() ); // at the level of micro tags } -int -LanguageModelIRST:: -GetLmID( const Word &word ) const +int LanguageModelIRST::GetLmID( const Word &word ) const { return GetLmID( word.GetFactor(m_factorType) ); } -int -LanguageModelIRST:: -GetLmID( const Factor *factor ) const +int LanguageModelIRST::GetLmID( const Factor *factor ) const { size_t factorId = factor->GetId(); @@ -216,21 +196,21 @@ GetLmID( const Factor *factor ) const /////////// ///OLD PROBLEM - SOLVED - //////////// - /// IL PPROBLEMA ERA QUI - /// m_lmIdLookup.push_back(code); - /// PERCHE' USANDO PUSH_BACK IN REALTA' INSEREVIVAMO L'ELEMENTO NUOVO - /// IN POSIZIONE (factorID-1) invece che in posizione factrID dove dopo andiamo a leggerlo (vedi caso C - /// Cosi' funziona .... - /// ho un dubbio su cosa c'e' nelle prime posizioni di m_lmIdLookup - /// quindi - /// e scopro che rimane vuota una entry ogni due - /// perche' factorID cresce di due in due (perche' codifica sia source che target) "vuota" la posizione (factorID-1) - /// non da problemi di correttezza, ma solo di "spreco" di memoria - /// potremmo sostituirerendere m_lmIdLookup una std:map invece che un std::vector, - /// ma si perde in efficienza nell'accesso perche' non e' piu' possibile quello random dei vettori - /// a te la scelta!!!! - //////////////// +//////////// +/// IL PPROBLEMA ERA QUI +/// m_lmIdLookup.push_back(code); +/// PERCHE' USANDO PUSH_BACK IN REALTA' INSEREVIVAMO L'ELEMENTO NUOVO +/// IN POSIZIONE (factorID-1) invece che in posizione factrID dove dopo andiamo a leggerlo (vedi caso C +/// Cosi' funziona .... +/// ho un dubbio su cosa c'e' nelle prime posizioni di m_lmIdLookup +/// quindi +/// e scopro che rimane vuota una entry ogni due +/// perche' factorID cresce di due in due (perche' codifica sia source che target) "vuota" la posizione (factorID-1) +/// non da problemi di correttezza, ma solo di "spreco" di memoria +/// potremmo sostituirerendere m_lmIdLookup una std:map invece che un std::vector, +/// ma si perde in efficienza nell'accesso perche' non e' piu' possibile quello random dei vettori +/// a te la scelta!!!! +//////////////// if (factorId >= m_lmIdLookup.size()) { @@ -251,34 +231,21 @@ GetLmID( const Factor *factor ) const } } -FFState const* -LanguageModelIRST:: -EmptyHypothesisState(const InputType &/*input*/) const +const FFState* LanguageModelIRST::EmptyHypothesisState(const InputType &/*input*/) const { std::auto_ptr ret(new IRSTLMState()); return ret.release(); } -void -LanguageModelIRST:: -CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const +void LanguageModelIRST::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const { - bool isContextAdaptive - = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; - fullScore = 0; ngramScore = 0; oovCount = 0; if ( !phrase.GetSize() ) return; - //get the context_weight map here - SPTR const> CW; - if (isContextAdaptive && phrase.HasScope()) { - CW = phrase.GetScope()->GetContextWeights(); - } - int _min = min(m_lmtb_size - 1, (int) phrase.GetSize()); int codes[m_lmtb_size]; @@ -289,78 +256,36 @@ CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oov char* msp = NULL; float before_boundary = 0.0; - -#ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) { - for (; position < _min; ++position) { - codes[idx] = GetLmID(phrase.GetWord(position)); - if (codes[idx] == m_unknownId) ++oovCount; - before_boundary += m_lmtb->clprob(codes,idx+1,*CW,NULL,NULL,&msp); - ++idx; - } - } else { -#endif - for (; position < _min; ++position) { - codes[idx] = GetLmID(phrase.GetWord(position)); - if (codes[idx] == m_unknownId) ++oovCount; - before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp); - ++idx; - } -#ifdef IRSTLM_CONTEXT_DEPENDENT + for (; position < _min; ++position) { + codes[idx] = GetLmID(phrase.GetWord(position)); + if (codes[idx] == m_unknownId) ++oovCount; + before_boundary += m_lmtb->clprob(codes,idx+1,NULL,NULL,&msp); + ++idx; } -#endif + ngramScore = 0.0; int end_loop = (int) phrase.GetSize(); -#ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) { - for (; position < end_loop; ++position) { - for (idx = 1; idx < m_lmtb_size; ++idx) { - codes[idx-1] = codes[idx]; - } - codes[idx-1] = GetLmID(phrase.GetWord(position)); - if (codes[idx-1] == m_unknownId) ++oovCount; - ngramScore += m_lmtb->clprob(codes,idx,*CW,NULL,NULL,&msp); + for (; position < end_loop; ++position) { + for (idx = 1; idx < m_lmtb_size; ++idx) { + codes[idx-1] = codes[idx]; } - } else { -#endif - for (; position < end_loop; ++position) { - for (idx = 1; idx < m_lmtb_size; ++idx) { - codes[idx-1] = codes[idx]; - } - codes[idx-1] = GetLmID(phrase.GetWord(position)); - if (codes[idx-1] == m_unknownId) ++oovCount; - ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp); - } -#ifdef IRSTLM_CONTEXT_DEPENDENT + codes[idx-1] = GetLmID(phrase.GetWord(position)); + if (codes[idx-1] == m_unknownId) ++oovCount; + ngramScore += m_lmtb->clprob(codes,idx,NULL,NULL,&msp); } -#endif before_boundary = TransformLMScore(before_boundary); ngramScore = TransformLMScore(ngramScore); fullScore = ngramScore + before_boundary; } -FFState* -LanguageModelIRST:: -EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, - ScoreComponentCollection *out) const +FFState* LanguageModelIRST::EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const { - bool isContextAdaptive - = m_lmtb->getLanguageModelType() == _IRSTLM_LMCONTEXTDEPENDENT; - if (!hypo.GetCurrTargetLength()) { std::auto_ptr ret(new IRSTLMState(ps)); return ret.release(); } - //get the context_weight map here - SPTR const> CW; - if (isContextAdaptive) { - ttasksptr ttask = hypo.GetManager().GetTtask(); - if (ttask) CW = ttask->GetScope()->GetContextWeights(); - } - - //[begin, end) in STL-like fashion. const int begin = (const int) hypo.GetCurrTargetWordsRange().GetStartPos(); const int end = (const int) hypo.GetCurrTargetWordsRange().GetEndPos() + 1; @@ -383,34 +308,18 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, } char* msp = NULL; + float score = m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); + position = (const int) begin+1; - float score; -#ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) { - score = m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); - while (position < adjust_end) { - for (idx=1; idxclprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); - ++position; + while (position < adjust_end) { + for (idx=1; idxclprob(codes,m_lmtb_size,NULL,NULL,&msp); - position = (const int) begin+1; - while (position < adjust_end) { - for (idx=1; idxclprob(codes,m_lmtb_size,NULL,NULL,&msp); - ++position; - } -#ifdef IRSTLM_CONTEXT_DEPENDENT + codes[idx-1] = GetLmID(hypo.GetWord(position)); + score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); + ++position; } -#endif + //adding probability of having sentenceEnd symbol, after this phrase; //this could happen only when all source words are covered if (hypo.IsSourceCompleted()) { @@ -427,13 +336,8 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, codes[idx] = m_lmtb_sentenceStart; --idx; } -#ifdef IRSTLM_CONTEXT_DEPENDENT - if (CW) score += m_lmtb->clprob(codes,m_lmtb_size,*CW,NULL,NULL,&msp); - else -#else score += m_lmtb->clprob(codes,m_lmtb_size,NULL,NULL,&msp); -#endif - } else { + } else { // need to set the LM state if (adjust_end < end) { //the LMstate of this target phrase refers to the last m_lmtb_size-1 words @@ -454,9 +358,7 @@ EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, return ret.release(); } -LMResult -LanguageModelIRST:: -GetValue(const vector &contextFactor, State* finalState) const +LMResult LanguageModelIRST::GetValue(const vector &contextFactor, State* finalState) const { // set up context size_t count = contextFactor.size(); @@ -492,8 +394,7 @@ GetValue(const vector &contextFactor, State* finalState) const return result; } -bool -LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold) +bool LMCacheCleanup(const int sentences_done, const size_t m_lmcache_cleanup_threshold) { if (sentences_done==-1) return true; if (m_lmcache_cleanup_threshold) @@ -510,9 +411,7 @@ void LanguageModelIRST::InitializeForInput(ttasksptr const& ttask) #endif } -void -LanguageModelIRST:: -CleanUpAfterSentenceProcessing(const InputType& source) +void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source) { const StaticData &staticData = StaticData::Instance(); static int sentenceCount = 0; @@ -526,9 +425,7 @@ CleanUpAfterSentenceProcessing(const InputType& source) } } -void -LanguageModelIRST:: -SetParameter(const std::string& key, const std::string& value) +void LanguageModelIRST::SetParameter(const std::string& key, const std::string& value) { if (key == "dub") { m_lmtb_dub = Scan(value); diff --git a/moses/LM/IRST.h b/moses/LM/IRST.h index 059a30a48..c84df493c 100644 --- a/moses/LM/IRST.h +++ b/moses/LM/IRST.h @@ -1,4 +1,3 @@ -// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- // $Id$ /*********************************************************************** @@ -92,20 +91,17 @@ public: void Load(); const FFState *EmptyHypothesisState(const InputType &/*input*/) const; -protected: virtual LMResult GetValue(const std::vector &contextFactor, State* finalState = NULL) const; -public: - virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const; - /* - virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; - - virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const; - */ + virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const; +/* + virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const; + virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const; +*/ void InitializeForInput(ttasksptr const& ttask); void CleanUpAfterSentenceProcessing(const InputType& source); From b40bc81811399b6b951f9cc13a6f5532fa9f8b24 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 30 Oct 2015 14:31:24 +0000 Subject: [PATCH 3/5] Made a few things compile with Makefile outside of the bjam compile. --- Jamroot | 5 +- moses/TranslationModel/UG/Jamfile | 12 +++ moses/TranslationModel/UG/Makefile | 33 +++++--- moses/TranslationModel/UG/check-coverage.cc | 81 +++++++++++++++++++ .../UG/mm/{Makefile => Makefile.x} | 2 +- .../UG/mm/ug_bitext_agenda_worker.h | 4 +- .../UG/mm/ug_bitext_jstats.cc | 8 +- .../TranslationModel/UG/mm/ug_bitext_jstats.h | 8 +- .../UG/mm/ug_bitext_pstats.cc | 2 +- .../TranslationModel/UG/mm/ug_bitext_pstats.h | 20 ++--- .../UG/mm/ug_bitext_sampler.h | 7 +- moses/TranslationModel/UG/mm/ug_im_tsa.h | 4 + .../UG/mm/ug_lexical_reordering.cc | 38 ++++----- .../UG/mm/ug_lexical_reordering.h | 2 +- moses/TranslationModel/UG/mm/ug_phrasepair.h | 8 +- moses/TranslationModel/UG/mmsapt.cpp | 6 ++ moses/TranslationModel/UG/mmsapt.h | 4 + moses/TranslationModel/UG/util/Makefile | 12 +-- 18 files changed, 190 insertions(+), 66 deletions(-) create mode 100644 moses/TranslationModel/UG/check-coverage.cc rename moses/TranslationModel/UG/mm/{Makefile => Makefile.x} (98%) diff --git a/Jamroot b/Jamroot index 2711783bf..10eacf95b 100644 --- a/Jamroot +++ b/Jamroot @@ -1,5 +1,5 @@ #BUILDING MOSES -# + #PACKAGES #Language models (optional): #--with-irstlm=/path/to/irstlm @@ -245,7 +245,7 @@ if [ option.get "with-mm" : : "yes" ] moses/TranslationModel/UG//ptable-describe-features moses/TranslationModel/UG//count-ptable-features moses/TranslationModel/UG//ptable-lookup - # moses/TranslationModel/UG//spe-check-coverage + moses/TranslationModel/UG//check-coverage moses/TranslationModel/UG/mm//mtt-demo1 moses/TranslationModel/UG/mm//mtt-build moses/TranslationModel/UG/mm//mtt-dump @@ -256,6 +256,7 @@ if [ option.get "with-mm" : : "yes" ] moses/TranslationModel/UG/mm//mmlex-lookup moses/TranslationModel/UG/mm//mtt-count-words moses/TranslationModel/UG/mm//calc-coverage + moses/TranslationModel/UG//check-coverage moses/TranslationModel/UG//try-align ; } diff --git a/moses/TranslationModel/UG/Jamfile b/moses/TranslationModel/UG/Jamfile index 954640b7a..108d358ab 100644 --- a/moses/TranslationModel/UG/Jamfile +++ b/moses/TranslationModel/UG/Jamfile @@ -59,6 +59,18 @@ $(TOP)/moses/TranslationModel/UG//mmsapt $(TOP)/util//kenutil ; +exe check-coverage : +check-coverage.cc +$(TOP)/moses//moses +$(TOP)/moses/TranslationModel/UG/generic//generic +$(TOP)//boost_iostreams +$(TOP)//boost_filesystem +$(TOP)//boost_program_options +$(TOP)/moses/TranslationModel/UG/mm//mm +$(TOP)/moses/TranslationModel/UG//mmsapt +$(TOP)/util//kenutil +; + exe sim-pe : sim-pe.cc $(TOP)/moses//moses diff --git a/moses/TranslationModel/UG/Makefile b/moses/TranslationModel/UG/Makefile index 589582e9b..213f87e0c 100644 --- a/moses/TranslationModel/UG/Makefile +++ b/moses/TranslationModel/UG/Makefile @@ -17,7 +17,7 @@ echo $$d endef MOSES_ROOT := $(shell $(find_moses_root)) - +$(info MOSES_ROOT=${MOSES_ROOT}) # =============================================================================== # COMPILATION PREFERENCES # =============================================================================== @@ -35,7 +35,9 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4 CXXFLAGS += -DKENLM_MAX_ORDER=5 CXXFLAGS += -DWITH_THREADS CXXFLAGS += -DNO_MOSES -CXXFLAGS += -I${MOSES_ROOT} -I. +CXXFLAGS += -DMMT +CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only +CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include ifeq ($(variant),debug) CXXFLAGS += -ggdb -O0 @@ -45,7 +47,7 @@ else ifeq ($(variant),syntax) CXXFLAGS += -fsyntax-only endif -# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/ +LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/ # WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL} WDIR = build/$(variant) @@ -60,14 +62,22 @@ nil: # libraries required -LIBS = m z bz2 pthread dl ${BOOSTLIBS} -#LIBS += tcmalloc -BOOSTLIBS := thread system filesystem program_options iostreams -BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS}) -ifdef ($(BOOSTLIBTAG),"") +BOOSTLIBS := program_options iostreams thread system filesystem +BOOSTLIBS := $(addprefix -lboost_,${BOOSTLIBS}) +ifeq ($(BOOSTLIBTAG),"") BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS}) endif +STATIC_LIBS = m bz2 z dl rt +DYNAMIC_LIBS = pthread +#DYNAMIC_LIBS += tcmalloc + +LIBS = -Wl,-B$(link) +LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS} +LIBS += $(addprefix -l,${STATIC_LIBS}) +LIBS += -Wl,-Bdynamic +LIBS += $(addprefix -l,${DYNAMIC_LIBS}) + cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\ $(patsubst .%,$(WDIR)%,$(basename $1)))) cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1))) @@ -79,7 +89,7 @@ DEP += $(basename $(call cc2obj,$1)).d $(call cc2obj,$1): $1 @echo -e "COMPILING $1" @mkdir -p $$(@D) - @${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@ + ${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@ endef @@ -90,7 +100,7 @@ $(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ) ifneq ($(variant),syntax) @echo -e "LINKING $$@" @mkdir -p $${@D} - @${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$< + ${CXX} ${CXXFLAGS} -o $$@ $$< $(LIBOBJ) ${LIBS} endif endef @@ -106,7 +116,8 @@ skip += ug_splice_arglist.cc # skip += ug_lexical_reordering.cc # objects from elsewhere in the moses tree that are needed -extra = ${MOSES_ROOT}/util/exception.cc +extra = ${MOSES_ROOT}/util/exception.cc +extra += ${MOSES_ROOT}/util/integer_to_string.cc $(foreach f,$(skip),$(eval broken+=$(shell find -name $f))) broken += $(wildcard ./mm/stashed/*) diff --git a/moses/TranslationModel/UG/check-coverage.cc b/moses/TranslationModel/UG/check-coverage.cc new file mode 100644 index 000000000..28d3e8968 --- /dev/null +++ b/moses/TranslationModel/UG/check-coverage.cc @@ -0,0 +1,81 @@ +// #include "mmsapt.h" +// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +// #include "moses/TranslationTask.h" +#include +#include +#include +#include +#include +#include +#include "mm/ug_bitext.h" +#include "generic/file_io/ug_stream.h" +#include +#include + +using namespace Moses; +using namespace sapt; +using namespace std; +using namespace boost; + +typedef sapt::L2R_Token Token; +typedef mmBitext bitext_t; + +struct mycmp +{ + bool operator() (pair const& a, + pair const& b) const + { + return a.second > b.second; + } +}; + +string +basename(string const path, string const suffix) +{ + size_t p = path.find_last_of("/"); + size_t k = path.size() - suffix.size(); + cout << path << " " << suffix << endl; + cout << path.substr(0,p) << " " << path.substr(k) << endl; + return path.substr(p, suffix == &path[k] ? k-p : path.size() - p); +} + +int main(int argc, char* argv[]) +{ + bitext_t B; + B.open(argv[1],argv[2],argv[3]); + string line; + string ifile = argv[4]; + string docname = basename(ifile, string(".") + argv[2] + ".gz"); + boost::iostreams::filtering_istream in; + ugdiss::open_input_stream(ifile,in); + while(getline(in,line)) + { + cout << line << " [" << docname << "]" << endl; + vector snt; + B.V1->fillIdSeq(line,snt); + for (size_t i = 0; i < snt.size(); ++i) + { + bitext_t::iter m(B.I1.get()); + for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k) + { + if (m.ca() > 500) continue; + sapt::tsa::ArrayEntry I(m.lower_bound(-1)); + char const* stop = m.upper_bound(-1); + map cnt; + while (I.next != stop) + { + m.root->readEntry(I.next,I); + ++cnt[B.docname(I.sid)]; + } + cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl; + typedef pair entry; + vector ranked; ranked.reserve(cnt.size()); + BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e); + sort(ranked.begin(),ranked.end(),mycmp()); + BOOST_FOREACH(entry const& e, ranked) + cout << setw(12) << " " << e.second << " " << e.first << endl; + cout << endl; + } + } + } +} diff --git a/moses/TranslationModel/UG/mm/Makefile b/moses/TranslationModel/UG/mm/Makefile.x similarity index 98% rename from moses/TranslationModel/UG/mm/Makefile rename to moses/TranslationModel/UG/mm/Makefile.x index e0ba5afff..32f81c892 100644 --- a/moses/TranslationModel/UG/mm/Makefile +++ b/moses/TranslationModel/UG/mm/Makefile.x @@ -29,7 +29,7 @@ HOST ?= $(shell hostname) HOSTTYPE ?= $(shell uname -m) KERNEL = $(shell uname -r) -MOSES_ROOT = ${HOME}/code/mosesdecoder +MOSES_ROOT ?= ${HOME}/code/mosesdecoder WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI} VPATH = ${HOME}/code/mosesdecoder/ CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES} diff --git a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h index 5dc05de11..af862bc2d 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h @@ -28,8 +28,8 @@ Bitext::agenda while (j->nextSample(sid,offset)) { aln.clear(); - int po_fwd = Moses::LRModel::NONE; - int po_bwd = Moses::LRModel::NONE; + int po_fwd = LRModel::NONE; + int po_bwd = LRModel::NONE; int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1; bitvector* full_aln = j->fwd ? &full_alignment : NULL; diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc index 97a7203a9..4b4e3ed7c 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc +++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc @@ -17,7 +17,7 @@ namespace sapt jstats() : my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0) { - for (int i = 0; i <= Moses::LRModel::NONE; ++i) + for (int i = 0; i <= LRModel::NONE; ++i) ofwd[i] = obwd[i] = 0; my_aln.reserve(1); } @@ -30,7 +30,7 @@ namespace sapt my_bcnt = other.bcnt(); my_aln = other.aln(); indoc = other.indoc; - for (int i = 0; i <= Moses::LRModel::NONE; i++) + for (int i = 0; i <= LRModel::NONE; i++) { ofwd[i] = other.ofwd[i]; obwd[i] = other.obwd[i]; @@ -41,7 +41,7 @@ namespace sapt jstats:: dcnt_fwd(PhraseOrientation const idx) const { - assert(idx <= Moses::LRModel::NONE); + assert(idx <= LRModel::NONE); return ofwd[idx]; } @@ -49,7 +49,7 @@ namespace sapt jstats:: dcnt_bwd(PhraseOrientation const idx) const { - assert(idx <= Moses::LRModel::NONE); + assert(idx <= LRModel::NONE); return obwd[idx]; } diff --git a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h index f7b1910cf..dd82a79e3 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_jstats.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_jstats.h @@ -24,8 +24,8 @@ namespace sapt std::vector > > my_aln; // internal word alignment - uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts - uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts + uint32_t ofwd[LRModel::NONE+1]; // forward distortion type counts + uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts public: std::map indoc; @@ -48,8 +48,8 @@ namespace sapt bool valid(); uint32_t dcnt_fwd(PhraseOrientation const idx) const; uint32_t dcnt_bwd(PhraseOrientation const idx) const; - void fill_lr_vec(Moses::LRModel::Direction const& dir, - Moses::LRModel::ModelType const& mdl, + void fill_lr_vec(LRModel::Direction const& dir, + LRModel::ModelType const& mdl, std::vector& v); }; } diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc index 6a5cf036a..998932023 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc +++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc @@ -12,7 +12,7 @@ namespace sapt pstats:: pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0) { - for (int i = 0; i <= Moses::LRModel::NONE; ++i) + for (int i = 0; i <= LRModel::NONE; ++i) ofwd[i] = obwd[i] = 0; } diff --git a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h index 4e40a26d1..0bc31ad1c 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_pstats.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_pstats.h @@ -30,8 +30,8 @@ namespace sapt size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt) size_t in_progress; // how many threads are currently working on this? - uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations - uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations + uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations + uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations indoc_map_t indoc; trg_map_t trg; @@ -43,14 +43,14 @@ namespace sapt bool add(uint64_t const pid, // target phrase id - float const w, // sample weight (1./(# of phrases extractable)) - float const b, // sample bias score - alnvec const& a, // local alignment - uint32_t const cnt2, // raw target phrase count - uint32_t fwd_o, // fwd. phrase orientation - uint32_t bwd_o, // bwd. phrase orientation - int const docid); // document where sample was found - + float const w, // sample weight (1./(# of phrases extractable)) + float const b, // sample bias score + alnvec const& a, // local alignment + uint32_t const cnt2, // raw target phrase count + uint32_t fwd_o, // fwd. phrase orientation + uint32_t bwd_o, // bwd. phrase orientation + int const docid); // document where sample was found + void count_sample(int const docid, // document where sample was found size_t const num_pairs, // # of phrases extractable here diff --git a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h index 79014e35a..a217a7716 100644 --- a/moses/TranslationModel/UG/mm/ug_bitext_sampler.h +++ b/moses/TranslationModel/UG/mm/ug_bitext_sampler.h @@ -74,8 +74,11 @@ BitextSampler : public Moses::reference_counter public: BitextSampler(BitextSampler const& other); BitextSampler const& operator=(BitextSampler const& other); - BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase, - SPTR const& bias, size_t const min_samples, size_t const max_samples, + BitextSampler(bitext const* const bitext, + typename bitext::iter const& phrase, + SPTR const& bias, + size_t const min_samples, + size_t const max_samples, sampling_method const method); ~BitextSampler(); SPTR stats(); diff --git a/moses/TranslationModel/UG/mm/ug_im_tsa.h b/moses/TranslationModel/UG/mm/ug_im_tsa.h index 33c61afc0..1293e838c 100644 --- a/moses/TranslationModel/UG/mm/ug_im_tsa.h +++ b/moses/TranslationModel/UG/mm/ug_im_tsa.h @@ -227,7 +227,9 @@ namespace sapt // Now sort the array if (log) *log << "sorting .... with " << threads << " threads." << std::endl; +#ifndef NO_MOSES double start_time = util::WallTime(); +#endif boost::scoped_ptr tpool; tpool.reset(new ug::ThreadPool(threads)); @@ -252,8 +254,10 @@ namespace sapt } } tpool.reset(); +#ifndef NO_MOSES if (log) *log << "Done sorting after " << util::WallTime() - start_time << " seconds." << std::endl; +#endif this->startArray = reinterpret_cast(&(*sufa.begin())); this->endArray = reinterpret_cast(&(*sufa.end())); this->numTokens = sufa.size(); diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc index 3273905bd..2963a7d6a 100644 --- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc +++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc @@ -4,7 +4,7 @@ namespace sapt { using namespace std; - Moses::LRModel::ReorderingType po_other = Moses::LRModel::NONE; + LRModel::ReorderingType po_other = LRModel::NONE; // check if min and max in the aligmnet vector v are within the // bounds LFT and RGT and update the actual bounds L and R; update // the total count of alignment links in the underlying phrase @@ -83,54 +83,56 @@ namespace sapt return ret; } - Moses::LRModel::ReorderingType + // LRModel::ReorderingType + sapt::PhraseOrientation find_po_fwd(vector >& a1, vector >& a2, size_t s1, size_t e1, size_t s2, size_t e2) { if (e2 == a2.size()) // end of target sentence - return Moses::LRModel::M; + return LRModel::M; size_t y = e2, L = e2, R = a2.size()-1; // won't change size_t x = e1, T = e1, B = a1.size()-1; if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0) - return Moses::LRModel::M; + return LRModel::M; B = x = s1-1; T = 0; if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0) - return Moses::LRModel::S; + return LRModel::S; while (e2 < a2.size() && a2[e2].size() == 0) ++e2; if (e2 == a2.size()) // should never happen, actually - return Moses::LRModel::NONE; + return LRModel::NONE; if (a2[e2].back() < s1) - return Moses::LRModel::DL; + return LRModel::DL; if (a2[e2].front() >= e1) - return Moses::LRModel::DR; - return Moses::LRModel::NONE; + return LRModel::DR; + return LRModel::NONE; } - Moses::LRModel::ReorderingType + // LRModel::ReorderingType + PhraseOrientation find_po_bwd(vector >& a1, vector >& a2, size_t s1, size_t e1, size_t s2, size_t e2) { - if (s1 == 0 && s2 == 0) return Moses::LRModel::M; - if (s2 == 0) return Moses::LRModel::DR; - if (s1 == 0) return Moses::LRModel::DL; + if (s1 == 0 && s2 == 0) return LRModel::M; + if (s2 == 0) return LRModel::DR; + if (s1 == 0) return LRModel::DL; size_t y = s2-1, L = 0, R = s2-1; // won't change size_t x = s1-1, T = 0, B = s1-1; if (expand_block(a1,a2,x,y,T,L,B,R) >= 0) - return Moses::LRModel::M; + return LRModel::M; T = x = e1; B = a1.size()-1; if (expand_block(a1,a2,x,y,T,L,B,R) >= 0) - return Moses::LRModel::S; + return LRModel::S; while (s2-- && a2[s2].size() == 0); - Moses::LRModel::ReorderingType ret; + LRModel::ReorderingType ret; ret = (a2[s2].size() == 0 ? po_other : - a2[s2].back() < s1 ? Moses::LRModel::DR : - a2[s2].front() >= e1 ? Moses::LRModel::DL : + a2[s2].back() < s1 ? LRModel::DR : + a2[s2].front() >= e1 ? LRModel::DL : po_other); #if 0 cout << "s1=" << s1 << endl; diff --git a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h index 846f9436d..eb3d96589 100644 --- a/moses/TranslationModel/UG/mm/ug_lexical_reordering.h +++ b/moses/TranslationModel/UG/mm/ug_lexical_reordering.h @@ -12,7 +12,7 @@ namespace sapt { #ifdef NO_MOSES class LRModel{ - +public: enum ModelType { Monotonic, MSD, MSLR, LeftRight, None }; enum Direction { Forward, Backward, Bidirectional }; diff --git a/moses/TranslationModel/UG/mm/ug_phrasepair.h b/moses/TranslationModel/UG/mm/ug_phrasepair.h index 9f6f3ebb7..21056e80b 100644 --- a/moses/TranslationModel/UG/mm/ug_phrasepair.h +++ b/moses/TranslationModel/UG/mm/ug_phrasepair.h @@ -26,8 +26,8 @@ namespace sapt uint32_t raw1, raw2, sample1, sample2, good1, good2, joint; float cum_bias; std::vector fvals; - float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs? - float dbwd[Moses::LRModel::NONE+1]; // distortion counts + float dfwd[LRModel::NONE+1]; // distortion counts // counts or probs? + float dbwd[LRModel::NONE+1]; // distortion counts std::vector aln; float score; bool inverse; @@ -125,7 +125,7 @@ namespace sapt // } // should we do that here or leave the raw counts? - for (int i = 0; i <= Moses::LRModel::NONE; i++) + for (int i = 0; i <= LRModel::NONE; i++) { PhraseOrientation po = static_cast(i); dfwd[i] = js.dcnt_fwd(po); @@ -201,7 +201,7 @@ namespace sapt , inverse(o.inverse) , indoc(o.indoc) { - for (int i = 0; i <= Moses::LRModel::NONE; ++i) + for (int i = 0; i <= LRModel::NONE; ++i) { dfwd[i] = o.dfwd[i]; dbwd[i] = o.dbwd[i]; diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index 10274c1bb..e86767b46 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -63,7 +63,9 @@ namespace Moses , btfix(new mmbitext) , m_bias_log(NULL) , m_bias_loglevel(0) +#ifndef NO_MOSES , m_lr_func(NULL) +#endif , m_sampling_method(random_sampling) , bias_key(((char*)this)+3) , cache_key(((char*)this)+2) @@ -597,6 +599,7 @@ namespace Moses // Evaluate with all features that can be computed using available factors tp->EvaluateInIsolation(src, m_featuresToApply); +#ifndef NO_MOSES if (m_lr_func) { LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType(); @@ -605,6 +608,7 @@ namespace Moses pool.fill_lr_vec(dir, mdl, *scores); tp->SetExtraScores(m_lr_func, scores); } +#endif return tp; } @@ -879,6 +883,7 @@ namespace Moses if (!context->cache1) context->cache1.reset(new pstats::cache_t); if (!context->cache2) context->cache2.reset(new pstats::cache_t); +#ifndef NO_MOSES if (m_lr_func_name.size() && m_lr_func == NULL) { FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name); @@ -887,6 +892,7 @@ namespace Moses << " does not seem to be a lexical reordering function!"); // todo: verify that lr_func implements a hierarchical reordering model } +#endif } bool diff --git a/moses/TranslationModel/UG/mmsapt.h b/moses/TranslationModel/UG/mmsapt.h index 0dc8f575f..76a8589d0 100644 --- a/moses/TranslationModel/UG/mmsapt.h +++ b/moses/TranslationModel/UG/mmsapt.h @@ -26,7 +26,9 @@ #include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h" +#ifndef NO_MOSES #include "moses/FF/LexicalReordering/LexicalReordering.h" +#endif #include "moses/InputFileStream.h" #include "moses/FactorTypeSet.h" @@ -82,7 +84,9 @@ namespace Moses boost::scoped_ptr m_bias_logger; // for logging to a file std::ostream* m_bias_log; int m_bias_loglevel; +#ifndef NO_MOSES LexicalReordering* m_lr_func; // associated lexical reordering function +#endif std::string m_lr_func_name; // name of associated lexical reordering function sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler boost::scoped_ptr m_thread_pool; diff --git a/moses/TranslationModel/UG/util/Makefile b/moses/TranslationModel/UG/util/Makefile index afe8c7b86..2f2ab6400 100644 --- a/moses/TranslationModel/UG/util/Makefile +++ b/moses/TranslationModel/UG/util/Makefile @@ -1,7 +1,7 @@ -# -*- makefile -*- +# # -*- makefile -*- -MOSES_CODE=/fs/gna0/germann/code/mosesdecoder -MOSES_ROOT=/fs/gna0/germann/moses -LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams) -ibm1-align: ibm1-align.cc - g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb \ No newline at end of file +# MOSES_CODE=/fs/gna0/germann/code/mosesdecoder +# MOSES_ROOT=/fs/gna0/germann/moses +# LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams) +# ibm1-align: ibm1-align.cc +# g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb \ No newline at end of file From dc8ad899454bd82408c6a371d5f50e497ede0caa Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Fri, 30 Oct 2015 14:32:12 +0000 Subject: [PATCH 4/5] More analysis tools; work in progress. --- moses/TranslationModel/UG/check-coverage2.cc | 67 +++++++++++++++++++ moses/TranslationModel/UG/check-coverage3.cc | 70 ++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 moses/TranslationModel/UG/check-coverage2.cc create mode 100644 moses/TranslationModel/UG/check-coverage3.cc diff --git a/moses/TranslationModel/UG/check-coverage2.cc b/moses/TranslationModel/UG/check-coverage2.cc new file mode 100644 index 000000000..2ff0836e6 --- /dev/null +++ b/moses/TranslationModel/UG/check-coverage2.cc @@ -0,0 +1,67 @@ +// for each word in the input, keep track of the longest matching ngram covering it +#include +#include +#include +#include +#include +#include +#include "mm/ug_bitext.h" +#include "generic/file_io/ug_stream.h" +#include +#include + +using namespace Moses; +using namespace sapt; +using namespace std; +using namespace boost; + +typedef sapt::L2R_Token Token; +typedef mmBitext bitext_t; + +struct mycmp +{ + bool operator() (pair const& a, + pair const& b) const + { + return a.second > b.second; + } +}; + +string +basename(string const path, string const suffix) +{ + size_t p = path.find_last_of("/"); + size_t k = path.size() - suffix.size(); + cout << path << " " << suffix << endl; + cout << path.substr(0,p) << " " << path.substr(k) << endl; + return path.substr(p, suffix == &path[k] ? k-p : path.size() - p); +} + +int main(int argc, char* argv[]) +{ + bitext_t B; + B.open(argv[1],argv[2],argv[3]); + B.V1->setDynamic(true); + string line; + string ifile = argv[4]; + string docname = basename(ifile, string(".") + argv[2] + ".gz"); + boost::iostreams::filtering_istream in; + ugdiss::open_input_stream(ifile,in); + while(getline(in,line)) + { + cout << line << " [" << docname << "]" << endl; + vector snt; + B.V1->fillIdSeq(line,snt); + vector match(snt.size(),0); + for (size_t i = 0; i < snt.size(); ++i) + { + bitext_t::iter m(B.I1.get()); + for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k); + for (size_t j = 0; j < m.size(); ++j) + + match[i+j] = max(match[i+j], m.size()); + } + for (size_t i = 0; i < snt.size(); ++i) + cout << setw(3) << match[i] << " " << (*B.V1)[snt[i]] << endl; + } +} diff --git a/moses/TranslationModel/UG/check-coverage3.cc b/moses/TranslationModel/UG/check-coverage3.cc new file mode 100644 index 000000000..d41e10ef3 --- /dev/null +++ b/moses/TranslationModel/UG/check-coverage3.cc @@ -0,0 +1,70 @@ +// #include "mmsapt.h" +// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" +// #include "moses/TranslationTask.h" +#include +#include +#include +#include +#include +#include +#include "mm/ug_bitext.h" +#include "generic/file_io/ug_stream.h" +#include +#include +#include "mm/ug_bitext_sampler.h" + +using namespace Moses; +using namespace sapt; +using namespace std; +using namespace boost; + +typedef sapt::L2R_Token Token; +typedef mmBitext bitext_t; + +struct mycmp +{ + bool operator() (pair const& a, + pair const& b) const + { + return a.second > b.second; + } +}; + +string +basename(string const path, string const suffix) +{ + size_t p = path.find_last_of("/"); + size_t k = path.size() - suffix.size(); + cout << path << " " << suffix << endl; + cout << path.substr(0,p) << " " << path.substr(k) << endl; + return path.substr(p, suffix == &path[k] ? k-p : path.size() - p); +} + +int main(int argc, char* argv[]) +{ + boost::intrusive_ptr B(new bitext_t); + B->open(argv[1],argv[2],argv[3]); + string line; + string ifile = argv[4]; + string docname = basename(ifile, string(".") + argv[2] + ".gz"); + boost::iostreams::filtering_istream in; + ugdiss::open_input_stream(ifile,in); + while(getline(in,line)) + { + cout << line << " [" << docname << "]" << endl; + vector snt; + B->V1->fillIdSeq(line,snt); + for (size_t i = 0; i < snt.size(); ++i) + { + bitext_t::iter m(B->I1.get()); + for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k) + { + SPTR zilch; + BitextSampler s(B.get(), m, zilch, 1000, 1000, + sapt::random_sampling); + s(); + cout << m.size() << " " << s.stats()->trg.size() << endl; + } + } + } +} From 324c378f7fc54f02a8ea264915d3eade867ff23d Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sat, 31 Oct 2015 12:50:52 +0000 Subject: [PATCH 5/5] Options refactoring. Moses crashed in server mode when asked to provide n-best translations with scores. --- contrib/server/mosesserver.cpp | 17 +- moses-cmd/LatticeMBRGrid.cpp | 5 +- moses/AlignmentInfo.cpp | 24 +- moses/AlignmentInfo.h | 5 +- moses/ChartCell.cpp | 17 +- moses/ChartHypothesis.cpp | 11 +- moses/ChartHypothesisCollection.cpp | 11 +- moses/ChartHypothesisCollection.h | 3 +- moses/ChartManager.cpp | 11 +- moses/ChartParser.cpp | 9 +- moses/ConfusionNet.cpp | 25 +- moses/ConfusionNet.h | 3 +- moses/DecodeStepTranslation.cpp | 40 +-- moses/DecodeStepTranslation.h | 11 +- moses/ForestInput.cpp | 8 +- moses/ForestInput.h | 5 +- moses/HypergraphOutput.cpp | 4 +- moses/Hypothesis.cpp | 51 ++-- moses/Hypothesis.h | 15 +- moses/HypothesisStackNormal.cpp | 2 +- moses/IOWrapper.cpp | 10 +- moses/IOWrapper.h | 5 +- moses/Incremental.cpp | 30 ++- moses/InputType.h | 9 +- moses/Manager.cpp | 165 +++++++----- moses/Manager.h | 2 +- moses/MockHypothesis.cpp | 12 +- moses/Parameter.cpp | 7 + moses/Parameter.h | 14 ++ moses/ScoreComponentCollection.cpp | 21 +- moses/ScoreComponentCollection.h | 7 +- moses/Sentence.cpp | 20 +- moses/Sentence.h | 16 +- moses/StaticData.cpp | 219 ++++------------ moses/StaticData.h | 234 +++++------------- moses/Syntax/Manager.cpp | 13 +- moses/Syntax/S2T/OovHandler-inl.h | 6 +- moses/TabbedSentence.cpp | 10 +- moses/TabbedSentence.h | 4 +- moses/TranslationModel/UG/mmsapt.cpp | 4 +- moses/TranslationModel/UG/ptable-lookup.cc | 2 +- ...ranslationOptionCollectionConfusionNet.cpp | 12 +- moses/TranslationTask.cpp | 7 +- moses/TreeInput.cpp | 9 +- moses/TreeInput.h | 5 +- moses/WordLattice.cpp | 6 +- moses/WordLattice.h | 6 +- moses/parameters/AllOptions.cpp | 39 ++- moses/parameters/AllOptions.h | 5 +- moses/parameters/NBestOptions.cpp | 19 +- moses/parameters/NBestOptions.h | 4 + moses/parameters/OptionsBaseClass.cpp | 10 + moses/parameters/OptionsBaseClass.h | 4 + moses/parameters/ReportingOptions.cpp | 127 +++++----- moses/parameters/ReportingOptions.h | 51 ++-- moses/parameters/SearchOptions.cpp | 1 + moses/server/TranslationRequest.cpp | 67 +++-- moses/server/TranslationRequest.h | 4 +- 58 files changed, 731 insertions(+), 732 deletions(-) diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp index 9b34adb6c..79b48f54c 100644 --- a/contrib/server/mosesserver.cpp +++ b/contrib/server/mosesserver.cpp @@ -257,9 +257,9 @@ public: const StaticData &staticData = StaticData::Instance(); //Make sure alternative paths are retained, if necessary - if (addGraphInfo || nbest_size>0) { - (const_cast(staticData)).SetOutputSearchGraph(true); - } + // if (addGraphInfo || nbest_size>0) { + // (const_cast(staticData)).SetOutputSearchGraph(true); + // } stringstream out, graphInfo, transCollOpts; @@ -269,7 +269,7 @@ public: boost::shared_ptr tinput(new TreeInput); const vector& IFO = staticData.GetInputFactorOrder(); istringstream in(source + "\n"); - tinput->Read(in,IFO); + tinput->Read(in,IFO,staticData.options()); ttasksptr task = Moses::TranslationTask::create(tinput); ChartManager manager(task); manager.Decode(); @@ -285,7 +285,8 @@ public: else { // size_t lineNumber = 0; // TODO: Include sentence request number here? - boost::shared_ptr sentence(new Sentence(0,source)); + boost::shared_ptr sentence; + sentence.reset(new Sentence(0,source,staticData.options())); ttasksptr task = Moses::TranslationTask::create(sentence); Manager manager(task); manager.Decode(); @@ -320,7 +321,7 @@ public: outputNBest(manager, m_retData, nbest_size, nbest_distinct, reportAllFactors, addAlignInfo, addScoreBreakdown); } - (const_cast(staticData)).SetOutputSearchGraph(false); + // (const_cast(staticData)).SetOutputSearchGraph(false); } m_retData["text"] = value_string(out.str()); XVERBOSE(1,"Output: " << out.str() << endl); @@ -479,7 +480,9 @@ public: { // should the score breakdown be reported in a more structured manner? ostringstream buf; - path.GetScoreBreakdown()->OutputAllFeatureScores(buf); + bool with_labels + = StaticData::Instance().options().nbest.include_feature_labels; + path.GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str()); } diff --git a/moses-cmd/LatticeMBRGrid.cpp b/moses-cmd/LatticeMBRGrid.cpp index a0c0a7852..356f90525 100644 --- a/moses-cmd/LatticeMBRGrid.cpp +++ b/moses-cmd/LatticeMBRGrid.cpp @@ -202,8 +202,9 @@ int main(int argc, char* argv[]) << " ||| "; vector mbrBestHypo = doLatticeMBR(manager,nBestList); manager.OutputBestHypo(mbrBestHypo, lineCount, - SD.GetReportSegmentation(), - SD.GetReportAllFactors(),cout); + manager.options().output.ReportSegmentation, + manager.options().output.ReportAllFactors, + cout); } } } diff --git a/moses/AlignmentInfo.cpp b/moses/AlignmentInfo.cpp index 97efc25eb..15e8b6d52 100644 --- a/moses/AlignmentInfo.cpp +++ b/moses/AlignmentInfo.cpp @@ -106,7 +106,9 @@ std::set AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const } -bool compare_target(const std::pair *a, const std::pair *b) +bool +compare_target(std::pair const* a, + std::pair const* b) { if(a->second < b->second) return true; if(a->second == b->second) return (a->first < b->first); @@ -114,29 +116,29 @@ bool compare_target(const std::pair *a, const std::pair* > AlignmentInfo::GetSortedAlignments() const +std::vector< const std::pair* > +AlignmentInfo:: +GetSortedAlignments(WordAlignmentSort SortOrder) const { std::vector< const std::pair* > ret; - + CollType::const_iterator iter; for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) { const std::pair &alignPair = *iter; ret.push_back(&alignPair); } - - const StaticData &staticData = StaticData::Instance(); - WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort(); - - switch (wordAlignmentSort) { + + switch (SortOrder) { case NoSort: break; - + case TargetOrder: std::sort(ret.begin(), ret.end(), compare_target); break; - + default: - UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort); + UTIL_THROW(util::Exception, "Unknown word alignment sort option: " + << SortOrder); } return ret; diff --git a/moses/AlignmentInfo.h b/moses/AlignmentInfo.h index c74ff340c..50a4bf550 100644 --- a/moses/AlignmentInfo.h +++ b/moses/AlignmentInfo.h @@ -26,7 +26,7 @@ #include #include - +#include "TypeDef.h" namespace Moses { @@ -83,7 +83,8 @@ public: return m_collection.size(); } - std::vector< const std::pair* > GetSortedAlignments() const; + std::vector< const std::pair* > + GetSortedAlignments(WordAlignmentSort SortOrder) const; std::vector GetSourceIndex2PosMap() const; diff --git a/moses/ChartCell.cpp b/moses/ChartCell.cpp index f0a25986a..89108ebf9 100644 --- a/moses/ChartCell.cpp +++ b/moses/ChartCell.cpp @@ -27,7 +27,6 @@ #include "RuleCube.h" #include "Range.h" #include "Util.h" -#include "StaticData.h" #include "ChartTranslationOptions.h" #include "ChartTranslationOptionList.h" #include "ChartManager.h" @@ -52,8 +51,7 @@ ChartCellBase::~ChartCellBase() {} ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) : ChartCellBase(startPos, endPos), m_manager(manager) { - const StaticData &staticData = StaticData::Instance(); - m_nBestIsEnabled = staticData.options().nbest.enabled; + m_nBestIsEnabled = manager.options().nbest.enabled; } ChartCell::~ChartCell() {} @@ -66,7 +64,14 @@ ChartCell::~ChartCell() {} bool ChartCell::AddHypothesis(ChartHypothesis *hypo) { const Word &targetLHS = hypo->GetTargetLHS(); - return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager); + MapType::iterator m = m_hypoColl.find(targetLHS); + if (m == m_hypoColl.end()) + { + std::pair + e(targetLHS, ChartHypothesisCollection(m_manager.options())); + m = m_hypoColl.insert(e).first; + } + return m->second.AddHypothesis(hypo, m_manager); } /** Prune each collection in this cell to a particular size */ @@ -87,8 +92,6 @@ void ChartCell::PruneToSize() void ChartCell::Decode(const ChartTranslationOptionList &transOptList , const ChartCellCollection &allChartCells) { - const StaticData &staticData = StaticData::Instance(); - // priority queue for applicable rules with selected hypotheses RuleCubeQueue queue(m_manager); @@ -100,7 +103,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList } // pluck things out of queue and add to hypo collection - const size_t popLimit = staticData.options().cube.pop_limit; + const size_t popLimit = m_manager.options().cube.pop_limit; for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) { ChartHypothesis *hypo = queue.Pop(); AddHypothesis(hypo); diff --git a/moses/ChartHypothesis.cpp b/moses/ChartHypothesis.cpp index cf2904e47..7c9f17621 100644 --- a/moses/ChartHypothesis.cpp +++ b/moses/ChartHypothesis.cpp @@ -256,12 +256,13 @@ void ChartHypothesis::CleanupArcList() * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ + AllOptions const& opts = StaticData::Instance().options(); const StaticData &staticData = StaticData::Instance(); - size_t nBestSize = staticData.options().nbest.nbest_size; - bool distinctNBest = (staticData.options().nbest.only_distinct - || staticData.options().mbr.enabled - || staticData.GetOutputSearchGraph() - || staticData.GetOutputSearchGraphHypergraph()); + size_t nBestSize = opts.nbest.nbest_size; + bool distinctNBest = (opts.nbest.only_distinct + || opts.mbr.enabled + || opts.output.NeedSearchGraph() + || !opts.output.SearchGraphHG.empty()); if (!distinctNBest && m_arcList->size() > nBestSize) { // prune arc list only if there too many arcs diff --git a/moses/ChartHypothesisCollection.cpp b/moses/ChartHypothesisCollection.cpp index 068194287..f7002bfad 100644 --- a/moses/ChartHypothesisCollection.cpp +++ b/moses/ChartHypothesisCollection.cpp @@ -26,6 +26,7 @@ #include "ChartManager.h" #include "HypergraphOutput.h" #include "util/exception.hh" +#include "parameters/AllOptions.h" using namespace std; using namespace Moses; @@ -33,13 +34,13 @@ using namespace Moses; namespace Moses { -ChartHypothesisCollection::ChartHypothesisCollection() +ChartHypothesisCollection::ChartHypothesisCollection(AllOptions const& opts) { - const StaticData &staticData = StaticData::Instance(); + // const StaticData &staticData = StaticData::Instance(); - m_beamWidth = staticData.GetBeamWidth(); - m_maxHypoStackSize = staticData.options().search.stack_size; - m_nBestIsEnabled = staticData.options().nbest.enabled; + m_beamWidth = opts.search.beam_width; // staticData.GetBeamWidth(); + m_maxHypoStackSize = opts.search.stack_size; // staticData.options().search.stack_size; + m_nBestIsEnabled = opts.nbest.enabled; // staticData.options().nbest.enabled; m_bestScore = -std::numeric_limits::infinity(); } diff --git a/moses/ChartHypothesisCollection.h b/moses/ChartHypothesisCollection.h index 169e81f19..9d682d76d 100644 --- a/moses/ChartHypothesisCollection.h +++ b/moses/ChartHypothesisCollection.h @@ -29,6 +29,7 @@ namespace Moses { class ChartSearchGraphWriter; + class AllOptions; //! functor to compare (chart) hypotheses by (descending) score class ChartHypothesisScoreOrderer @@ -70,7 +71,7 @@ public: return m_hypos.end(); } - ChartHypothesisCollection(); + ChartHypothesisCollection(AllOptions const& opts); ~ChartHypothesisCollection(); bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager); diff --git a/moses/ChartManager.cpp b/moses/ChartManager.cpp index 9c672e00a..767c5b44b 100644 --- a/moses/ChartManager.cpp +++ b/moses/ChartManager.cpp @@ -371,7 +371,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector, OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; boost::shared_ptr scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation); - scoreBreakdown->OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + scoreBreakdown->OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; // optionally, print word alignments @@ -618,7 +619,7 @@ void ChartManager::OutputDetailedTranslationReport( //DIMw const StaticData &staticData = StaticData::Instance(); - if (staticData.IsDetailedAllTranslationReportingEnabled()) { + if (options().output.detailed_all_transrep_filepath.size()) { const Sentence &sentence = static_cast(m_source); size_t nBestSize = staticData.options().nbest.nbest_size; std::vector > nBestList; @@ -835,11 +836,11 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe Backtrack(hypo); VERBOSE(3,"0" << std::endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << hypo->GetTotalScore() << " "; } - if (StaticData::Instance().IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { out << "||| "; } Phrase outPhrase(ARRAY_SIZE_INCR); @@ -858,7 +859,7 @@ void ChartManager::OutputBestHypo(OutputCollector *collector, const ChartHypothe } else { VERBOSE(1, "NO BEST TRANSLATION" << endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << "0 "; } diff --git a/moses/ChartParser.cpp b/moses/ChartParser.cpp index 897f1828f..19aeb8515 100644 --- a/moses/ChartParser.cpp +++ b/moses/ChartParser.cpp @@ -107,8 +107,13 @@ void ChartParserUnknown::Process(const Word &sourceWord, const Range &range, Cha targetPhrase->SetAlignmentInfo("0-0"); targetPhrase->EvaluateInIsolation(*unksrc); - if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) { - targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]"); + AllOptions const& opts = staticData.options(); + if (!opts.output.detailed_tree_transrep_filepath.empty() || + opts.nbest.print_trees || staticData.GetTreeStructure() != NULL) { + std::string prop = "[ "; + prop += (*targetLHS)[0]->GetString().as_string() + " "; + prop += sourceWord[0]->GetString().as_string() + " ]"; + targetPhrase->SetProperty("Tree", prop); } // chart rule diff --git a/moses/ConfusionNet.cpp b/moses/ConfusionNet.cpp index 41522268d..188c57438 100644 --- a/moses/ConfusionNet.cpp +++ b/moses/ConfusionNet.cpp @@ -110,29 +110,14 @@ ReadF(std::istream& in, const std::vector& factorOrder, int format) int ConfusionNet:: Read(std::istream& in, - const std::vector& factorOrder) + const std::vector& factorOrder, + AllOptions const& opts) { int rv=ReadF(in,factorOrder,0); if(rv) stats.collect(*this); return rv; } -#if 0 -// Deprecated due to code duplication; -// use Word::CreateFromString() instead -void -ConfusionNet:: -String2Word(const std::string& s,Word& w, - const std::vector& factorOrder) -{ - std::vector factorStrVector = Tokenize(s, "|"); - for(size_t i=0; i& factorOrder) @@ -161,7 +146,8 @@ ReadFormat0(std::istream& in, const std::vector& factorOrder) for(size_t i=0; i < numInputScores; i++) { double prob; if (!(is>>prob)) { - TRACE_ERR("ERROR: unable to parse CN input - bad link probability, or wrong number of scores\n"); + TRACE_ERR("ERROR: unable to parse CN input - bad link probability, " + << "or wrong number of scores\n"); return false; } if(prob<0.0) { @@ -174,7 +160,8 @@ ReadFormat0(std::istream& in, const std::vector& factorOrder) probs[i] = (std::max(static_cast(log(prob)),LOWEST_SCORE)); } - //store 'real' word count in last feature if we have one more weight than we do arc scores and not epsilon + // store 'real' word count in last feature if we have one more + // weight than we do arc scores and not epsilon if (addRealWordCount && word!=EPSILON && word!="") probs.back() = -1.0; diff --git a/moses/ConfusionNet.h b/moses/ConfusionNet.h index 85e5f338d..834148864 100644 --- a/moses/ConfusionNet.h +++ b/moses/ConfusionNet.h @@ -67,7 +67,8 @@ public: bool ReadF(std::istream&,const std::vector& factorOrder,int format=0); virtual void Print(std::ostream&) const; - int Read(std::istream& in,const std::vector& factorOrder); + int Read(std::istream& in,const std::vector& factorOrder, + AllOptions const& opts); Phrase GetSubString(const Range&) const; //TODO not defined std::string GetStringRep(const std::vector factorsToPrint) const; //TODO not defined diff --git a/moses/DecodeStepTranslation.cpp b/moses/DecodeStepTranslation.cpp index c593f8ebc..25d160626 100644 --- a/moses/DecodeStepTranslation.cpp +++ b/moses/DecodeStepTranslation.cpp @@ -100,12 +100,14 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO } } -void DecodeStepTranslation::ProcessInitialTranslation( - const InputType &source - ,PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPath &inputPath - , TargetPhraseCollection::shared_ptr phraseColl) const +void +DecodeStepTranslation:: +ProcessInitialTranslation(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPath const& inputPath, + TargetPhraseCollection::shared_ptr phraseColl) const { const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const size_t tableLimit = phraseDictionary->GetTableLimit(); @@ -114,12 +116,13 @@ void DecodeStepTranslation::ProcessInitialTranslation( if (phraseColl != NULL) { IFVERBOSE(3) { - if(StaticData::Instance().GetInputType() == SentenceInput) - TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n"); + if(source.GetType() == SentenceInput) + TRACE_ERR("[" << source.GetSubString(range) << "; " + << startPos << "-" << endPos << "]\n"); else TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl); } - + TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit; @@ -137,11 +140,13 @@ void DecodeStepTranslation::ProcessInitialTranslation( } } -void DecodeStepTranslation::ProcessInitialTranslationLEGACY( - const InputType &source - ,PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPathList &inputPathList) const +void +DecodeStepTranslation:: +ProcessInitialTransLEGACY(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPathList const& inputPathList) const { const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature(); const size_t tableLimit = phraseDictionary->GetTableLimit(); @@ -152,12 +157,13 @@ void DecodeStepTranslation::ProcessInitialTranslationLEGACY( if (phraseColl != NULL) { IFVERBOSE(3) { - if(StaticData::Instance().GetInputType() == SentenceInput) - TRACE_ERR("[" << source.GetSubString(range) << "; " << startPos << "-" << endPos << "]\n"); + if(source.GetType() == SentenceInput) + TRACE_ERR("[" << source.GetSubString(range) << "; " + << startPos << "-" << endPos << "]\n"); else TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl); } - + const std::vector &sourcePhrases = phraseColl->GetSourcePhrases(); TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd; diff --git a/moses/DecodeStepTranslation.h b/moses/DecodeStepTranslation.h index eceebb940..25af693eb 100644 --- a/moses/DecodeStepTranslation.h +++ b/moses/DecodeStepTranslation.h @@ -61,10 +61,13 @@ public: , TargetPhraseCollection::shared_ptr phraseColl) const; // legacy - void ProcessInitialTranslationLEGACY(const InputType &source - , PartialTranslOptColl &outputPartialTranslOptColl - , size_t startPos, size_t endPos, bool adhereTableLimit - , const InputPathList &inputPathList) const; + void + ProcessInitialTransLEGACY(InputType const& source, + PartialTranslOptColl &outputPartialTranslOptColl, + size_t startPos, size_t endPos, + bool adhereTableLimit, + InputPathList const& inputPathList) const; + void ProcessLEGACY(const TranslationOption &inputPartialTranslOpt , const DecodeStep &decodeStep , PartialTranslOptColl &outputPartialTranslOptColl diff --git a/moses/ForestInput.cpp b/moses/ForestInput.cpp index 2977c0636..57b8fa472 100644 --- a/moses/ForestInput.cpp +++ b/moses/ForestInput.cpp @@ -17,8 +17,10 @@ namespace Moses { //! populate this InputType with data from in stream -int ForestInput::Read(std::istream &in, - const std::vector& factorOrder) +int ForestInput:: +Read(std::istream &in, + std::vector const& factorOrder, + AllOptions const& opts) { using Syntax::F2S::Forest; @@ -56,7 +58,7 @@ int ForestInput::Read(std::istream &in, // not sure ForestInput needs to. std::stringstream strme; strme << " " << sentence << " " << std::endl; - Sentence::Read(strme, factorOrder); + Sentence::Read(strme, factorOrder, opts); // Find the maximum end position of any vertex (0 if forest is empty). std::size_t maxEnd = FindMaxEnd(*m_forest); diff --git a/moses/ForestInput.h b/moses/ForestInput.h index 3ad764402..61cb08d83 100644 --- a/moses/ForestInput.h +++ b/moses/ForestInput.h @@ -28,7 +28,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts); //! Output debugging info to stream out virtual void Print(std::ostream&) const; diff --git a/moses/HypergraphOutput.cpp b/moses/HypergraphOutput.cpp index 830f97ba8..373c2109f 100644 --- a/moses/HypergraphOutput.cpp +++ b/moses/HypergraphOutput.cpp @@ -56,7 +56,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { ChartHypothesis &mainHypo = **iter; - if (StaticData::Instance().GetUnprunedSearchGraph() || + if (StaticData::Instance().options().output.DontPruneSearchGraph || reachable.find(mainHypo.GetId()) != reachable.end()) { (*m_out) << m_lineNumber << " " << mainHypo << endl; } @@ -90,7 +90,7 @@ WriteHypos(const ChartHypothesisCollection& hypos, ChartHypothesisCollection::const_iterator iter; for (iter = hypos.begin() ; iter != hypos.end() ; ++iter) { const ChartHypothesis* mainHypo = *iter; - if (!StaticData::Instance().GetUnprunedSearchGraph() && + if (!StaticData::Instance().options().output.DontPruneSearchGraph && reachable.find(mainHypo->GetId()) == reachable.end()) { //Ignore non reachable nodes continue; diff --git a/moses/Hypothesis.cpp b/moses/Hypothesis.cpp index c6a248419..c5745c5de 100644 --- a/moses/Hypothesis.cpp +++ b/moses/Hypothesis.cpp @@ -195,9 +195,8 @@ EvaluateWhenApplied(float futureScore) const StatefulFeatureFunction &ff = *ffs[i]; const StaticData &staticData = StaticData::Instance(); if (! staticData.IsFeatureFunctionIgnored(ff)) { - m_ffStates[i] = ff.EvaluateWhenApplied(*this, - m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL, - &m_currScoreBreakdown); + FFState const* s = m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL; + m_ffStates[i] = ff.EvaluateWhenApplied(*this, s, &m_currScoreBreakdown); } } @@ -276,15 +275,11 @@ CleanupArcList() * However, may not be enough if only unique candidates are needed, * so we'll keep all of arc list if nedd distinct n-best list */ + const StaticData &staticData = StaticData::Instance(); - size_t nBestSize = staticData.options().nbest.nbest_size; - bool distinctNBest = (m_manager.options().nbest.only_distinct || - staticData.GetLatticeSamplesSize() || - m_manager.options().mbr.enabled || - staticData.GetOutputSearchGraph() || - staticData.GetOutputSearchGraphSLF() || - staticData.GetOutputSearchGraphHypergraph() || - m_manager.options().lmbr.enabled); + AllOptions const& opts = m_manager.options(); + size_t nBestSize = opts.nbest.nbest_size; + bool distinctNBest = opts.NBestDistinct(); if (!distinctNBest && m_arcList->size() > nBestSize * 5) { // prune arc list only if there too many arcs @@ -292,9 +287,8 @@ CleanupArcList() m_arcList->end(), CompareHypothesisTotalScore()); // delete bad ones - ArcList::iterator iter; - for (iter = m_arcList->begin() + nBestSize; iter != m_arcList->end() ; ++iter) - delete *iter; + ArcList::iterator i = m_arcList->begin() + nBestSize; + while (i != m_arcList->end()) delete *i++; m_arcList->erase(m_arcList->begin() + nBestSize, m_arcList->end()); } @@ -386,14 +380,16 @@ OutputAlignment(std::ostream &out) const edges.push_back(currentHypo); currentHypo = currentHypo->GetPrevHypo(); } - - OutputAlignment(out, edges); + + OutputAlignment(out, edges, m_manager.options().output.WA_SortOrder); } void Hypothesis:: -OutputAlignment(ostream &out, const vector &edges) +OutputAlignment(ostream &out, + vector const& edges, + WordAlignmentSort waso) { size_t targetOffset = 0; @@ -402,7 +398,7 @@ OutputAlignment(ostream &out, const vector &edges) const TargetPhrase &tp = edge.GetCurrTargetPhrase(); size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset); + OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset, waso); targetOffset += tp.GetSize(); } @@ -412,15 +408,17 @@ OutputAlignment(ostream &out, const vector &edges) void Hypothesis:: OutputAlignment(ostream &out, const AlignmentInfo &ai, - size_t sourceOffset, size_t targetOffset) + size_t sourceOffset, size_t targetOffset, + WordAlignmentSort waso) { typedef std::vector< const std::pair* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(); + AlignVec alignments = ai.GetSortedAlignments(waso); AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { const std::pair &alignment = **it; - out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; + out << alignment.first + sourceOffset << "-" + << alignment.second + targetOffset << " "; } } @@ -526,15 +524,17 @@ OutputSurface(std::ostream &out, const Hypothesis &edge, const int sourceEnd = sourceRange.GetEndPos(); out << "|" << sourceStart << "-" << sourceEnd; // enriched "-tt" if (reportSegmentation == 2) { + WordAlignmentSort waso = m_manager.options().output.WA_SortOrder; out << ",wa="; const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm(); - Hypothesis::OutputAlignment(out, ai, 0, 0); + Hypothesis::OutputAlignment(out, ai, 0, 0, waso); out << ",total="; out << edge.GetScore() - edge.GetPrevHypo()->GetScore(); out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = m_manager.options().nbest.include_feature_labels; + scoreBreakdown.OutputAllFeatureScores(out, with_labels); } out << "| "; } @@ -604,9 +604,10 @@ OutputLocalWordAlignment(vector& dest) const using namespace std; Range const& src = this->GetCurrSourceWordsRange(); Range const& trg = this->GetCurrTargetWordsRange(); - + + WordAlignmentSort waso = m_manager.options().output.WA_SortOrder; vector const* > a - = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(); + = this->GetCurrTargetPhrase().GetAlignTerm().GetSortedAlignments(waso); typedef pair item; map M; BOOST_FOREACH(item const* p, a) { diff --git a/moses/Hypothesis.h b/moses/Hypothesis.h index 125aef530..c1d6c4598 100644 --- a/moses/Hypothesis.h +++ b/moses/Hypothesis.h @@ -251,9 +251,18 @@ public: return m_transOpt; } - void OutputAlignment(std::ostream &out) const; - static void OutputAlignment(std::ostream &out, const std::vector &edges); - static void OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset); + void + OutputAlignment(std::ostream &out) const; + + static void + OutputAlignment(std::ostream &out, + const std::vector &edges, + WordAlignmentSort waso); + + static void + OutputAlignment(std::ostream &out, const Moses::AlignmentInfo &ai, + size_t sourceOffset, size_t targetOffset, + WordAlignmentSort waso); void OutputInput(std::ostream& os) const; static void OutputInput(std::vector& map, const Hypothesis* hypo); diff --git a/moses/HypothesisStackNormal.cpp b/moses/HypothesisStackNormal.cpp index 9f13213d4..77347baaf 100644 --- a/moses/HypothesisStackNormal.cpp +++ b/moses/HypothesisStackNormal.cpp @@ -36,7 +36,7 @@ namespace Moses HypothesisStackNormal::HypothesisStackNormal(Manager& manager) : HypothesisStack(manager) { - m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled; + m_nBestIsEnabled = manager.options().nbest.enabled; m_bestScore = -std::numeric_limits::infinity(); m_worstScore = -std::numeric_limits::infinity(); } diff --git a/moses/IOWrapper.cpp b/moses/IOWrapper.cpp index 62c753269..8a83547e7 100644 --- a/moses/IOWrapper.cpp +++ b/moses/IOWrapper.cpp @@ -79,12 +79,6 @@ namespace Moses IOWrapper::IOWrapper() : m_nBestStream(NULL) - // , m_outputWordGraphStream(NULL) - // , m_outputSearchGraphStream(NULL) - // , m_detailedTranslationReportingStream(NULL) - // , m_unknownsStream(NULL) - // , m_alignmentInfoStream(NULL) - // , m_latticeSamplesStream(NULL) , m_surpressSingleBestOutput(false) , m_look_ahead(0) , m_look_back(0) @@ -100,8 +94,8 @@ IOWrapper::IOWrapper() m_look_ahead = staticData.options().context.look_ahead; m_look_back = staticData.options().context.look_back; - m_inputType = staticData.GetInputType(); - + m_inputType = staticData.options().input.input_type; + UTIL_THROW_IF2((m_look_ahead || m_look_back) && m_inputType != SentenceInput, "Context-sensitive decoding currently works only with sentence input."); diff --git a/moses/IOWrapper.h b/moses/IOWrapper.h index c55793329..02c3470bb 100644 --- a/moses/IOWrapper.h +++ b/moses/IOWrapper.h @@ -216,6 +216,7 @@ boost::shared_ptr IOWrapper:: BufferInput() { + AllOptions const& opts = StaticData::Instance().options(); boost::shared_ptr source; boost::shared_ptr ret; if (m_future_input.size()) { @@ -224,13 +225,13 @@ BufferInput() m_buffered_ahead -= ret->GetSize(); } else { source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder)) + if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) return ret; ret = source; } while (m_buffered_ahead < m_look_ahead) { source.reset(new itype); - if (!source->Read(*m_inputStream, *m_inputFactorOrder)) + if (!source->Read(*m_inputStream, *m_inputFactorOrder, opts)) break; m_future_input.push_back(source); m_buffered_ahead += source->GetSize(); diff --git a/moses/Incremental.cpp b/moses/Incremental.cpp index 407cf0f9d..c004cda5d 100644 --- a/moses/Incremental.cpp +++ b/moses/Incremental.cpp @@ -320,10 +320,15 @@ void Manager::OutputNBest(OutputCollector *collector) const OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId()); } -void Manager::OutputNBestList(OutputCollector *collector, const std::vector &nbest, long translationId) const +void +Manager:: +OutputNBestList(OutputCollector *collector, + std::vector const& nbest, + long translationId) const { const StaticData &staticData = StaticData::Instance(); - const std::vector &outputFactorOrder = staticData.GetOutputFactorOrder(); + const std::vector &outputFactorOrder + = staticData.GetOutputFactorOrder(); std::ostringstream out; // wtf? copied from the original OutputNBestList @@ -332,18 +337,21 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vector::const_iterator i = nbest.begin(); i != nbest.end(); ++i) { + for (std::vector::const_iterator i = nbest.begin(); + i != nbest.end(); ++i) { Incremental::PhraseAndFeatures(*i, outputPhrase, features); // and UTIL_THROW_IF2(outputPhrase.GetSize() < 2, - "Output phrase should have contained at least 2 words (beginning and end-of-sentence)"); + "Output phrase should have contained at least 2 words " + << "(beginning and end-of-sentence)"); outputPhrase.RemoveWord(0); outputPhrase.RemoveWord(outputPhrase.GetSize() - 1); out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; - features.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + features.OutputAllFeatureScores(out, with_labels); out << " ||| " << i->GetScore() << '\n'; } out << std::flush; @@ -351,7 +359,9 @@ void Manager::OutputNBestList(OutputCollector *collector, const std::vectorWrite(translationId, out.str()); } -void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const +void +Manager:: +OutputDetailedTranslationReport(OutputCollector *collector) const { if (collector && !completed_nbest_->empty()) { const search::Applied &applied = completed_nbest_->at(0); @@ -498,7 +508,7 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied if (collector == NULL) return; std::ostringstream out; FixPrecision(out); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << applied.GetScore() << ' '; } Phrase outPhrase; @@ -515,10 +525,12 @@ void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl); } -void Manager::OutputBestNone(OutputCollector *collector, long translationId) const +void +Manager:: +OutputBestNone(OutputCollector *collector, long translationId) const { if (collector == NULL) return; - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { collector->Write(translationId, "0 \n"); } else { collector->Write(translationId, "\n"); diff --git a/moses/InputType.h b/moses/InputType.h index af0a73b0c..9f3777530 100644 --- a/moses/InputType.h +++ b/moses/InputType.h @@ -1,5 +1,4 @@ -// -*- c++ -*- -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- // vim:tabstop=2 /*********************************************************************** @@ -31,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "ReorderingConstraint.h" #include "NonTerminal.h" #include "Range.h" +#include "parameters/AllOptions.h" namespace Moses { @@ -184,7 +184,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder) =0; + virtual int + Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) =0; //! Output debugging info to stream out virtual void Print(std::ostream&) const =0; diff --git a/moses/Manager.cpp b/moses/Manager.cpp index 71b384fd1..7e34302ba 100644 --- a/moses/Manager.cpp +++ b/moses/Manager.cpp @@ -1,6 +1,5 @@ -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- // vim:tabstop=2 - /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -49,6 +48,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "moses/mbr.h" #include "moses/LatticeMBR.h" +#include + #ifdef HAVE_PROTOBUF #include "hypergraph.pb.h" #include "rule.pb.h" @@ -98,6 +99,10 @@ Manager::GetSource() const */ void Manager::Decode() { + + std::cerr << options().nbest.nbest_size << " " + << options().nbest.enabled << " " << std::endl; + // initialize statistics ResetSentenceStats(m_source); IFVERBOSE(2) { @@ -123,7 +128,8 @@ void Manager::Decode() // some reporting on how long this took IFVERBOSE(1) { GetSentenceStats().StopTimeCollectOpts(); - TRACE_ERR("Line "<< m_source.GetTranslationId() << ": Collecting options took " + TRACE_ERR("Line "<< m_source.GetTranslationId() + << ": Collecting options took " << GetSentenceStats().GetTimeCollectOpts() << " seconds at " << __FILE__ << ":" << __LINE__ << endl); } @@ -1112,11 +1118,13 @@ void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSea } -void OutputSearchNode(long translationId, std::ostream &outputSearchGraphStream, - const SearchGraphNode& searchNode) +void +OutputSearchNode(AllOptions const& opts, long translationId, + std::ostream &outputSearchGraphStream, + SearchGraphNode const& searchNode) { const vector &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - bool extendedFormat = StaticData::Instance().GetOutputSearchGraphExtended(); + bool extendedFormat = opts.output.SearchGraphExtended.size(); outputSearchGraphStream << translationId; // special case: initial hypothesis @@ -1369,24 +1377,32 @@ void Manager::SerializeSearchGraphPB( } #endif -void Manager::OutputSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const +void +Manager:: +OutputSearchGraph(long translationId, std::ostream &out) const { vector searchGraph; GetSearchGraph(searchGraph); for (size_t i = 0; i < searchGraph.size(); ++i) { - OutputSearchNode(translationId,outputSearchGraphStream,searchGraph[i]); + OutputSearchNode(options(),translationId,out,searchGraph[i]); } } -void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, - std::vector< const Hypothesis* >* pConnectedList, std::map < const Hypothesis*, set< const Hypothesis* > >* pOutgoingHyps, vector< float>* pFwdBwdScores) const +void +Manager:: +GetForwardBackwardSearchGraph +( std::map< int, bool >* pConnected, + std::vector* pConnectedList, + std::map >* pOutgoingHyps, + vector< float>* pFwdBwdScores) const { std::map < int, bool > &connected = *pConnected; std::vector< const Hypothesis *>& connectedList = *pConnectedList; std::map < int, int > forward; std::map < int, double > forwardScore; - std::map < const Hypothesis*, set > & outgoingHyps = *pOutgoingHyps; + std::map < const Hypothesis*, set > & outgoingHyps + = *pOutgoingHyps; vector< float> & estimatedScores = *pFwdBwdScores; // *** find connected hypotheses *** @@ -1395,7 +1411,8 @@ void Manager::GetForwardBackwardSearchGraph(std::map< int, bool >* pConnected, // ** compute best forward path for each hypothesis *** // // forward cost of hypotheses on final stack is 0 - const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks(); + const std::vector < HypothesisStack* > &hypoStackColl + = m_search->GetHypothesisStacks(); const HypothesisStack &finalStack = *hypoStackColl.back(); HypothesisStack::const_iterator iterHypo; for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) { @@ -1504,34 +1521,34 @@ void Manager::OutputBest(OutputCollector *collector) const if (!options().mbr.enabled) { bestHypo = GetBestHypothesis(); if (bestHypo) { - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << bestHypo->GetTotalScore() << ' '; } - if (staticData.IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { bestHypo->OutputInput(out); out << "||| "; } - const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); - if (params && params->size() && Scan(params->at(0)) ) { - out << translationId << " "; - } - - // VN : I put back the code for OutputPassthroughInformation - if (staticData.IsPassthroughEnabled()) { - OutputPassthroughInformation(out, bestHypo); + // const PARAM_VEC *params = staticData.GetParameter().GetParam("print-id"); + if (options().output.PrintID) { + out << translationId << " "; + } + + // VN : I put back the code for OutputPassthroughInformation + if (options().output.PrintPassThrough) { + OutputPassthroughInformation(out, bestHypo); } // end of add back - if (staticData.GetReportSegmentation() == 2) { + if (options().output.ReportSegmentation == 2) { GetOutputLanguageModelOrder(out, bestHypo); } bestHypo->OutputBestSurface( out, staticData.GetOutputFactorOrder(), - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors()); - if (staticData.PrintAlignmentInfo()) { + options().output.ReportSegmentation, + options().output.ReportAllFactors); + if (options().output.PrintAlignmentInfo) { out << "||| "; bestHypo->OutputAlignment(out); } @@ -1572,8 +1589,9 @@ void Manager::OutputBest(OutputCollector *collector) const } else { //Lattice MBR decoding vector mbrBestHypo = doLatticeMBR(*this,nBestList); - OutputBestHypo(mbrBestHypo, translationId, staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + OutputBestHypo(mbrBestHypo, translationId, + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); IFVERBOSE(2) { PrintUserTime("finished Lattice MBR decoding"); } @@ -1584,8 +1602,8 @@ void Manager::OutputBest(OutputCollector *collector) const else if (options().search.consensus) { const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList); OutputBestHypo(conBestHypo, translationId, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); OutputAlignment(m_alignmentOut, conBestHypo); IFVERBOSE(2) { PrintUserTime("finished Consensus decoding"); @@ -1596,8 +1614,8 @@ void Manager::OutputBest(OutputCollector *collector) const else { const TrellisPath &mbrBestHypo = doMBR(nBestList); OutputBestHypo(mbrBestHypo, translationId, - staticData.GetReportSegmentation(), - staticData.GetReportAllFactors(),out); + options().output.ReportSegmentation, + options().output.ReportAllFactors, out); OutputAlignment(m_alignmentOut, mbrBestHypo); IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); @@ -1624,7 +1642,7 @@ void Manager::OutputNBest(OutputCollector *collector) const long translationId = m_source.GetTranslationId(); if (options().lmbr.enabled) { - if (staticData.options().nbest.enabled) { + if (options().nbest.enabled) { collector->Write(translationId, m_latticeNBestOut.str()); } } else { @@ -1632,22 +1650,24 @@ void Manager::OutputNBest(OutputCollector *collector) const ostringstream out; CalcNBest(options().nbest.nbest_size, nBestList, options().nbest.only_distinct); - OutputNBest(out, nBestList, staticData.GetOutputFactorOrder(), + OutputNBest(out, nBestList, + staticData.GetOutputFactorOrder(), m_source.GetTranslationId(), - staticData.GetReportSegmentation()); + options().output.ReportSegmentation); collector->Write(m_source.GetTranslationId(), out.str()); } } -void Manager::OutputNBest(std::ostream& out - , const Moses::TrellisPathList &nBestList - , const std::vector& outputFactorOrder - , long translationId - , char reportSegmentation) const +void +Manager:: +OutputNBest(std::ostream& out, + const Moses::TrellisPathList &nBestList, + const std::vector& outputFactorOrder, + long translationId, char reportSegmentation) const { const StaticData &staticData = StaticData::Instance(); - NBestOptions const& nbo = staticData.options().nbest; + NBestOptions const& nbo = options().nbest; bool reportAllFactors = nbo.include_all_factors; bool includeSegmentation = nbo.include_segmentation; bool includeWordAlignment = nbo.include_alignment_info; @@ -1661,12 +1681,14 @@ void Manager::OutputNBest(std::ostream& out out << translationId << " ||| "; for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { const Hypothesis &edge = *edges[currEdge]; - OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors); + OutputSurface(out, edge, outputFactorOrder, reportSegmentation, + reportAllFactors); } out << " |||"; // print scores with feature names - path.GetScoreBreakdown()->OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels); // total out << " ||| " << path.GetTotalScore(); @@ -1704,7 +1726,7 @@ void Manager::OutputNBest(std::ostream& out } } - if (StaticData::Instance().IsPathRecoveryEnabled()) { + if (options().output.RecoverPath) { out << " ||| "; OutputInput(out, edges[0]); } @@ -1719,8 +1741,11 @@ void Manager::OutputNBest(std::ostream& out /*** * print surface factor only for the given phrase */ -void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector &outputFactorOrder, - char reportSegmentation, bool reportAllFactors) const +void +Manager:: +OutputSurface(std::ostream &out, const Hypothesis &edge, + const std::vector &outputFactorOrder, + char reportSegmentation, bool reportAllFactors) const { UTIL_THROW_IF2(outputFactorOrder.size() == 0, "Must specific at least 1 output factor"); @@ -1788,26 +1813,33 @@ void Manager::OutputSurface(std::ostream &out, const Hypothesis &edge, const std out << ","; ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown()); scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown()); - scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + scoreBreakdown.OutputAllFeatureScores(out, with_labels); } out << "| "; } } -void Manager::OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const +void +Manager:: +OutputAlignment(ostream &out, const AlignmentInfo &ai, + size_t sourceOffset, size_t targetOffset) const { typedef std::vector< const std::pair* > AlignVec; - AlignVec alignments = ai.GetSortedAlignments(); + AlignVec alignments = ai.GetSortedAlignments(options().output.WA_SortOrder); AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { const std::pair &alignment = **it; - out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; + out << alignment.first + sourceOffset << "-" + << alignment.second + targetOffset << " "; } - + } -void Manager::OutputInput(std::ostream& os, const Hypothesis* hypo) const +void +Manager:: +OutputInput(std::ostream& os, const Hypothesis* hypo) const { size_t len = hypo->GetInput().GetSize(); std::vector inp_phrases(len, 0); @@ -1851,8 +1883,10 @@ void Manager::OutputLatticeSamples(OutputCollector *collector) const TrellisPathList latticeSamples; ostringstream out; CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples); - OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), m_source.GetTranslationId(), - staticData.GetReportSegmentation()); + OutputNBest(out,latticeSamples, + staticData.GetOutputFactorOrder(), + m_source.GetTranslationId(), + options().output.ReportSegmentation); collector->Write(m_source.GetTranslationId(), out.str()); } @@ -1970,14 +2004,10 @@ void Manager::OutputSearchGraphSLF() const long translationId = m_source.GetTranslationId(); // Output search graph in HTK standard lattice format (SLF) - bool slf = staticData.GetOutputSearchGraphSLF(); - if (slf) { + std::string const& slf = options().output.SearchGraphSLF; + if (slf.size()) { util::StringStream fileName; - - string dir; - staticData.GetParameter().SetParameter(dir, "output-search-graph-slf", ""); - - fileName << dir << "/" << translationId << ".slf"; + fileName << slf << "/" << translationId << ".slf"; ofstream *file = new ofstream; file->open(fileName.str().c_str()); if (file->is_open() && file->good()) { @@ -2045,7 +2075,11 @@ void Manager::OutputBestHypo(const std::vector& mbrBestHypo, long /*trans out << endl; } -void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, char reportSegmentation, bool reportAllFactors, std::ostream &out) const +void +Manager:: +OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, + char reportSegmentation, bool reportAllFactors, + std::ostream &out) const { const std::vector &edges = path.GetEdges(); @@ -2056,9 +2090,12 @@ void Manager::OutputBestHypo(const Moses::TrellisPath &path, long /*translationI out << endl; } -void Manager::OutputAlignment(std::ostringstream &out, const TrellisPath &path) const +void +Manager:: +OutputAlignment(std::ostringstream &out, const TrellisPath &path) const { - Hypothesis::OutputAlignment(out, path.GetEdges()); + WordAlignmentSort waso = options().output.WA_SortOrder; + Hypothesis::OutputAlignment(out, path.GetEdges(), waso); // Used by --alignment-output-file so requires endl out << std::endl; } diff --git a/moses/Manager.h b/moses/Manager.h index dbc1bb738..720dee38b 100644 --- a/moses/Manager.h +++ b/moses/Manager.h @@ -131,7 +131,7 @@ protected: // nbest mutable std::ostringstream m_latticeNBestOut; mutable std::ostringstream m_alignmentOut; - + public: void OutputNBest(std::ostream& out , const Moses::TrellisPathList &nBestList , const std::vector& outputFactorOrder diff --git a/moses/MockHypothesis.cpp b/moses/MockHypothesis.cpp index 11c5d0f84..12527aee9 100644 --- a/moses/MockHypothesis.cpp +++ b/moses/MockHypothesis.cpp @@ -39,16 +39,19 @@ MockHypothesisGuard { BOOST_CHECK_EQUAL(alignments.size(), targetSegments.size()); std::vector factors(1,0); - m_sentence.reset(new Sentence(0, sourceSentence, &factors)); + AllOptions const& opts = StaticData::Instance().options(); + m_sentence.reset(new Sentence(0, sourceSentence, opts, &factors)); m_ttask = TranslationTask::create(m_sentence); m_manager.reset(new Manager(m_ttask)); //Initial empty hypothesis - Bitmaps bitmaps(m_sentence.get()->GetSize(), m_sentence.get()->m_sourceCompleted); + Bitmaps bitmaps(m_sentence.get()->GetSize(), + m_sentence.get()->m_sourceCompleted); m_manager->ResetSentenceStats(*m_sentence); const Bitmap &initBitmap = bitmaps.GetInitialBitmap(); - m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, initBitmap); + m_hypothesis = new Hypothesis(*m_manager, *m_sentence, m_initialTransOpt, + initBitmap); //create the chain vector::const_iterator ai = alignments.begin(); @@ -56,7 +59,8 @@ MockHypothesisGuard for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) { Hypothesis* prevHypo = m_hypothesis; Range range(ai->first,ai->second); - const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), range); + const Bitmap &newBitmap = bitmaps.GetBitmap(prevHypo->GetWordsBitmap(), + range); m_targetPhrases.push_back(TargetPhrase(NULL)); // m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL); diff --git a/moses/Parameter.cpp b/moses/Parameter.cpp index f6058cbd8..206f15e50 100644 --- a/moses/Parameter.cpp +++ b/moses/Parameter.cpp @@ -1620,6 +1620,13 @@ SetParameter(bool ¶meter, std::string const& parameterName, } } +void +Parameter:: +SetParameter(bool& var, std::string const& name) +{ + SetParameter(var,name,false); +} + } // namespace diff --git a/moses/Parameter.h b/moses/Parameter.h index f6e20efc2..5dbe5fd30 100644 --- a/moses/Parameter.h +++ b/moses/Parameter.h @@ -149,6 +149,20 @@ public: } } + void SetParameter(bool& var, std::string const& name); + + bool SetBooleanSwitch(bool& val, std::string const name) { + // issues a warning if format is wrong + const PARAM_VEC *params = GetParam(name); + val = (params && params->size()); + if (val && params->size() != 1) + { + TRACE_ERR("ERROR: wrong format for switch -" << name); + return false; + } + return true; + } + }; template<> diff --git a/moses/ScoreComponentCollection.cpp b/moses/ScoreComponentCollection.cpp index d9810224e..1a9af8e57 100644 --- a/moses/ScoreComponentCollection.cpp +++ b/moses/ScoreComponentCollection.cpp @@ -305,35 +305,38 @@ void ScoreComponentCollection::PlusEquals(const FeatureFunction* sp, const Score } } -void ScoreComponentCollection::OutputAllFeatureScores(std::ostream &out) const +void +ScoreComponentCollection:: +OutputAllFeatureScores(std::ostream &out, bool with_labels) const { std::string lastName = ""; const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); for( size_t i=0; iIsTuneable()) { - OutputFeatureScores( out, ff, lastName ); + OutputFeatureScores(out, ff, lastName, with_labels); } } const vector& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions(); for( size_t i=0; iIsTuneable()) { - OutputFeatureScores( out, ff, lastName ); + OutputFeatureScores(out, ff, lastName, with_labels); } } } -void ScoreComponentCollection::OutputFeatureScores( std::ostream& out - , const FeatureFunction *ff - , std::string &lastName ) const +void +ScoreComponentCollection:: +OutputFeatureScores(std::ostream& out, FeatureFunction const* ff, + std::string &lastName, bool with_labels) const { - const StaticData &staticData = StaticData::Instance(); - bool labeledOutput = staticData.options().nbest.include_feature_labels; + // const StaticData &staticData = StaticData::Instance(); + // bool labeledOutput = staticData.options().nbest.include_feature_labels; // regular features (not sparse) if (ff->HasTuneableComponents()) { - if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) { + if( with_labels && lastName != ff->GetScoreProducerDescription() ) { lastName = ff->GetScoreProducerDescription(); out << " " << lastName << "="; } diff --git a/moses/ScoreComponentCollection.h b/moses/ScoreComponentCollection.h index 696658c80..04da0db35 100644 --- a/moses/ScoreComponentCollection.h +++ b/moses/ScoreComponentCollection.h @@ -433,10 +433,9 @@ public: m_scores.merge(other.m_scores); } - void OutputAllFeatureScores(std::ostream &out) const; - void OutputFeatureScores( std::ostream& out - , const Moses::FeatureFunction *ff - , std::string &lastName ) const; + void OutputAllFeatureScores(std::ostream &out, bool with_labels) const; + void OutputFeatureScores(std::ostream& out, Moses::FeatureFunction const* ff, + std::string &lastName, bool with_labels) const; #ifdef MPI_ENABLE public: diff --git a/moses/Sentence.cpp b/moses/Sentence.cpp index f7bc1aeda..4aaf3d069 100644 --- a/moses/Sentence.cpp +++ b/moses/Sentence.cpp @@ -166,7 +166,8 @@ aux_interpret_xml(std::string& line, std::vector & xmlWalls, void Sentence:: -init(string line, std::vector const& factorOrder) +init(string line, std::vector const& factorOrder, + AllOptions const& opts) { using namespace std; const StaticData &SD = StaticData::Instance(); @@ -182,7 +183,8 @@ init(string line, std::vector const& factorOrder) aux_interpret_dlt(line); // some poorly documented cache-based stuff // if sentences is specified as "" - if (SD.IsPassthroughEnabled() || SD.options().nbest.include_passthrough) { + if (SD.options().output.PrintPassThrough || + SD.options().nbest.include_passthrough) { string pthru = PassthroughSGML(line,"passthrough"); this->SetPassthroughInformation(pthru); } @@ -230,12 +232,14 @@ init(string line, std::vector const& factorOrder) int Sentence:: -Read(std::istream& in,const std::vector& factorOrder) +Read(std::istream& in, + const std::vector& factorOrder, + AllOptions const& opts) { std::string line; if (getline(in, line, '\n').eof()) return 0; - init(line, factorOrder); + init(line, factorOrder, opts); return 1; } @@ -366,12 +370,14 @@ CreateFromString(vector const& FOrder, string const& phraseString) } Sentence:: -Sentence(size_t const transId, string const& stext, +Sentence(size_t const transId, + string const& stext, + AllOptions const& opts, vector const* IFO) : InputType(transId) { - if (IFO) init(stext, *IFO); - else init(stext, StaticData::Instance().GetInputFactorOrder()); + if (IFO) init(stext, *IFO, opts); + else init(stext, StaticData::Instance().GetInputFactorOrder(), opts); } } diff --git a/moses/Sentence.h b/moses/Sentence.h index 22ae81ec7..575ae26c1 100644 --- a/moses/Sentence.h +++ b/moses/Sentence.h @@ -1,6 +1,4 @@ -// -*- c++ -*- -// $Id$ - +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh @@ -28,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "Word.h" #include "Phrase.h" #include "InputType.h" +#include "parameters/AllOptions.h" namespace Moses { @@ -66,7 +65,8 @@ protected: public: Sentence(); Sentence(size_t const transId, std::string const& stext, - std::vector const* IFO = NULL); + AllOptions const& opts, + std::vector const* IFO = NULL); // Sentence(size_t const transId, std::string const& stext); ~Sentence(); @@ -97,7 +97,10 @@ public: void GetXmlTranslationOptions(std::vector &list, size_t startPos, size_t endPos) const; std::vector GetXmlChartTranslationOptions() const; - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, const std::vector& factorOrder, + AllOptions const& opts); + void Print(std::ostream& out) const; TranslationOptionCollection* @@ -114,7 +117,8 @@ public: void - init(std::string line, std::vector const& factorOrder); + init(std::string line, std::vector const& factorOrder, + AllOptions const& opts); std::vector > const& GetDltMeta() const { diff --git a/moses/StaticData.cpp b/moses/StaticData.cpp index 341e4d0cc..cc4b1ad35 100644 --- a/moses/StaticData.cpp +++ b/moses/StaticData.cpp @@ -63,7 +63,7 @@ StaticData StaticData::s_instance; StaticData::StaticData() : m_sourceStartPosMattersForRecombination(false) , m_requireSortingAfterSourceContext(false) - , m_inputType(SentenceInput) + // , m_inputType(SentenceInput) , m_lmEnableOOVFeature(false) , m_isAlwaysCreateDirectTranslationOption(false) , m_currentWeightSetting("default") @@ -132,23 +132,11 @@ StaticData const PARAM_VEC *params; // input type has to be specified BEFORE loading the phrase tables! - m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); + // m_parameter->SetParameter(m_inputType, "inputtype", SentenceInput); m_parameter->SetParameter(m_continuePartialTranslation, "continue-partial-translation", false ); - std::string s_it = "text input"; - if (m_inputType == 1) { - s_it = "confusion net"; - } - if (m_inputType == 2) { - s_it = "word lattice"; - } - if (m_inputType == 3) { - s_it = "tree"; - } - VERBOSE(2,"input type is: "<SetParameter(m_xmlInputType, "xml-input", XmlPassThrough); @@ -181,119 +169,30 @@ StaticData m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1); - m_parameter->SetParameter(m_recoverPath, "recover-input-path", false); - if (m_recoverPath && m_inputType == SentenceInput) { - TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n"); - m_recoverPath = false; - } + m_parameter->SetParameter(m_includeLHSInSearchGraph, + "include-lhs-in-search-graph", false ); - m_parameter->SetParameter(m_outputHypoScore, "output-hypo-score", false ); - m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false ); - m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort); - params = m_parameter->GetParam("alignment-output-file"); - if (params && params->size()) { - m_alignmentOutputFile = Scan(params->at(0)); - } - - m_parameter->SetParameter( m_PrintID, "print-id", false ); - m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false ); - - params = m_parameter->GetParam("output-word-graph"); - m_outputWordGraph = (params && params->size() == 2); - - params = m_parameter->GetParam("output-search-graph"); - if (params && params->size()) { - if (params->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph file"; - return false; - } - m_outputSearchGraph = true; - } - // ... in extended format - else if (m_parameter->GetParam("output-search-graph-extended") && - m_parameter->GetParam("output-search-graph-extended")->size()) { - if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file"; - return false; - } - m_outputSearchGraph = true; - m_outputSearchGraphExtended = true; - } else { - m_outputSearchGraph = false; - } - - params = m_parameter->GetParam("output-search-graph-slf"); - if (params && params->size()) { - m_outputSearchGraphSLF = true; - } else { - m_outputSearchGraphSLF = false; - } - - params = m_parameter->GetParam("output-search-graph-hypergraph"); - if (params && params->size()) { - m_outputSearchGraphHypergraph = true; - } else { - m_outputSearchGraphHypergraph = false; - } - -#ifdef HAVE_PROTOBUF - params = m_parameter->GetParam("output-search-graph-pb"); - if (params && params->size()) { - if (params->size() != 1) { - cerr << "ERROR: wrong format for switch -output-search-graph-pb path"; - return false; - } - m_outputSearchGraphPB = true; - } else - m_outputSearchGraphPB = false; -#endif - - m_parameter->SetParameter( m_unprunedSearchGraph, "unpruned-search-graph", false ); - m_parameter->SetParameter( m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false ); - - m_parameter->SetParameter(m_outputUnknownsFile, "output-unknowns", ""); - - // printing source phrase spans - m_parameter->SetParameter( m_reportSegmentation, "report-segmentation", false ); - m_parameter->SetParameter( m_reportSegmentationEnriched, "report-segmentation-enriched", false ); - - // print all factors of output translations - m_parameter->SetParameter( m_reportAllFactors, "report-all-factors", false ); + m_parameter->SetParameter(m_outputUnknownsFile, + "output-unknowns", ""); //Print Translation Options - m_parameter->SetParameter(m_printTranslationOptions, "print-translation-option", false ); - + m_parameter->SetParameter(m_printTranslationOptions, + "print-translation-option", false ); + //Print All Derivations - m_parameter->SetParameter(m_printAllDerivations , "print-all-derivations", false ); - - // additional output - m_parameter->SetParameter(m_detailedTranslationReportingFilePath, - "translation-details", ""); - m_parameter->SetParameter(m_detailedTreeFragmentsTranslationReportingFilePath, - "tree-translation-details", ""); - m_parameter->SetParameter(m_detailedAllTranslationReportingFilePath, - "translation-all-details", ""); - m_parameter->SetParameter(m_startTranslationId, "start-translation-id", 0); + m_parameter->SetParameter(m_printAllDerivations , + "print-all-derivations", false ); + + m_parameter->SetParameter(m_startTranslationId, + "start-translation-id", 0); //lattice samples - params = m_parameter->GetParam("lattice-samples"); - if (params) { - if (params->size() ==2 ) { - m_latticeSamplesFilePath = params->at(0); - m_latticeSamplesSize = Scan(params->at(1)); - } else { - std::cerr <<"wrong format for switch -lattice-samples file size"; - return false; - } - } else { - m_latticeSamplesSize = 0; - } return true; } void -StaticData -::ini_compact_table_options() +StaticData:: +ini_compact_table_options() { // Compact phrase table and reordering model m_parameter->SetParameter(m_minphrMemory, "minphr-memory", false ); @@ -301,8 +200,8 @@ StaticData } void -StaticData -::ini_lm_options() +StaticData:: +ini_lm_options() { m_parameter->SetParameter(m_lmcache_cleanup_threshold, "clean-lm-cache", 1); } @@ -349,8 +248,8 @@ StaticData } void -StaticData -::ini_factor_maps() +StaticData:: +ini_factor_maps() { const PARAM_VEC *params; // factor delimiter @@ -380,8 +279,8 @@ StaticData } void -StaticData -::ini_oov_options() +StaticData:: +ini_oov_options() { // unknown word processing m_parameter->SetParameter(m_dropUnknown, "drop-unknown", false ); @@ -398,8 +297,8 @@ StaticData } void -StaticData -::ini_zombie_options() +StaticData:: +ini_zombie_options() { //Disable discarding m_parameter->SetParameter(m_disableDiscarding, "disable-discarding", false); @@ -434,20 +333,6 @@ bool StaticData::LoadData(Parameter *parameter) // search ini_oov_options(); - // set m_nbest_options.enabled = true if necessary: - if (m_options.mbr.enabled - || m_options.mira - || m_options.search.consensus - || m_outputSearchGraph - || m_outputSearchGraphSLF - || m_outputSearchGraphHypergraph -#ifdef HAVE_PROTOBUF - || m_outputSearchGraphPB -#endif - || m_latticeSamplesFilePath.size()) { - m_options.nbest.enabled = true; - } - // S2T decoder m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm", RecursiveCYKPlus); @@ -455,8 +340,9 @@ bool StaticData::LoadData(Parameter *parameter) ini_zombie_options(); // probably dead, or maybe not - m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", NOT_FOUND); - + m_parameter->SetParameter(m_placeHolderFactor, "placeholder-factor", + NOT_FOUND); + // FEATURE FUNCTION INITIALIZATION HAPPENS HERE =============================== initialize_features(); @@ -507,7 +393,8 @@ void StaticData::SetWeight(const FeatureFunction* sp, float weight) m_allWeights.Assign(sp,weight); } -void StaticData::SetWeights(const FeatureFunction* sp, const std::vector& weights) +void StaticData::SetWeights(const FeatureFunction* sp, + const std::vector& weights) { m_allWeights.Resize(); m_allWeights.Assign(sp,weights); @@ -557,8 +444,10 @@ void StaticData::LoadChartDecodingParameters() LoadNonTerminals(); // source label overlap - m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", SourceLabelOverlapAdd); - m_parameter->SetParameter(m_ruleLimit, "rule-limit", DEFAULT_MAX_TRANS_OPT_SIZE); + m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap", + SourceLabelOverlapAdd); + m_parameter->SetParameter(m_ruleLimit, "rule-limit", + DEFAULT_MAX_TRANS_OPT_SIZE); } @@ -596,12 +485,16 @@ void StaticData::LoadDecodeGraphs() } } -void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const vector &maxChartSpans) +void +StaticData:: +LoadDecodeGraphsOld(const vector &mappingVector, + const vector &maxChartSpans) { const vector& pts = PhraseDictionary::GetColl(); const vector& gens = GenerationDictionary::GetColl(); - const std::vector *featuresRemaining = &FeatureFunction::GetFeatureFunctions(); + const std::vector *featuresRemaining + = &FeatureFunction::GetFeatureFunctions(); DecodeStep *prev = 0; size_t prevDecodeGraphInd = 0; @@ -620,7 +513,8 @@ void StaticData::LoadDecodeGraphsOld(const vector &mappingVector, const // For specifying multiple translation model decodeGraphInd = Scan(token[0]); //the vectorList index can only increment by one - UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, + UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd + && decodeGraphInd != prevDecodeGraphInd + 1, "Malformed mapping"); if (decodeGraphInd > prevDecodeGraphInd) { prev = NULL; @@ -707,7 +601,8 @@ void StaticData::LoadDecodeGraphsNew(const std::vector &mappingVect decodeGraphInd = Scan(token[0]); //the vectorList index can only increment by one - UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd && decodeGraphInd != prevDecodeGraphInd + 1, + UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd + && decodeGraphInd != prevDecodeGraphInd + 1, "Malformed mapping"); if (decodeGraphInd > prevDecodeGraphInd) { prev = NULL; @@ -783,17 +678,6 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight) void StaticData::SetExecPath(const std::string &path) { - /* - namespace fs = boost::filesystem; - - fs::path full_path( fs::initial_path() ); - - full_path = fs::system_complete( fs::path( path ) ); - - //Without file name - m_binPath = full_path.parent_path().string(); - */ - // NOT TESTED size_t pos = path.rfind("/"); if (pos != string::npos) { @@ -810,34 +694,33 @@ const string &StaticData::GetBinDirectory() const float StaticData::GetWeightWordPenalty() const { float weightWP = GetWeight(&WordPenaltyProducer::Instance()); - //VERBOSE(1, "Read weightWP from translation sytem: " << weightWP << std::endl); return weightWP; } void -StaticData -::InitializeForInput(ttasksptr const& ttask) const +StaticData:: +InitializeForInput(ttasksptr const& ttask) const { const std::vector &producers - = FeatureFunction::GetFeatureFunctions(); + = FeatureFunction::GetFeatureFunctions(); for(size_t i=0; i" - // bool m_mbr; //! use MBR decoder - // bool m_useLatticeMBR; //! use MBR decoder - // bool m_mira; // do mira training - // bool m_useConsensusDecoding; //! Use Consensus decoding (DeNero et al 2009) - // size_t m_mbrSize; //! number of translation candidates considered - // float m_mbrScale; //! scaling factor for computing marginal probability of candidate translation - // size_t m_lmbrPruning; //! average number of nodes per word wanted in pruned lattice - // std::vector m_lmbrThetas; //! theta(s) for lattice mbr calculation - // bool m_useLatticeHypSetForLatticeMBR; //! to use nbest as hypothesis set during lattice MBR - // float m_lmbrPrecision; //! unigram precision theta - see Tromble et al 08 for more details - // float m_lmbrPRatio; //! decaying factor for ngram thetas - see Tromble et al 08 for more details - // float m_lmbrMapWeight; //! Weight given to the map solution. See Kumar et al 09 for details - size_t m_lmcache_cleanup_threshold; //! number of translations after which LM claenup is performed (0=never, N=after N translations; default is 1) bool m_lmEnableOOVFeature; @@ -167,15 +141,15 @@ protected: bool m_isAlwaysCreateDirectTranslationOption; //! constructor. only the 1 static variable can be created - bool m_outputWordGraph; //! whether to output word graph - bool m_outputSearchGraph; //! whether to output search graph - bool m_outputSearchGraphExtended; //! ... in extended format - bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) - bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph + // bool m_outputWordGraph; //! whether to output word graph + // bool m_outputSearchGraph; //! whether to output search graph + // bool m_outputSearchGraphExtended; //! ... in extended format + // bool m_outputSearchGraphSLF; //! whether to output search graph in HTK standard lattice format (SLF) + // bool m_outputSearchGraphHypergraph; //! whether to output search graph in hypergraph #ifdef HAVE_PROTOBUF - bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf + // bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf #endif - bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) + // bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only) bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph std::string m_outputUnknownsFile; //! output unknowns in this file @@ -190,7 +164,7 @@ protected: Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal; SourceLabelOverlap m_sourceLabelOverlap; UnknownLHSList m_unknownLHS; - WordAlignmentSort m_wordAlignmentSort; + // WordAlignmentSort m_wordAlignmentSort; int m_threadCount; long m_startTranslationId; @@ -229,10 +203,6 @@ protected: const StatefulFeatureFunction* m_treeStructure; - // number of nonterminal labels -// size_t m_nonTerminalSize; - - void ini_compact_table_options(); void ini_consensus_decoding_options(); void ini_cube_pruning_options(); @@ -278,7 +248,8 @@ public: } #endif - //! Load data into static instance. This function is required as LoadData() is not const + //! Load data into static instance. This function is required as + // LoadData() is not const static bool LoadDataStatic(Parameter *parameter, const std::string &execPath); //! Main function to load everything. Also initialize the Parameter object @@ -336,22 +307,6 @@ public: bool IsWordDeletionEnabled() const { return m_wordDeletionEnabled; } - // size_t GetMaxHypoStackSize() const { - // return m_options.search.stack_size; - // } - // size_t GetMinHypoStackDiversity() const { - // return m_options.search.stack_diversity; - // } - - size_t IsPathRecoveryEnabled() const { - return m_recoverPath; - } - bool IsIDEnabled() const { - return m_PrintID; - } - bool IsPassthroughEnabled() const { - return m_PrintPassthroughInformation; - } int GetMaxDistortion() const { return m_options.reordering.max_distortion; @@ -384,47 +339,6 @@ public: void SetVerboseLevel(int x) const { m_verboseLevel = x; } - char GetReportSegmentation() const { - if (m_reportSegmentation) return 1; - if (m_reportSegmentationEnriched) return 2; - return 0; - } - void SetReportSegmentation(const int &val) { - if (val == 0) - m_reportSegmentation = m_reportSegmentationEnriched = false; - else if (val == 1) - m_reportSegmentation = true; - else if (val == 2) - m_reportSegmentationEnriched = true; - else - std::cerr << "Warning: Invalid value for reportSegmentation (0 - 2)! Ignoring"; - } - - bool GetReportAllFactors() const { - return m_reportAllFactors; - } - - bool IsDetailedTranslationReportingEnabled() const { - return !m_detailedTranslationReportingFilePath.empty(); - } - - bool IsDetailedAllTranslationReportingEnabled() const { - return !m_detailedAllTranslationReportingFilePath.empty(); - } - - const std::string &GetDetailedTranslationReportingFilePath() const { - return m_detailedTranslationReportingFilePath; - } - bool IsDetailedTreeFragmentsTranslationReportingEnabled() const { - return !m_detailedTreeFragmentsTranslationReportingFilePath.empty(); - } - const std::string &GetDetailedTreeFragmentsTranslationReportingFilePath() const { - return m_detailedTreeFragmentsTranslationReportingFilePath; - } - - // bool IsLabeledNBestList() const { - // return m_options.nbest.include_feature_labels; - // } bool UseMinphrInMemory() const { return m_minphrMemory; @@ -434,19 +348,6 @@ public: return m_minlexrMemory; } - // for mert - // size_t GetNBestSize() const { - // return m_options.nbest.nbest_size; - // } - - // const std::string &GetNBestFilePath() const { - // return m_options.nbest.output_file_path; - // } - - // bool IsNBestEnabled() const { - // return m_options.nbest.enabled; - // } - size_t GetLatticeSamplesSize() const { return m_latticeSamplesSize; } @@ -455,22 +356,6 @@ public: return m_latticeSamplesFilePath; } - // size_t GetNBestFactor() const { - // return m_options.nbest.factor; - // } - bool GetOutputWordGraph() const { - return m_outputWordGraph; - } - - //! Sets the global score vector weights for a given FeatureFunction. - InputTypeEnum GetInputType() const { - return m_inputType; - } - - // SearchAlgorithm GetSearchAlgorithm() const { - // return m_searchAlgorithm; - // } - bool IsSyntax(SearchAlgorithm algo = DefaultSearchAlgorithm) const { if (algo == DefaultSearchAlgorithm) algo = m_options.search.algo; @@ -577,33 +462,36 @@ public: return m_lmEnableOOVFeature; } - bool GetOutputSearchGraph() const { - return m_outputSearchGraph; - } - void SetOutputSearchGraph(bool outputSearchGraph) { - m_outputSearchGraph = outputSearchGraph; - } - bool GetOutputSearchGraphExtended() const { - return m_outputSearchGraphExtended; - } - bool GetOutputSearchGraphSLF() const { - return m_outputSearchGraphSLF; - } - bool GetOutputSearchGraphHypergraph() const { - return m_outputSearchGraphHypergraph; - } -#ifdef HAVE_PROTOBUF - bool GetOutputSearchGraphPB() const { - return m_outputSearchGraphPB; - } -#endif + // bool GetOutputSearchGraph() const { + // return m_outputSearchGraph; + // } + + // void SetOutputSearchGraph(bool outputSearchGraph) { + // m_outputSearchGraph = outputSearchGraph; + // } + + // bool GetOutputSearchGraphExtended() const { + // return m_outputSearchGraphExtended; + // } + // GetOutputSearchGraphSLF() const { + // return m_outputSearchGraphSLF; + // } + // bool GetOutputSearchGraphHypergraph() const { + // return m_outputSearchGraphHypergraph; + // } + +// #ifdef HAVE_PROTOBUF +// bool GetOutputSearchGraphPB() const { +// return m_outputSearchGraphPB; +// } +// #endif const std::string& GetOutputUnknownsFile() const { return m_outputUnknownsFile; } - bool GetUnprunedSearchGraph() const { - return m_unprunedSearchGraph; - } + // bool GetUnprunedSearchGraph() const { + // return m_unprunedSearchGraph; + // } bool GetIncludeLHSInSearchGraph() const { return m_includeLHSInSearchGraph; @@ -640,9 +528,9 @@ public: return m_sourceLabelOverlap; } - bool GetOutputHypoScore() const { - return m_outputHypoScore; - } + // bool GetOutputHypoScore() const { + // return m_outputHypoScore; + // } size_t GetRuleLimit() const { return m_ruleLimit; } @@ -675,16 +563,16 @@ public: return m_bookkeeping_options.need_alignment_info; // return m_needAlignmentInfo; } - const std::string &GetAlignmentOutputFile() const { - return m_alignmentOutputFile; - } - bool PrintAlignmentInfo() const { - return m_PrintAlignmentInfo; - } + // const std::string &GetAlignmentOutputFile() const { + // return m_alignmentOutputFile; + // } + // bool PrintAlignmentInfo() const { + // return m_PrintAlignmentInfo; + // } - WordAlignmentSort GetWordAlignmentSort() const { - return m_wordAlignmentSort; - } + // WordAlignmentSort GetWordAlignmentSort() const { + // return m_wordAlignmentSort; + // } bool GetHasAlternateWeightSettings() const { return m_weightSetting.size() > 0; diff --git a/moses/Syntax/Manager.cpp b/moses/Syntax/Manager.cpp index 10b0d25c9..9a22b593a 100644 --- a/moses/Syntax/Manager.cpp +++ b/moses/Syntax/Manager.cpp @@ -26,12 +26,12 @@ void Manager::OutputBest(OutputCollector *collector) const const SHyperedge *best = GetBestSHyperedge(); if (best == NULL) { VERBOSE(1, "NO BEST TRANSLATION" << std::endl); - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << "0 "; } out << '\n'; } else { - if (StaticData::Instance().GetOutputHypoScore()) { + if (options().output.ReportHypoScore) { out << best->label.score << " "; } Phrase yield = GetOneBestTargetYield(*best); @@ -49,12 +49,10 @@ void Manager::OutputBest(OutputCollector *collector) const void Manager::OutputNBest(OutputCollector *collector) const { if (collector) { - const StaticData &staticData = StaticData::Instance(); long translationId = m_source.GetTranslationId(); - KBestExtractor::KBestVec nBestList; - ExtractKBest(staticData.options().nbest.nbest_size, nBestList, - staticData.options().nbest.only_distinct); + ExtractKBest(options().nbest.nbest_size, nBestList, + options().nbest.only_distinct); OutputNBestList(collector, nBestList, translationId); } } @@ -111,7 +109,8 @@ void Manager::OutputNBestList(OutputCollector *collector, out << translationId << " ||| "; OutputSurface(out, outputPhrase, outputFactorOrder, false); out << " ||| "; - derivation.scoreBreakdown.OutputAllFeatureScores(out); + bool with_labels = options().nbest.include_feature_labels; + derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels); out << " ||| " << derivation.score; // optionally, print word alignments diff --git a/moses/Syntax/S2T/OovHandler-inl.h b/moses/Syntax/S2T/OovHandler-inl.h index 9f26563d5..3655a0155 100644 --- a/moses/Syntax/S2T/OovHandler-inl.h +++ b/moses/Syntax/S2T/OovHandler-inl.h @@ -66,7 +66,7 @@ template TargetPhrase *OovHandler::SynthesizeTargetPhrase( const Word &oov, const Phrase &srcPhrase, const Word &targetLhs, float prob) { - const StaticData &staticData = StaticData::Instance(); + const StaticData &SD = StaticData::Instance(); const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); @@ -82,8 +82,8 @@ TargetPhrase *OovHandler::SynthesizeTargetPhrase( targetPhrase->EvaluateInIsolation(srcPhrase); targetPhrase->SetTargetLHS(&targetLhs); targetPhrase->SetAlignmentInfo("0-0"); - if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || - staticData.GetTreeStructure() != NULL) { + if (!SD.options().output.detailed_tree_transrep_filepath.empty() || + SD.GetTreeStructure() != NULL) { std::string value = "[ " + targetLhs[0]->GetString().as_string() + " " + oov[0]->GetString().as_string() + " ]"; targetPhrase->SetProperty("Tree", value); diff --git a/moses/TabbedSentence.cpp b/moses/TabbedSentence.cpp index ae0876595..74e3de8f2 100644 --- a/moses/TabbedSentence.cpp +++ b/moses/TabbedSentence.cpp @@ -45,7 +45,11 @@ void TabbedSentence::CreateFromString(const std::vector &factorOrder } } -int TabbedSentence::Read(std::istream& in, const std::vector& factorOrder) +int +TabbedSentence:: +Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) { TabbedColumns allColumns; @@ -58,14 +62,14 @@ int TabbedSentence::Read(std::istream& in, const std::vector& factor if(allColumns.size() < 2) { std::stringstream dummyStream; dummyStream << line << std::endl; - return Sentence::Read(dummyStream, factorOrder); + return Sentence::Read(dummyStream, factorOrder, opts); } else { m_columns.resize(allColumns.size() - 1); std::copy(allColumns.begin() + 1, allColumns.end(), m_columns.begin()); std::stringstream dummyStream; dummyStream << allColumns[0] << std::endl; - return Sentence::Read(dummyStream, factorOrder); + return Sentence::Read(dummyStream, factorOrder, opts); } } diff --git a/moses/TabbedSentence.h b/moses/TabbedSentence.h index e481e6dac..de08afa0f 100644 --- a/moses/TabbedSentence.h +++ b/moses/TabbedSentence.h @@ -67,7 +67,9 @@ public: virtual void CreateFromString(const std::vector &factorOrder , const std::string &tabbedString); - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in,const std::vector& factorOrder, + AllOptions const& opts); const TabbedColumns& GetColumns() const { return m_columns; diff --git a/moses/TranslationModel/UG/mmsapt.cpp b/moses/TranslationModel/UG/mmsapt.cpp index e86767b46..1110bc16a 100644 --- a/moses/TranslationModel/UG/mmsapt.cpp +++ b/moses/TranslationModel/UG/mmsapt.cpp @@ -867,10 +867,10 @@ namespace Moses boost::unique_lock ctxlock(context->lock); if (localcache) std::cerr << "have local cache " << std::endl; - std::cerr << "BOO at " << HERE << std::endl; + // std::cerr << "BOO at " << HERE << std::endl; if (!localcache) { - std::cerr << "no local cache at " << HERE << std::endl; + // std::cerr << "no local cache at " << HERE << std::endl; setup_bias(ttask); if (context->bias) { diff --git a/moses/TranslationModel/UG/ptable-lookup.cc b/moses/TranslationModel/UG/ptable-lookup.cc index d37097c97..d350d7a3e 100644 --- a/moses/TranslationModel/UG/ptable-lookup.cc +++ b/moses/TranslationModel/UG/ptable-lookup.cc @@ -69,7 +69,7 @@ int main(int argc, char* argv[]) while (true) { boost::shared_ptr phrase(new Sentence); - if (!phrase->Read(cin,ifo)) break; + if (!phrase->Read(cin,ifo, StaticData::Instance().options())) break; boost::shared_ptr ttask; ttask = TranslationTask::create(phrase); if (pdta) diff --git a/moses/TranslationOptionCollectionConfusionNet.cpp b/moses/TranslationOptionCollectionConfusionNet.cpp index 761bc4137..397ca01a5 100644 --- a/moses/TranslationOptionCollectionConfusionNet.cpp +++ b/moses/TranslationOptionCollectionConfusionNet.cpp @@ -1,4 +1,4 @@ -// $Id$ +// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*- #include #include @@ -12,7 +12,7 @@ #include "TranslationModel/PhraseDictionaryTreeAdaptor.h" #include "util/exception.hh" #include - +#include "TranslationTask.h" using namespace std; namespace Moses @@ -41,7 +41,7 @@ TranslationOptionCollectionConfusionNet(ttasksptr const& ttask, size_t inputSize = input.GetSize(); m_inputPathMatrix.resize(inputSize); - size_t maxSizePhrase = StaticData::Instance().GetMaxPhraseLength(); + size_t maxSizePhrase = ttask->options().search.max_phrase_length; maxSizePhrase = std::min(inputSize, maxSizePhrase); // 1-word phrases @@ -234,8 +234,10 @@ CreateTranslationOptionsForRangeLEGACY(const DecodeGraph &decodeGraph, size_t st list ::const_iterator iterStep = decodeGraph.begin(); const DecodeStep &decodeStep = **iterStep; - static_cast(decodeStep).ProcessInitialTranslationLEGACY - (m_source, *oldPtoc, startPos, endPos, adhereTableLimit, inputPathList); + DecodeStepTranslation const& dstep + = static_cast(decodeStep); + dstep.ProcessInitialTransLEGACY(m_source, *oldPtoc, startPos, endPos, + adhereTableLimit, inputPathList); // do rest of decode steps int indexStep = 0; diff --git a/moses/TranslationTask.cpp b/moses/TranslationTask.cpp index 122d8313b..5eab55c82 100644 --- a/moses/TranslationTask.cpp +++ b/moses/TranslationTask.cpp @@ -186,7 +186,8 @@ void TranslationTask::Run() // report thread number #if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS) - VERBOSE(2, "Translating line " << translationId << " in thread id " << pthread_self() << endl); + VERBOSE(2, "Translating line " << translationId << " in thread id " + << pthread_self() << endl); #endif @@ -214,8 +215,8 @@ void TranslationTask::Run() OutputCollector* ocoll; Timer additionalReportingTime; additionalReportingTime.start(); - boost::shared_ptr const& io = m_ioWrapper; + manager->OutputBest(io->GetSingleBestOutputCollector()); // output word graph @@ -229,7 +230,7 @@ void TranslationTask::Run() // Output search graph in hypergraph format for Kenneth Heafield's // lazy hypergraph decoder; writes to stderr - if (StaticData::Instance().GetOutputSearchGraphHypergraph()) { + if (options().output.SearchGraphHG.size()) { size_t transId = manager->GetSource().GetTranslationId(); string fname = io->GetHypergraphOutputFileName(transId); manager->OutputSearchGraphAsHypergraph(fname, PRECISION); diff --git a/moses/TreeInput.cpp b/moses/TreeInput.cpp index 89ce0ba65..9d384deb3 100644 --- a/moses/TreeInput.cpp +++ b/moses/TreeInput.cpp @@ -237,7 +237,10 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector } //! populate this InputType with data from in stream -int TreeInput::Read(std::istream& in,const std::vector& factorOrder) +int +TreeInput:: +Read(std::istream& in, const std::vector& factorOrder, + AllOptions const& opts) { const StaticData &staticData = StaticData::Instance(); @@ -254,8 +257,8 @@ int TreeInput::Read(std::istream& in,const std::vector& factorOrder) stringstream strme; strme << line << endl; - Sentence::Read(strme, factorOrder); - + Sentence::Read(strme, factorOrder, opts); + // size input chart size_t sourceSize = GetSize(); m_sourceChart.resize(sourceSize); diff --git a/moses/TreeInput.h b/moses/TreeInput.h index 2716831cc..2116334c1 100644 --- a/moses/TreeInput.h +++ b/moses/TreeInput.h @@ -53,7 +53,10 @@ public: } //! populate this InputType with data from in stream - virtual int Read(std::istream& in,const std::vector& factorOrder); + virtual int + Read(std::istream& in, + const std::vector& factorOrder, + AllOptions const& opts); //! Output debugging info to stream out virtual void Print(std::ostream&) const; diff --git a/moses/WordLattice.cpp b/moses/WordLattice.cpp index 51ef4be9e..e597affc5 100644 --- a/moses/WordLattice.cpp +++ b/moses/WordLattice.cpp @@ -147,7 +147,11 @@ InitializeFromPCNDataType return !cn.empty(); } -int WordLattice::Read(std::istream& in,const std::vector& factorOrder) +int +WordLattice:: +Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts) { Clear(); std::string line; diff --git a/moses/WordLattice.h b/moses/WordLattice.h index 70b1602f2..992fb9498 100644 --- a/moses/WordLattice.h +++ b/moses/WordLattice.h @@ -43,8 +43,10 @@ public: int InitializeFromPCNDataType(const PCN::CN& cn, const std::vector& factorOrder, const std::string& debug_line = ""); /** Read from PLF format (1 lattice per line) */ - int Read(std::istream& in,const std::vector& factorOrder); - + int Read(std::istream& in, + std::vector const& factorOrder, + AllOptions const& opts); + /** Convert internal representation into an edge matrix * @note edges[1][2] means there is an edge from 1 to 2 */ diff --git a/moses/parameters/AllOptions.cpp b/moses/parameters/AllOptions.cpp index 32a2bcd36..3f481e77d 100644 --- a/moses/parameters/AllOptions.cpp +++ b/moses/parameters/AllOptions.cpp @@ -21,6 +21,7 @@ namespace Moses if (!input.init(param)) return false; if (!mbr.init(param)) return false; if (!lmbr.init(param)) return false; + if (!output.init(param)) return false; param.SetParameter(mira, "mira", false); @@ -45,12 +46,31 @@ namespace Moses { if (mbr.enabled) { - cerr << "Error: Cannot use consensus decoding together with mbr" << endl; + cerr << "Error: Cannot use consensus decoding together with mbr" + << endl; return false; } mbr.enabled = true; } + // RecoverPath should only be used with confusion net or word lattice input + if (output.RecoverPath && input.input_type == SentenceInput) + { + TRACE_ERR("--recover-input-path should only be used with " + <<"confusion net or word lattice input!\n"); + output.RecoverPath = false; + } + + // set m_nbest_options.enabled = true if necessary: + nbest.enabled = (nbest.enabled || mira || search.consensus + || nbest.nbest_size > 0 + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || !output.SearchGraphHG.empty() + || !output.SearchGraphPB.empty() + || output.lattice_sample_size != 0); + return true; } @@ -67,9 +87,24 @@ namespace Moses if (!input.update(param)) return false; if (!mbr.update(param)) return false; if (!lmbr.update(param)) return false; - return true; + if (!output.update(param)) return false; + return sanity_check(); } + #endif + bool + AllOptions:: + NBestDistinct() const + { + return (nbest.only_distinct + || mbr.enabled || lmbr.enabled + || output.lattice_sample_size + || !output.SearchGraph.empty() + || !output.SearchGraphExtended.empty() + || !output.SearchGraphSLF.empty() + || !output.SearchGraphHG.empty()); + } + } diff --git a/moses/parameters/AllOptions.h b/moses/parameters/AllOptions.h index 5f9949a76..c5f6e44e2 100644 --- a/moses/parameters/AllOptions.h +++ b/moses/parameters/AllOptions.h @@ -11,6 +11,7 @@ #include "InputOptions.h" #include "MBR_Options.h" #include "LMBR_Options.h" +#include "ReportingOptions.h" namespace Moses { struct @@ -24,7 +25,7 @@ namespace Moses InputOptions input; MBR_Options mbr; LMBR_Options lmbr; - + ReportingOptions output; bool mira; // StackOptions stack; @@ -38,6 +39,8 @@ namespace Moses bool update(std::mapconst& param); #endif + bool NBestDistinct() const; + }; } diff --git a/moses/parameters/NBestOptions.cpp b/moses/parameters/NBestOptions.cpp index d61a67c2f..e916c3437 100644 --- a/moses/parameters/NBestOptions.cpp +++ b/moses/parameters/NBestOptions.cpp @@ -1,4 +1,4 @@ -// -*- mode: c++; cc-style: gnu -*- +// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #include "moses/Parameter.h" #include "NBestOptions.h" @@ -33,4 +33,21 @@ init(Parameter const& P) enabled = output_file_path.size(); return true; } + +#ifdef HAVE_XMLRPC_C +bool +NBestOptions:: +update(std::mapconst& param) +{ + typedef std::map params_t; + params_t::const_iterator si = param.find("nbest"); + if (si != param.end()) + nbest_size = xmlrpc_c::value_int(si->second); + only_distinct = check(param, "nbest-distinct"); + enabled = (nbest_size > 0); + return true; +} +#endif + + } // namespace Moses diff --git a/moses/parameters/NBestOptions.h b/moses/parameters/NBestOptions.h index 61e3c9806..05da48508 100644 --- a/moses/parameters/NBestOptions.h +++ b/moses/parameters/NBestOptions.h @@ -24,6 +24,10 @@ struct NBestOptions : public OptionsBaseClass bool init(Parameter const& param); +#ifdef HAVE_XMLRPC_C + bool update(std::mapconst& param); +#endif + }; } diff --git a/moses/parameters/OptionsBaseClass.cpp b/moses/parameters/OptionsBaseClass.cpp index e0b23babf..0bb914417 100644 --- a/moses/parameters/OptionsBaseClass.cpp +++ b/moses/parameters/OptionsBaseClass.cpp @@ -10,6 +10,16 @@ namespace Moses { return true; } + + bool + OptionsBaseClass:: + check(std::map const& param, + std::string const key) + { + std::map::const_iterator m; + return (param.find(key) != param.end()); + } + #endif } diff --git a/moses/parameters/OptionsBaseClass.h b/moses/parameters/OptionsBaseClass.h index cb62467cf..55713a174 100644 --- a/moses/parameters/OptionsBaseClass.h +++ b/moses/parameters/OptionsBaseClass.h @@ -12,6 +12,10 @@ namespace Moses #ifdef HAVE_XMLRPC_C virtual bool update(std::mapconst& params); + + bool + check(std::map const& param, + std::string const key); #endif }; } diff --git a/moses/parameters/ReportingOptions.cpp b/moses/parameters/ReportingOptions.cpp index 25ae2f779..fa06eb06f 100644 --- a/moses/parameters/ReportingOptions.cpp +++ b/moses/parameters/ReportingOptions.cpp @@ -1,5 +1,4 @@ // -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- -#if 0 #include "ReportingOptions.h" #include "moses/Parameter.h" @@ -9,82 +8,70 @@ namespace Moses { ReportingOptions:: init(Parameter const& param) { + // including factors in the output + param.SetParameter(ReportAllFactors, "report-all-factors", false); + + // segmentation reporting + ReportSegmentation = (param.GetParam("report-segmentation-enriched") + ? 2 : param.GetParam("report-segmentation") + ? 1 : 0); + + // word alignment reporting + param.SetParameter(PrintAlignmentInfo, "print-alignment-info", false); + param.SetParameter(WA_SortOrder, "sort-word-alignment", NoSort); + std::string e; // hack to save us param.SetParameter(...) + param.SetParameter(AlignmentOutputFile,"alignment-output-file", e); + + // output a word graph PARAM_VEC const* params; - - param.SetParameter(segmentation, "report-segmentation", false ); - param.SetParameter(segmentation_enriched, "report-segmentation-enriched", false); - param.SetParameter(all_factors, "report-all-factors", false ); - - // print ... - param.SetParameter(id, "print-id", false ); - param.SetParameter(aln_info, "print-alignment-info", false); - param.SetParameter(passthrough, "print-passthrough", false ); - - param.SetParameter(detailed_transrep_filepath, "translation-details", ""); - param.SetParameter(detailed_tree_transrep_filepath, - "tree-translation-details", ""); - param.SetParameter(detailed_all_transrep_filepath, - "translation-all-details", ""); - - // output search graph - param.SetParameter(output, - "translation-all-details", ""); - - - - param.SetParameter(sort_word_alignment, "sort-word-alignment", NoSort); - - - // Is there a reason why we can't use SetParameter here? [UG] - = param.GetParam("alignment-output-file"); - if (params && params->size()) { - m_alignmentOutputFile = Scan(params->at(0)); - } - params = param.GetParam("output-word-graph"); - output_word_graph = (params && params->size() == 2); - - // bizarre code ahead! Why do we need to do the checks here? - // as adapted from StaticData.cpp - params = param.GetParam("output-search-graph"); - if (params && params->size()) { - if (params->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph file"; - return false; - } - output_search_graph = true; - } - else if (m_parameter->GetParam("output-search-graph-extended") && - m_parameter->GetParam("output-search-graph-extended")->size()) { - if (m_parameter->GetParam("output-search-graph-extended")->size() != 1) { - std::cerr << "ERROR: wrong format for switch -output-search-graph-extended file"; - return false; - } - output_search_graph = true; - m_outputSearchGraphExtended = true; - } else { - m_outputSearchGraph = false; - } - - params = m_parameter->GetParam("output-search-graph-slf"); - output_search_graph_slf = params && params->size(); - params = m_parameter->GetParam("output-search-graph-hypergraph"); - output_search_graph_hypergraph = params && params->size(); + WordGraph = (params && params->size() == 2); // what are the two options? + // dump the search graph + param.SetParameter(SearchGraph, "output-search-graph", e); + param.SetParameter(SearchGraphExtended, "output-search-graph-extended", e); + param.SetParameter(SearchGraphSLF,"output-search-graph-slf", e); + param.SetParameter(SearchGraphHG, "output-search-graph-hypergraph", e); #ifdef HAVE_PROTOBUF - params = m_parameter->GetParam("output-search-graph-pb"); - if (params && params->size()) { - if (params->size() != 1) { - cerr << "ERROR: wrong format for switch -output-search-graph-pb path"; - return false; - } - m_outputSearchGraphPB = true; - } else - m_outputSearchGraphPB = false; + param.SetParameter(SearchGraphPB, "output-search-graph-pb", e); #endif + param.SetParameter(DontPruneSearchGraph, "unpruned-search-graph", false); + + + // miscellaneous + param.SetParameter(RecoverPath, "recover-input-path",false); + param.SetParameter(ReportHypoScore, "output-hypo-score",false); + param.SetParameter(PrintID, "print-id",false); + param.SetParameter(PrintPassThrough, "print-passthrough",false); + param.SetParameter(detailed_all_transrep_filepath, + "translation-all-details", e); + param.SetParameter(detailed_transrep_filepath, "translation-details", e); + param.SetParameter(detailed_tree_transrep_filepath, + "tree-translation-details", e); + params = param.GetParam("lattice-samples"); + if (params) { + if (params->size() ==2 ) { + lattice_sample_filepath = params->at(0); + lattice_sample_size = Scan(params->at(1)); + } else { + std::cerr <<"wrong format for switch -lattice-samples file size"; + return false; + } + } else { + lattice_sample_size = 0; + } + return true; + } + +#ifdef HAVE_XMLRPC_C + bool + ReportingOptions:: + update(std::mapconst& param) + { + ReportAllFactors = check(param, "report-all-factors"); return true; } -} #endif +} diff --git a/moses/parameters/ReportingOptions.h b/moses/parameters/ReportingOptions.h index 343bd58ec..0c4c2ac58 100644 --- a/moses/parameters/ReportingOptions.h +++ b/moses/parameters/ReportingOptions.h @@ -2,40 +2,59 @@ #pragma once #include #include "moses/Parameter.h" +#include "OptionsBaseClass.h" + namespace Moses { struct - ReportingOptions + ReportingOptions : public OptionsBaseClass { + bool ReportAllFactors; // m_reportAllFactors; - WordAlignmentSort sort_word_alignment; // 0: no, 1: target order + int ReportSegmentation; // 0: no 1: m_reportSegmentation 2: ..._enriched - - bool segmentation; // m_reportSegmentation; - bool segmentation_enriched; // m_reportSegmentationEnriched; - bool all_factors; // m_reportAllFactors; + bool PrintAlignmentInfo; // m_PrintAlignmentInfo + WordAlignmentSort WA_SortOrder; // 0: no, 1: target order + std::string AlignmentOutputFile; - bool output_word_graph; - bool output_search_graph; - bool output_search_graph_extended; - bool output_search_graph_slf; - bool output_search_graph_hypergraph; - bool output_search_graph_protobuf; + bool WordGraph; + + std::string SearchGraph; + std::string SearchGraphExtended; + std::string SearchGraphSLF; + std::string SearchGraphHG; + std::string SearchGraphPB; + bool DontPruneSearchGraph; + + bool RecoverPath; // recover input path? + bool ReportHypoScore; + + bool PrintID; + bool PrintPassThrough; // print .. bool aln_info; // m_PrintAlignmentInfo; - bool id; // m_PrintID; - bool passthrough; // m_PrintPassthroughInformation; // transrep = translation reporting std::string detailed_transrep_filepath; std::string detailed_tree_transrep_filepath; std::string detailed_all_transrep_filepath; - - std::string aln_output_file; // m_alignmentOutputFile; + + std::string lattice_sample_filepath; + size_t lattice_sample_size; bool init(Parameter const& param); + + /// do we need to keep the search graph from decoding? + bool NeedSearchGraph() const { + return !(SearchGraph.empty() && SearchGraphExtended.empty()); + } + +#ifdef HAVE_XMLRPC_C + bool update(std::mapconst& param); +#endif + }; } diff --git a/moses/parameters/SearchOptions.cpp b/moses/parameters/SearchOptions.cpp index 6636ba744..39ac64515 100644 --- a/moses/parameters/SearchOptions.cpp +++ b/moses/parameters/SearchOptions.cpp @@ -36,6 +36,7 @@ namespace Moses beam_width = TransformScore(beam_width); trans_opt_threshold = TransformScore(trans_opt_threshold); early_discarding_threshold = TransformScore(early_discarding_threshold); + return true; } diff --git a/moses/server/TranslationRequest.cpp b/moses/server/TranslationRequest.cpp index 09d21c322..085d16622 100644 --- a/moses/server/TranslationRequest.cpp +++ b/moses/server/TranslationRequest.cpp @@ -25,8 +25,7 @@ using Moses::Sentence; boost::shared_ptr TranslationRequest:: create(Translator* translator, xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, - boost::mutex& mut) + boost::condition_variable& cond, boost::mutex& mut) { boost::shared_ptr ret; ret.reset(new TranslationRequest(paramList, cond, mut)); @@ -60,10 +59,9 @@ Run() Moses::StaticData const& SD = Moses::StaticData::Instance(); //Make sure alternative paths are retained, if necessary - if (m_withGraphInfo || m_nbestSize>0) - // why on earth is this a global variable? Is this even thread-safe???? UG - (const_cast(SD)).SetOutputSearchGraph(true); - + // if (m_withGraphInfo || m_nbestSize>0) + // why on earth is this a global variable? Is this even thread-safe???? UG + // (const_cast(SD)).SetOutputSearchGraph(true); // std::stringstream out, graphInfo, transCollOpts; if (SD.IsSyntax()) @@ -170,7 +168,14 @@ outputNBest(const Manager& manager, map& retData) { TrellisPathList nBestList; vector nBestXml; - manager.CalcNBest(m_nbestSize, nBestList, m_nbestDistinct); + manager.CalcNBest(m_options.nbest.nbest_size, nBestList, + m_options.nbest.only_distinct); + + StaticData const& SD = StaticData::Instance(); + manager.OutputNBest(cout, nBestList, + SD.GetOutputFactorOrder(), + m_source->GetTranslationId(), + options().output.ReportSegmentation); BOOST_FOREACH(Moses::TrellisPath const* path, nBestList) { vector const& E = path->GetEdges(); @@ -180,7 +185,8 @@ outputNBest(const Manager& manager, map& retData) if (m_withScoreBreakdown) { // should the score breakdown be reported in a more structured manner? ostringstream buf; - path->GetScoreBreakdown()->OutputAllFeatureScores(buf); + bool with_labels = m_options.nbest.include_feature_labels; + path->GetScoreBreakdown()->OutputAllFeatureScores(buf, with_labels); nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str()); } @@ -228,23 +234,23 @@ insertTranslationOptions(Moses::Manager& manager, retData["topt"] = xmlrpc_c::value_array(toptsXml); } -bool -check(std::map const& params, std::string const key) -{ - std::map::const_iterator m; - return (params.find(key) != params.end()); -} - TranslationRequest:: TranslationRequest(xmlrpc_c::paramList const& paramList, boost::condition_variable& cond, boost::mutex& mut) : m_cond(cond), m_mutex(mut), m_done(false), m_paramList(paramList) - , m_nbestSize(0) + // , m_nbestSize(0) , m_session_id(0) { m_options = StaticData::Instance().options(); } +bool +check(std::map const& param, + std::string const key) +{ + std::map::const_iterator m; + return (param.find(key) != param.end()); +} void TranslationRequest:: @@ -274,10 +280,9 @@ parse_request(std::map const& params) m_withWordAlignInfo = check(params, "word-align"); m_withGraphInfo = check(params, "sg"); m_withTopts = check(params, "topt"); - m_reportAllFactors = check(params, "report-all-factors"); - m_nbestDistinct = check(params, "nbest-distinct"); + // m_reportAllFactors = check(params, "report-all-factors"); + // m_nbestDistinct = check(params, "nbest-distinct"); m_withScoreBreakdown = check(params, "add-score-breakdown"); - m_source.reset(new Sentence(0,m_source_string)); si = params.find("lambda"); if (si != params.end()) { @@ -298,9 +303,9 @@ parse_request(std::map const& params) } } - si = params.find("nbest"); - if (si != params.end()) - m_nbestSize = xmlrpc_c::value_int(si->second); + // si = params.find("nbest"); + // if (si != params.end()) + // m_nbestSize = xmlrpc_c::value_int(si->second); si = params.find("context"); if (si != params.end()) @@ -309,6 +314,8 @@ parse_request(std::map const& params) VERBOSE(1,"CONTEXT " << context); m_context.reset(new std::vector(1,context)); } + + // // biased sampling for suffix-array-based sampling phrase table? // if ((si = params.find("bias")) != params.end()) // { @@ -317,6 +324,7 @@ parse_request(std::map const& params) // for (size_t i = 1; i < tmp.size(); i += 2) // m_bias[xmlrpc_c::value_int(tmp[i-1])] = xmlrpc_c::value_double(tmp[i]); // } + m_source.reset(new Sentence(0,m_source_string,m_options)); } // end of Translationtask::parse_request() @@ -326,8 +334,8 @@ run_chart_decoder() { Moses::TreeInput tinput; istringstream buf(m_source_string + "\n"); - tinput.Read(buf, StaticData::Instance().GetInputFactorOrder()); - + tinput.Read(buf, StaticData::Instance().GetInputFactorOrder(), m_options); + Moses::ChartManager manager(this->self()); manager.Decode(); @@ -393,8 +401,13 @@ void TranslationRequest:: run_phrase_decoder() { + if (m_withGraphInfo || m_options.nbest.nbest_size>0) + m_options.output.SearchGraph = "true"; + Manager manager(this->self()); // if (m_bias.size()) manager.SetBias(&m_bias); + + manager.Decode(); pack_hypothesis(manager.GetBestHypothesis(), "text", m_retData); @@ -403,10 +416,10 @@ run_phrase_decoder() if (m_withGraphInfo) insertGraphInfo(manager,m_retData); if (m_withTopts) insertTranslationOptions(manager,m_retData); - if (m_nbestSize) outputNBest(manager, m_retData); + if (m_options.nbest.nbest_size) outputNBest(manager, m_retData); - (const_cast(Moses::StaticData::Instance())) - .SetOutputSearchGraph(false); + // (const_cast(Moses::StaticData::Instance())) + // .SetOutputSearchGraph(false); // WTF? one more reason not to have this as global variable! --- UG } diff --git a/moses/server/TranslationRequest.h b/moses/server/TranslationRequest.h index 866eca20e..9736a7a8f 100644 --- a/moses/server/TranslationRequest.h +++ b/moses/server/TranslationRequest.h @@ -43,9 +43,9 @@ TranslationRequest : public virtual Moses::TranslationTask bool m_withGraphInfo; bool m_withTopts; bool m_reportAllFactors; - bool m_nbestDistinct; + // bool m_nbestDistinct; bool m_withScoreBreakdown; - size_t m_nbestSize; + // size_t m_nbestSize; uint64_t m_session_id; // 0 means none, 1 means new