From 59c4ba9f4df5f5266a79ddf983fce1b7afa7622b Mon Sep 17 00:00:00 2001 From: lexi_birch Date: Fri, 16 Feb 2007 15:56:44 +0000 Subject: [PATCH] Merging from branch. From now on, we can have multiple decoder step lists to accommodate backoff. Specify this as an extra parameter in the [mapping] option in the ini file. This is backwards compatible. Before (and still accepted): [mapping] T 0 Now you can have: [mapping] 0 T 0 1 T 1 1 G 0 Imagine, for instance, that translation table 0 is words - words, translation table 1 is stems - stems, and generation table 0 is stems - words. This will allow us to back off to stems if words are not found. It is not really backoff, because all the options from both decoder step lists get included in the translation option collection, which is then used to create the hypotheses. The different paths must have their weights carefully balanced. MERT might not be enough to discover the best weights for all the combined parameters. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1217 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/Manager.cpp | 5 +- moses/src/StaticData.cpp | 120 +++++++++++++++------- moses/src/StaticData.h | 7 +- moses/src/TranslationOptionCollection.cpp | 64 +++++++++--- moses/src/TranslationOptionCollection.h | 23 +---- 5 files changed, 144 insertions(+), 75 deletions(-) diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp index 0f23194bb..70e8cd8d9 100755 --- a/moses/src/Manager.cpp +++ b/moses/src/Manager.cpp @@ -69,13 +69,14 @@ Manager::~Manager() void Manager::ProcessSentence() { m_staticData.ResetSentenceStats(m_source); - list < DecodeStep* > &decodeStepList = m_staticData.GetDecodeStepList(); + vector < list < DecodeStep* > * >&decodeStepVL = m_staticData.GetDecodeStepVL(); + // create list of all possible translations // this is only valid if: // 1. generation of source sentence is not done 1st // 2. 
initial hypothesis factors are given in the sentence //CreateTranslationOptions(m_source, phraseDictionary, lmListInitial); - m_possibleTranslations->CreateTranslationOptions(decodeStepList + m_possibleTranslations->CreateTranslationOptions(decodeStepVL , m_staticData.GetFactorCollection()); // initial seed hypothesis: nothing translated, no words produced diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index e57df95cf..c1100207a 100755 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -240,7 +240,29 @@ StaticData::~StaticData() RemoveAllInColl(m_phraseDictionary); RemoveAllInColl(m_generationDictionary); RemoveAllInColl(m_languageModel); - RemoveAllInColl(m_decodeStepList); + //need to delete lists within vector as well + while (! m_decodeStepVL.empty() ) + { + list * ptrList = m_decodeStepVL.back(); + m_decodeStepVL.pop_back(); + while( ! ptrList->empty() ) + { + DecodeStep * ptrDecodeStep = ptrList->back(); + ptrList->pop_back(); + if (ptrDecodeStep != NULL) + { + delete ptrDecodeStep; + ptrDecodeStep = NULL; + } + } + //cout << "list size " << ptrList->size() << endl; + if (ptrList != NULL) + { + delete ptrList; + ptrList = NULL; + } + } + RemoveAllInColl(m_reorderModels); // small score producers @@ -642,48 +664,76 @@ bool StaticData::LoadMapping() // mapping const vector &mappingVector = m_parameter->GetParam("mapping"); DecodeStep *prev = 0; + size_t previousVectorList = 0; for(size_t i=0; i token = Tokenize(mappingVector[i]); + size_t vectorList; + DecodeType decodeType; + size_t index; if (token.size() == 2) { - DecodeType decodeType = token[0] == "T" ? 
Translate : Generate; - size_t index = Scan(token[1]); - DecodeStep* decodeStep = 0; - switch (decodeType) { - case Translate: - if(index>=m_phraseDictionary.size()) - { - stringstream strme; - strme << "No phrase dictionary with index " - << index << " available!"; - UserMessage::Add(strme.str()); - return false; - } - decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev); - break; - case Generate: - if(index>=m_generationDictionary.size()) - { - stringstream strme; - strme << "No generation dictionary with index " - << index << " available!"; - UserMessage::Add(strme.str()); - return false; - } - decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev); - break; - case InsertNullFertilityWord: - assert(!"Please implement NullFertilityInsertion."); - break; - } - assert(decodeStep); - m_decodeStepList.push_back(decodeStep); - prev = decodeStep; - } else { + vectorList = 0; + decodeType = token[0] == "T" ? Translate : Generate; + index = Scan(token[1]); + } + //Smoothing + else if (token.size() == 3) + { + vectorList = Scan(token[0]); + //the vectorList index can only increment by one + assert(vectorList == previousVectorList || vectorList == previousVectorList + 1); + if (vectorList > previousVectorList) + { + prev = NULL; + } + decodeType = token[1] == "T" ? 
Translate : Generate; + index = Scan(token[2]); + } + else + { UserMessage::Add("Malformed mapping!"); return false; } + + DecodeStep* decodeStep = 0; + switch (decodeType) { + case Translate: + if(index>=m_phraseDictionary.size()) + { + stringstream strme; + strme << "No phrase dictionary with index " + << index << " available!"; + UserMessage::Add(strme.str()); + return false; + } + decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev); + break; + case Generate: + if(index>=m_generationDictionary.size()) + { + stringstream strme; + strme << "No generation dictionary with index " + << index << " available!"; + UserMessage::Add(strme.str()); + return false; + } + decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev); + break; + case InsertNullFertilityWord: + assert(!"Please implement NullFertilityInsertion."); + break; + } + assert(decodeStep); + list < DecodeStep *> * decodeList=NULL; + if (m_decodeStepVL.size() < vectorList + 1) + { + decodeList = new list < DecodeStep *>; + m_decodeStepVL.push_back(decodeList); + } + m_decodeStepVL[vectorList]->push_back(decodeStep); + prev = decodeStep; + previousVectorList = vectorList; } return true; diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h index 18feafa33..3f15773a4 100755 --- a/moses/src/StaticData.h +++ b/moses/src/StaticData.h @@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include #include +#include #include #include "TypeDef.h" #include "ScoreIndexManager.h" @@ -50,7 +51,7 @@ protected: FactorCollection m_factorCollection; std::vector m_phraseDictionary; std::vector m_generationDictionary; - std::list < DecodeStep* > m_decodeStepList; + std::vector < std::list < DecodeStep*> * > m_decodeStepVL; Parameter *m_parameter; std::vector m_inputFactorOrder, m_outputFactorOrder; LMList m_languageModel; @@ -152,9 +153,9 @@ public: return m_outputFactorOrder; } - std::list < DecodeStep* > &GetDecodeStepList() + std::vector < 
std::list < DecodeStep* > * > &GetDecodeStepVL() { - return m_decodeStepList; + return m_decodeStepVL; } inline bool GetSourceStartPosMattersForRecombination() const diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp index 9139e2898..83335feb8 100644 --- a/moses/src/TranslationOptionCollection.cpp +++ b/moses/src/TranslationOptionCollection.cpp @@ -125,19 +125,24 @@ void TranslationOptionCollection::Prune() * \param factorCollection input sentence with all factors */ -void TranslationOptionCollection::ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection) +void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL + , FactorCollection &factorCollection) { size_t size = m_source.GetSize(); // try to translation for coverage with no trans by expanding table limit - for (size_t pos = 0 ; pos < size ; ++pos) + for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++) { - TranslationOptionList &fullList = GetTranslationOptionList(pos, pos); - size_t numTransOpt = fullList.size(); - if (numTransOpt == 0) - { - CreateTranslationOptionsForRange(decodeStepList, factorCollection - , pos, pos, false); - } + const list < DecodeStep* > * decodeStepList = decodeStepVL[startVL]; + for (size_t pos = 0 ; pos < size ; ++pos) + { + TranslationOptionList &fullList = GetTranslationOptionList(pos, pos); + size_t numTransOpt = fullList.size(); + if (numTransOpt == 0) + { + CreateTranslationOptionsForRange(*decodeStepList, factorCollection + , pos, pos, false); + } + } } // create unknown words for 1 word coverage where we don't have any trans options @@ -314,7 +319,7 @@ void TranslationOptionCollection::CalcFutureScore() * \param decodeStepList list of decoding steps * \param factorCollection input sentence with all factors */ -void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeStep* > 
&decodeStepList +void TranslationOptionCollection::CreateTranslationOptions(const vector * > &decodeStepVL , FactorCollection &factorCollection) { m_factorCollection = &factorCollection; @@ -323,16 +328,21 @@ void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeSt // in the phraseDictionary (which is the- possibly filtered-- phrase // table loaded on initialization), generate TranslationOption objects // for all phrases - - for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++) + for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++) { - for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++) + const list < DecodeStep* > * decodeStepList = decodeStepVL[startVL]; + for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++) { - CreateTranslationOptionsForRange( decodeStepList, factorCollection, startPos, endPos, true); + for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++) + { + CreateTranslationOptionsForRange( *decodeStepList, factorCollection, startPos, endPos, true); + } } } - ProcessUnknownWord(decodeStepList, factorCollection); + VERBOSE(3,"Translation Option Collection\n " << *this << endl); + + ProcessUnknownWord(decodeStepVL, factorCollection); // Prune Prune(); @@ -457,3 +467,27 @@ void TranslationOptionCollection::Add(const TranslationOption *translationOption TO_STRING_BODY(TranslationOptionCollection); +inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll) +{ + size_t size = coll.GetSize(); + for (size_t startPos = 0 ; startPos < size ; ++startPos) + { + for (size_t endPos = startPos ; endPos < size ; ++endPos) + { + TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos); + size_t sizeFull = fullList.size(); + for (size_t i = 0; i < sizeFull; i++) + { + out << *fullList[i] << std::endl; + } + } + } + + //std::vector< std::vector< TranslationOptionList > >::const_iterator i = 
coll.m_collection.begin(); + //size_t j = 0; + //for (; i!=coll.m_collection.end(); ++i) { + //out << "s[" << j++ << "].size=" << i->size() << std::endl; + //} + + return out; +} diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h index c6c586b20..b22b78cbb 100755 --- a/moses/src/TranslationOptionCollection.h +++ b/moses/src/TranslationOptionCollection.h @@ -75,7 +75,7 @@ protected: , size_t startPos, size_t endPos, bool adhereTableLimit ); //! Force a creation of a translation option where there are none for a particular source position. - void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection); + void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL, FactorCollection &factorCollection); //! special handling of ONE unknown words. virtual void ProcessOneUnknownWord(const Word &sourceWord , size_t sourcePos @@ -105,10 +105,10 @@ public: const InputType& GetSource() const { return m_source; } //! get length/size of source input - size_t GetSize() const; + size_t GetSize() const { return m_source.GetSize(); }; //! Create all possible translations from the phrase tables - virtual void CreateTranslationOptions(const std::list < DecodeStep* > &decodeStepList + virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL , FactorCollection &factorCollection); //! Create translation options that exactly cover a specific input span. 
virtual void CreateTranslationOptionsForRange(const std::list < DecodeStep* > &decodeStepList @@ -132,21 +132,4 @@ public: TO_STRING(); }; -inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll) -{ - std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin(); - size_t j = 0; - for (; i!=coll.m_collection.end(); ++i) { - out << "s[" << j++ << "].size=" << i->size() << std::endl; - } - - /* - TranslationOptionCollection::const_iterator iter; - for (iter = coll.begin() ; iter != coll.end() ; ++iter) - { - TRACE_ERR (*iter << std::endl); - } - */ - return out; -}