mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-19 07:07:24 +03:00
Merging from branch.
From now on, the decoder can have multiple decoder step lists to accommodate backoff. Specify this as an extra parameter in the [mapping] option in the ini file. This is backwards compatible. Before (and still accepted): "[mapping] T 0". Now you can have: "[mapping] 0 T 0", "1 T 1", "1 G 0" (one mapping entry per line). Imagine, for instance, that translation table 0 is words - words, table 1 is stems - stems, and generation table 0 is stems - words. This will allow us to back off to stems if words are not found. It is not really backoff, because all the options from both decoder step lists get included in the translation option collection, which is then used to create the hypotheses. The different paths must have their weights carefully balanced. MERT might not be enough to discover the best weights for all the combined parameters. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1217 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
2f4c70b4ae
commit
59c4ba9f4d
@ -69,13 +69,14 @@ Manager::~Manager()
|
||||
void Manager::ProcessSentence()
|
||||
{
|
||||
m_staticData.ResetSentenceStats(m_source);
|
||||
list < DecodeStep* > &decodeStepList = m_staticData.GetDecodeStepList();
|
||||
vector < list < DecodeStep* > * >&decodeStepVL = m_staticData.GetDecodeStepVL();
|
||||
|
||||
// create list of all possible translations
|
||||
// this is only valid if:
|
||||
// 1. generation of source sentence is not done 1st
|
||||
// 2. initial hypothesis factors are given in the sentence
|
||||
//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
|
||||
m_possibleTranslations->CreateTranslationOptions(decodeStepList
|
||||
m_possibleTranslations->CreateTranslationOptions(decodeStepVL
|
||||
, m_staticData.GetFactorCollection());
|
||||
|
||||
// initial seed hypothesis: nothing translated, no words produced
|
||||
|
@ -240,7 +240,29 @@ StaticData::~StaticData()
|
||||
RemoveAllInColl(m_phraseDictionary);
|
||||
RemoveAllInColl(m_generationDictionary);
|
||||
RemoveAllInColl(m_languageModel);
|
||||
RemoveAllInColl(m_decodeStepList);
|
||||
//need to delete lists within vector as well
|
||||
while (! m_decodeStepVL.empty() )
|
||||
{
|
||||
list <DecodeStep *> * ptrList = m_decodeStepVL.back();
|
||||
m_decodeStepVL.pop_back();
|
||||
while( ! ptrList->empty() )
|
||||
{
|
||||
DecodeStep * ptrDecodeStep = ptrList->back();
|
||||
ptrList->pop_back();
|
||||
if (ptrDecodeStep != NULL)
|
||||
{
|
||||
delete ptrDecodeStep;
|
||||
ptrDecodeStep = NULL;
|
||||
}
|
||||
}
|
||||
//cout << "list size " << ptrList->size() << endl;
|
||||
if (ptrList != NULL)
|
||||
{
|
||||
delete ptrList;
|
||||
ptrList = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
RemoveAllInColl(m_reorderModels);
|
||||
|
||||
// small score producers
|
||||
@ -642,48 +664,76 @@ bool StaticData::LoadMapping()
|
||||
// mapping
|
||||
const vector<string> &mappingVector = m_parameter->GetParam("mapping");
|
||||
DecodeStep *prev = 0;
|
||||
size_t previousVectorList = 0;
|
||||
for(size_t i=0; i<mappingVector.size(); i++)
|
||||
{
|
||||
vector<string> token = Tokenize(mappingVector[i]);
|
||||
size_t vectorList;
|
||||
DecodeType decodeType;
|
||||
size_t index;
|
||||
if (token.size() == 2)
|
||||
{
|
||||
DecodeType decodeType = token[0] == "T" ? Translate : Generate;
|
||||
size_t index = Scan<size_t>(token[1]);
|
||||
DecodeStep* decodeStep = 0;
|
||||
switch (decodeType) {
|
||||
case Translate:
|
||||
if(index>=m_phraseDictionary.size())
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "No phrase dictionary with index "
|
||||
<< index << " available!";
|
||||
UserMessage::Add(strme.str());
|
||||
return false;
|
||||
}
|
||||
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
|
||||
break;
|
||||
case Generate:
|
||||
if(index>=m_generationDictionary.size())
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "No generation dictionary with index "
|
||||
<< index << " available!";
|
||||
UserMessage::Add(strme.str());
|
||||
return false;
|
||||
}
|
||||
decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
|
||||
break;
|
||||
case InsertNullFertilityWord:
|
||||
assert(!"Please implement NullFertilityInsertion.");
|
||||
break;
|
||||
}
|
||||
assert(decodeStep);
|
||||
m_decodeStepList.push_back(decodeStep);
|
||||
prev = decodeStep;
|
||||
} else {
|
||||
vectorList = 0;
|
||||
decodeType = token[0] == "T" ? Translate : Generate;
|
||||
index = Scan<size_t>(token[1]);
|
||||
}
|
||||
//Smoothing
|
||||
else if (token.size() == 3)
|
||||
{
|
||||
vectorList = Scan<size_t>(token[0]);
|
||||
//the vectorList index can only increment by one
|
||||
assert(vectorList == previousVectorList || vectorList == previousVectorList + 1);
|
||||
if (vectorList > previousVectorList)
|
||||
{
|
||||
prev = NULL;
|
||||
}
|
||||
decodeType = token[1] == "T" ? Translate : Generate;
|
||||
index = Scan<size_t>(token[2]);
|
||||
}
|
||||
else
|
||||
{
|
||||
UserMessage::Add("Malformed mapping!");
|
||||
return false;
|
||||
}
|
||||
|
||||
DecodeStep* decodeStep = 0;
|
||||
switch (decodeType) {
|
||||
case Translate:
|
||||
if(index>=m_phraseDictionary.size())
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "No phrase dictionary with index "
|
||||
<< index << " available!";
|
||||
UserMessage::Add(strme.str());
|
||||
return false;
|
||||
}
|
||||
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
|
||||
break;
|
||||
case Generate:
|
||||
if(index>=m_generationDictionary.size())
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "No generation dictionary with index "
|
||||
<< index << " available!";
|
||||
UserMessage::Add(strme.str());
|
||||
return false;
|
||||
}
|
||||
decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
|
||||
break;
|
||||
case InsertNullFertilityWord:
|
||||
assert(!"Please implement NullFertilityInsertion.");
|
||||
break;
|
||||
}
|
||||
assert(decodeStep);
|
||||
list < DecodeStep *> * decodeList=NULL;
|
||||
if (m_decodeStepVL.size() < vectorList + 1)
|
||||
{
|
||||
decodeList = new list < DecodeStep *>;
|
||||
m_decodeStepVL.push_back(decodeList);
|
||||
}
|
||||
m_decodeStepVL[vectorList]->push_back(decodeStep);
|
||||
prev = decodeStep;
|
||||
previousVectorList = vectorList;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include "TypeDef.h"
|
||||
#include "ScoreIndexManager.h"
|
||||
@ -50,7 +51,7 @@ protected:
|
||||
FactorCollection m_factorCollection;
|
||||
std::vector<PhraseDictionary*> m_phraseDictionary;
|
||||
std::vector<GenerationDictionary*> m_generationDictionary;
|
||||
std::list < DecodeStep* > m_decodeStepList;
|
||||
std::vector < std::list < DecodeStep*> * > m_decodeStepVL;
|
||||
Parameter *m_parameter;
|
||||
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
|
||||
LMList m_languageModel;
|
||||
@ -152,9 +153,9 @@ public:
|
||||
return m_outputFactorOrder;
|
||||
}
|
||||
|
||||
std::list < DecodeStep* > &GetDecodeStepList()
|
||||
std::vector < std::list < DecodeStep* > * > &GetDecodeStepVL()
|
||||
{
|
||||
return m_decodeStepList;
|
||||
return m_decodeStepVL;
|
||||
}
|
||||
|
||||
inline bool GetSourceStartPosMattersForRecombination() const
|
||||
|
@ -125,19 +125,24 @@ void TranslationOptionCollection::Prune()
|
||||
* \param factorCollection input sentence with all factors
|
||||
*/
|
||||
|
||||
void TranslationOptionCollection::ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection)
|
||||
void TranslationOptionCollection::ProcessUnknownWord(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection)
|
||||
{
|
||||
size_t size = m_source.GetSize();
|
||||
// try to translation for coverage with no trans by expanding table limit
|
||||
for (size_t pos = 0 ; pos < size ; ++pos)
|
||||
for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
|
||||
{
|
||||
TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
|
||||
size_t numTransOpt = fullList.size();
|
||||
if (numTransOpt == 0)
|
||||
{
|
||||
CreateTranslationOptionsForRange(decodeStepList, factorCollection
|
||||
, pos, pos, false);
|
||||
}
|
||||
const list < DecodeStep* > * decodeStepList = decodeStepVL[startVL];
|
||||
for (size_t pos = 0 ; pos < size ; ++pos)
|
||||
{
|
||||
TranslationOptionList &fullList = GetTranslationOptionList(pos, pos);
|
||||
size_t numTransOpt = fullList.size();
|
||||
if (numTransOpt == 0)
|
||||
{
|
||||
CreateTranslationOptionsForRange(*decodeStepList, factorCollection
|
||||
, pos, pos, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
@ -314,7 +319,7 @@ void TranslationOptionCollection::CalcFutureScore()
|
||||
* \param decodeStepList list of decoding steps
|
||||
* \param factorCollection input sentence with all factors
|
||||
*/
|
||||
void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeStep* > &decodeStepList
|
||||
void TranslationOptionCollection::CreateTranslationOptions(const vector <list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection)
|
||||
{
|
||||
m_factorCollection = &factorCollection;
|
||||
@ -323,16 +328,21 @@ void TranslationOptionCollection::CreateTranslationOptions(const list < DecodeSt
|
||||
// in the phraseDictionary (which is the- possibly filtered-- phrase
|
||||
// table loaded on initialization), generate TranslationOption objects
|
||||
// for all phrases
|
||||
|
||||
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
|
||||
for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
|
||||
{
|
||||
for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
|
||||
const list < DecodeStep* > * decodeStepList = decodeStepVL[startVL];
|
||||
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
|
||||
{
|
||||
CreateTranslationOptionsForRange( decodeStepList, factorCollection, startPos, endPos, true);
|
||||
for (size_t endPos = startPos ; endPos < m_source.GetSize() ; endPos++)
|
||||
{
|
||||
CreateTranslationOptionsForRange( *decodeStepList, factorCollection, startPos, endPos, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ProcessUnknownWord(decodeStepList, factorCollection);
|
||||
VERBOSE(3,"Translation Option Collection\n " << *this << endl);
|
||||
|
||||
ProcessUnknownWord(decodeStepVL, factorCollection);
|
||||
|
||||
// Prune
|
||||
Prune();
|
||||
@ -457,3 +467,27 @@ void TranslationOptionCollection::Add(const TranslationOption *translationOption
|
||||
|
||||
TO_STRING_BODY(TranslationOptionCollection);
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll)
|
||||
{
|
||||
size_t size = coll.GetSize();
|
||||
for (size_t startPos = 0 ; startPos < size ; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos ; endPos < size ; ++endPos)
|
||||
{
|
||||
TranslationOptionList fullList = coll.GetTranslationOptionList(startPos, endPos);
|
||||
size_t sizeFull = fullList.size();
|
||||
for (size_t i = 0; i < sizeFull; i++)
|
||||
{
|
||||
out << *fullList[i] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin();
|
||||
//size_t j = 0;
|
||||
//for (; i!=coll.m_collection.end(); ++i) {
|
||||
//out << "s[" << j++ << "].size=" << i->size() << std::endl;
|
||||
//}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ protected:
|
||||
, size_t startPos, size_t endPos, bool adhereTableLimit );
|
||||
|
||||
//! Force a creation of a translation option where there are none for a particular source position.
|
||||
void ProcessUnknownWord(const std::list < DecodeStep* > &decodeStepList, FactorCollection &factorCollection);
|
||||
void ProcessUnknownWord(const std::vector < std::list < DecodeStep* > *> &decodeStepVL, FactorCollection &factorCollection);
|
||||
//! special handling of ONE unknown words.
|
||||
virtual void ProcessOneUnknownWord(const Word &sourceWord
|
||||
, size_t sourcePos
|
||||
@ -105,10 +105,10 @@ public:
|
||||
const InputType& GetSource() const { return m_source; }
|
||||
|
||||
//! get length/size of source input
|
||||
size_t GetSize() const;
|
||||
size_t GetSize() const { return m_source.GetSize(); };
|
||||
|
||||
//! Create all possible translations from the phrase tables
|
||||
virtual void CreateTranslationOptions(const std::list < DecodeStep* > &decodeStepList
|
||||
virtual void CreateTranslationOptions(const std::vector < std::list < DecodeStep* > * > &decodeStepVL
|
||||
, FactorCollection &factorCollection);
|
||||
//! Create translation options that exactly cover a specific input span.
|
||||
virtual void CreateTranslationOptionsForRange(const std::list < DecodeStep* > &decodeStepList
|
||||
@ -132,21 +132,4 @@ public:
|
||||
TO_STRING();
|
||||
};
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll)
|
||||
{
|
||||
std::vector< std::vector< TranslationOptionList > >::const_iterator i = coll.m_collection.begin();
|
||||
size_t j = 0;
|
||||
for (; i!=coll.m_collection.end(); ++i) {
|
||||
out << "s[" << j++ << "].size=" << i->size() << std::endl;
|
||||
}
|
||||
|
||||
/*
|
||||
TranslationOptionCollection::const_iterator iter;
|
||||
for (iter = coll.begin() ; iter != coll.end() ; ++iter)
|
||||
{
|
||||
TRACE_ERR (*iter << std::endl);
|
||||
}
|
||||
*/
|
||||
return out;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user