mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
- Finally updated version for async-factors -- this is not complete yet.
- Simple, naive implementation: gets around stack-pruning issues by decoding factors separately. - TODOs: - future cost estimation - n-best list generation - bugs. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/async-factors@825 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
c7c83c50f1
commit
576553993b
@ -100,7 +100,7 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
|
||||
{
|
||||
if ( hypo != NULL)
|
||||
{
|
||||
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
|
||||
if (hypo->GetPTID() == -1) OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
|
||||
OutputSurface(out, hypo->GetTargetPhrase(), outputFactorOrder, reportAllFactors);
|
||||
|
||||
if (reportSourceSpan == true
|
||||
|
@ -132,7 +132,7 @@ int main(int argc, char* argv[])
|
||||
LatticePathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList);
|
||||
inputOutput->SetNBest(nBestList, source->GetTranslationId());
|
||||
RemoveAllInColl< LatticePathList::iterator > (nBestList);
|
||||
RemoveAllInColl(nBestList);
|
||||
}
|
||||
|
||||
if (staticData.IsDetailedTranslationReportingEnabled()) {
|
||||
|
@ -24,6 +24,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <cassert>
|
||||
#include "TypeDef.h"
|
||||
#include "Dictionary.h"
|
||||
#include "Word.h"
|
||||
#include "ScoreComponentCollection.h"
|
||||
|
||||
class PhraseDictionaryBase;
|
||||
class GenerationDictionary;
|
||||
@ -32,6 +34,13 @@ class TranslationOptionCollection;
|
||||
class PartialTranslOptColl;
|
||||
class FactorCollection;
|
||||
class InputType;
|
||||
class Phrase;
|
||||
|
||||
/** One generation candidate: first = the word, second = its score. */
typedef std::pair<Word, ScoreComponentCollection2> WordPair;

/** A list of generation candidates. */
typedef std::list<WordPair> WordList;

/** Read-only iterator over a WordList. */
typedef WordList::const_iterator WordListIterator;
|
||||
|
||||
/** Specification for a decoding step.
|
||||
* The factored translation model consists of Translation and Generation
|
||||
@ -52,6 +61,10 @@ public:
|
||||
DecodeStep(Dictionary *ptr, const DecodeStep* prevDecodeStep);
|
||||
virtual ~DecodeStep();
|
||||
|
||||
// Stop-gap type tag so callers can tell step kinds apart: 0 = translation, 1 = generation.
|
||||
virtual const int GetType() const =0;
|
||||
/** Default implementation: ignores both arguments, produces no options and
 *  returns 0. GenerationDecodeStep provides the real implementation. */
virtual int GenerateOptions(std::vector<WordList>& wordListVector, const Phrase& targetPhrase)
{
	return 0;
}
|
||||
|
||||
/** mask of factors that are present after this decode step */
|
||||
const FactorMask& GetOutputFactorMask() const
|
||||
{
|
||||
|
@ -74,6 +74,47 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
|
||||
}
|
||||
}
|
||||
|
||||
/** Looks up every word of targetPhrase in the generation dictionary and appends
 *  all outputs found for word i, with their scores, to wordListVector[i].
 *
 *  \param wordListVector  one list per target position; indexed without a bounds
 *                         check, so it needs at least targetPhrase.GetSize() entries
 *  \param targetPhrase    phrase whose words are looked up
 *  \return the number of positions filled (the phrase length), or 0 as soon as a
 *          word is missing from the dictionary. Lists filled before the missing
 *          word keep their entries.
 */
int GenerationDecodeStep::GenerateOptions(vector<WordList>& wordListVector, const Phrase& targetPhrase)
{
	const size_t numWords = targetPhrase.GetSize();
	const GenerationDictionary &dictionary = GetGenerationDictionary();

	size_t pos = 0;
	while (pos < numWords)
	{
		WordList &candidates = wordListVector[pos];

		// consult the dictionary for the possible generations of this word
		const OutputWordCollection *found = dictionary.FindWord(targetPhrase.GetFactorArray(pos));
		if (found == NULL)
		{
			// word not in the generation dictionary: no unknown-word handling yet,
			// the whole phrase is given up
			return 0;
		}

		// record each generated word together with its score
		for (OutputWordCollection::const_iterator entry = found->begin(); entry != found->end(); ++entry)
		{
			candidates.push_back(WordPair(entry->first, entry->second));
		}

		++pos; // done, next word
	}

	return static_cast<int>(pos);
}
|
||||
|
||||
|
||||
void GenerationDecodeStep::Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
@ -113,7 +154,7 @@ void GenerationDecodeStep::Process(const TranslationOption &inputPartialTranslOp
|
||||
|
||||
if (wordColl == NULL)
|
||||
{ // word not found in generation dictionary
|
||||
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
||||
toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
|
||||
return; // can't be part of a phrase, special handling
|
||||
}
|
||||
else
|
||||
|
@ -23,9 +23,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#define _Generation_DECODE_STEP_H_
|
||||
|
||||
#include "DecodeStep.h"
|
||||
#include "Word.h"
|
||||
#include "ScoreComponentCollection.h"
|
||||
|
||||
class GenerationDictionary;
|
||||
class Phrase;
|
||||
class WordsRange;
|
||||
class ScoreComponentCollection2;
|
||||
|
||||
class GenerationDecodeStep : public DecodeStep
|
||||
@ -33,9 +36,12 @@ class GenerationDecodeStep : public DecodeStep
|
||||
public:
|
||||
GenerationDecodeStep(GenerationDictionary* dict, const DecodeStep* prev);
|
||||
|
||||
/** Step-type tag: 1 identifies a generation step (translation steps report 0). */
const int GetType() const
{
	return 1;
}
|
||||
|
||||
/** returns phrase table (dictionary) for translation step */
|
||||
const GenerationDictionary &GetGenerationDictionary() const;
|
||||
|
||||
int GenerateOptions(std::vector<WordList>& wordListVector, const Phrase& targetPhrase);
|
||||
virtual void Process(const TranslationOption &inputPartialTranslOpt
|
||||
, const DecodeStep &decodeStep
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
|
@ -32,6 +32,9 @@ class TranslationDecodeStep : public DecodeStep
|
||||
public:
|
||||
TranslationDecodeStep(PhraseDictionaryBase* dict, const DecodeStep* prev);
|
||||
|
||||
// still sucks
|
||||
const int GetType() const { return 0; };
|
||||
|
||||
/** returns phrase table (dictionary) for translation step */
|
||||
const PhraseDictionaryBase &GetPhraseDictionary() const;
|
||||
|
||||
|
@ -71,7 +71,8 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
|
||||
vector<string> token = Tokenize( line );
|
||||
|
||||
// add each line in generation file into class
|
||||
Word inputWord, outputWord;
|
||||
Word *inputWord = new Word();
|
||||
Word outputWord;
|
||||
|
||||
// create word with certain factors filled out
|
||||
|
||||
@ -81,7 +82,7 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
|
||||
{
|
||||
FactorType factorType = input[i];
|
||||
const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
|
||||
inputWord.SetFactor(factorType, factor);
|
||||
inputWord->SetFactor(factorType, factor);
|
||||
}
|
||||
|
||||
factorString = Tokenize( token[1], "|" );
|
||||
@ -112,6 +113,11 @@ void GenerationDictionary::Load(const std::vector<FactorType> &input
|
||||
|
||||
/** Deletes every key pointer stored in m_collection.
 *
 *  NOTE(review): the keys are deleted through `const FactorArrayWrapper*`. If they
 *  are the Word objects allocated with `new` in Load(), FactorArrayWrapper needs a
 *  virtual destructor for this to be well defined -- confirm.
 */
GenerationDictionary::~GenerationDictionary()
{
	// The iterator must come from the same map type as m_collection, including its
	// comparator. Iterators of a map with the default comparator are a different
	// type and are only interchangeable on some standard library implementations.
	typedef std::map<const FactorArrayWrapper*, OutputWordCollection, FactorArrayWrapperComparer> Collection;

	for (Collection::const_iterator iter = m_collection.begin(); iter != m_collection.end(); ++iter)
	{
		delete iter->first;
	}
}
|
||||
|
||||
unsigned int GenerationDictionary::GetNumScoreComponents() const
|
||||
@ -127,18 +133,15 @@ const std::string GenerationDictionary::GetScoreProducerDescription() const
|
||||
const OutputWordCollection *GenerationDictionary::FindWord(const FactorArray &factorArray) const
|
||||
{
|
||||
const OutputWordCollection *ret;
|
||||
Word word;
|
||||
Word::Copy(word.GetFactorArray(), factorArray);
|
||||
|
||||
std::map<Word , OutputWordCollection>::const_iterator iter = m_collection.find(word);
|
||||
FactorArrayWrapper wrapper(factorArray);
|
||||
std::map<const FactorArrayWrapper* , OutputWordCollection>::const_iterator iter = m_collection.find(&wrapper);
|
||||
if (iter == m_collection.end())
|
||||
{ // can't find source phrase
|
||||
cerr << "Can't find: " << word << "\n";
|
||||
ret = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "FOUND: " << word << "\n";
|
||||
ret = &iter->second;
|
||||
}
|
||||
return ret;
|
||||
|
@ -31,6 +31,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
class FactorCollection;
|
||||
|
||||
struct FactorArrayWrapperComparer
|
||||
{
|
||||
//! returns true if hypoA can be recombined with hypoB
|
||||
bool operator()(const FactorArrayWrapper *a, const FactorArrayWrapper *b) const
|
||||
{
|
||||
return *a < *b;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
|
||||
// 1st = output phrase
|
||||
// 2nd = log probability (score)
|
||||
@ -38,7 +47,7 @@ typedef std::map < Word , ScoreComponentCollection2 > OutputWordCollection;
|
||||
class GenerationDictionary : public Dictionary, public ScoreProducer
|
||||
{
|
||||
protected:
|
||||
std::map<Word , OutputWordCollection> m_collection;
|
||||
std::map<const FactorArrayWrapper* , OutputWordCollection, FactorArrayWrapperComparer> m_collection;
|
||||
// 1st = source
|
||||
// 2nd = target
|
||||
std::string m_filename;
|
||||
|
@ -39,8 +39,10 @@ using namespace std;
|
||||
|
||||
unsigned int Hypothesis::s_HypothesesCreated = 0;
|
||||
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
|
||||
unsigned long Hypothesis::scoredLMs;
|
||||
unsigned long Hypothesis::maskedLMs;
|
||||
|
||||
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget, int ptid)
|
||||
: m_prevHypo(NULL)
|
||||
, m_targetPhrase(emptyTarget)
|
||||
, m_sourcePhrase(0)
|
||||
@ -55,39 +57,129 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
{ // used for initial seeding of trans process
|
||||
// initialize scores
|
||||
_hash_computed = false;
|
||||
m_ptid = ptid;
|
||||
m_targetLen = 0;
|
||||
maskedLMs = scoredLMs = 0x0;
|
||||
ResetScore();
|
||||
}
|
||||
|
||||
/***
|
||||
* continue prevHypo by appending the phrases in transOpt
|
||||
*/
|
||||
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
||||
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid)
|
||||
: m_prevHypo(&prevHypo)
|
||||
, m_targetPhrase(transOpt.GetTargetPhrase())
|
||||
, m_sourcePhrase(0)
|
||||
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
||||
, m_sourceInput (prevHypo.m_sourceInput)
|
||||
, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
|
||||
, m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
|
||||
,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
|
||||
, m_wordDeleted(false)
|
||||
, m_totalScore(0.0f)
|
||||
, m_futureScore(0.0f)
|
||||
, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
|
||||
, m_languageModelStates(prevHypo.m_languageModelStates)
|
||||
, m_arcList(NULL)
|
||||
, m_id(s_HypothesesCreated++)
|
||||
, m_targetPhrase(ptid == -1 ? transOpt.GetTargetPhrase() : (*(new Phrase(prevHypo.m_targetPhrase))))
|
||||
, m_sourcePhrase(0)
|
||||
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
||||
, m_sourceInput (prevHypo.m_sourceInput)
|
||||
, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
|
||||
, m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
|
||||
,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
|
||||
, m_wordDeleted(false)
|
||||
, m_totalScore(0.0f)
|
||||
, m_futureScore(0.0f)
|
||||
, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
|
||||
, m_languageModelStates(prevHypo.m_languageModelStates)
|
||||
, m_arcList(NULL)
|
||||
, m_id(s_HypothesesCreated++)
|
||||
{
|
||||
// assert that we are not extending our hypothesis by retranslating something
|
||||
// that this hypothesis has already translated!
|
||||
assert(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
|
||||
|
||||
m_ptid = ptid;
|
||||
//m_targetLen = 0;
|
||||
if (m_ptid > -1)
|
||||
{ // merge with existing factors: target phrase must already be full length
|
||||
// assumes that phrases have already been checked for compatibility
|
||||
(const_cast<Phrase &>(m_targetPhrase)).MergeFactorsPartial(transOpt.GetTargetPhrase(), prevHypo.m_targetLen);
|
||||
m_targetLen = prevHypo.GetTargetLen() + transOpt.GetTargetPhrase().GetSize();
|
||||
cerr << "\t* " << "secondary hyp: " << m_targetLen << " " << m_currSourceWordsRange << "\n";
|
||||
}
|
||||
|
||||
_hash_computed = false;
|
||||
//maskedLMs = scoredLMs = 0x0;
|
||||
m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
|
||||
m_wordDeleted = transOpt.IsDeletionOption();
|
||||
m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
|
||||
}
|
||||
|
||||
// Transfer hypothesis
|
||||
Hypothesis::Hypothesis(const Hypothesis &orig, int ptid)
|
||||
: m_prevHypo(&orig)
|
||||
, m_targetPhrase(ptid == 1 ? *(new Phrase(Output)) : *(new Phrase(orig.m_targetPhrase)))
|
||||
, m_sourcePhrase(0)
|
||||
, m_sourceCompleted (orig.m_sourceCompleted.GetSize())
|
||||
, m_sourceInput (orig.m_sourceInput)
|
||||
, m_currSourceWordsRange (NOT_FOUND, NOT_FOUND)
|
||||
, m_currTargetWordsRange (NOT_FOUND, NOT_FOUND)
|
||||
, m_wordDeleted(false)
|
||||
, m_totalScore(orig.m_totalScore)
|
||||
, m_futureScore(orig.m_futureScore)
|
||||
, m_scoreBreakdown (orig.m_scoreBreakdown)
|
||||
, m_languageModelStates(orig.m_languageModelStates)
|
||||
, m_arcList(NULL)
|
||||
, m_id(s_HypothesesCreated++)
|
||||
{
|
||||
m_ptid = ptid;
|
||||
m_targetLen = 0;
|
||||
//maskedLMs = scoredLMs = 0x0;
|
||||
// IF ptid == 0 then we need to construct a new targetPhrase with the entire sentence so far
|
||||
if (ptid == 1)
|
||||
{
|
||||
list<const Hypothesis *> tmp;
|
||||
for (const Hypothesis *x = &orig; x != NULL; x = x->m_prevHypo)
|
||||
tmp.push_front(x);
|
||||
list<const Hypothesis *>::const_iterator i;
|
||||
for (i = tmp.begin(); i != tmp.end(); i++)
|
||||
for (unsigned int j = 0; j < (*i)->GetSize(); j++)
|
||||
{
|
||||
(const_cast<Phrase &>(m_targetPhrase)).push_back((*i)->m_targetPhrase.GetFactorArray(j));
|
||||
}
|
||||
cerr << "INFO: Doing Transfer... Current Target String: **["
|
||||
<< orig.m_targetPhrase << ", " << m_targetPhrase << "]**" << std::endl;
|
||||
}
|
||||
_hash_computed = false;
|
||||
}
|
||||
|
||||
/**
 * Generation hypothesis: extends `orig` with the generated factors in `genph`.
 *
 * - `orig` becomes the back-pointer; source phrase, word ranges, scores, score
 *   breakdown and language model states are copied from it. The coverage bitmap
 *   is constructed from orig's bitmap size only.
 * - The target phrase always starts as a heap-allocated empty Output phrase.
 *   When ptid == 1 it is filled with the factor arrays of every hypothesis on
 *   the back-pointer chain, from the root up to `orig`.
 * - When ptid > -1 the factors of `genph` are merged in starting at position 0.
 * - `generationScore` is added to the score breakdown.
 *
 * NOTE(review): the Phrase created with `new` is bound to the reference member
 * m_targetPhrase; nothing in this constructor releases it -- confirm who owns it.
 */
Hypothesis::Hypothesis(const Hypothesis &orig, Phrase& genph, ScoreComponentCollection2& generationScore, int ptid)
	: m_prevHypo(&orig)
	, m_targetPhrase(*(new Phrase(Output)))
	, m_sourcePhrase(orig.m_sourcePhrase)
	, m_sourceCompleted (orig.m_sourceCompleted.GetSize())
	, m_sourceInput (orig.m_sourceInput)
	, m_currSourceWordsRange (orig.m_currSourceWordsRange)
	, m_currTargetWordsRange (orig.m_currTargetWordsRange)
	, m_wordDeleted(false)
	, m_totalScore(orig.m_totalScore)
	, m_futureScore(orig.m_futureScore)
	, m_scoreBreakdown (orig.m_scoreBreakdown)
	, m_languageModelStates(orig.m_languageModelStates)
	, m_arcList(NULL)
	, m_id(s_HypothesesCreated++)
{
	m_ptid = ptid;
	m_targetLen = orig.m_targetLen;

	Phrase &target = const_cast<Phrase &>(m_targetPhrase);

	if (ptid == 1)
	{
		// walk the back-pointers; the chain is collected newest first
		list<const Hypothesis *> newestFirst;
		const Hypothesis *cursor = &orig;
		while (cursor != NULL)
		{
			newestFirst.push_back(cursor);
			cursor = cursor->m_prevHypo;
		}

		// replay it oldest first, appending each hypothesis' words
		list<const Hypothesis *>::reverse_iterator step;
		for (step = newestFirst.rbegin(); step != newestFirst.rend(); ++step)
		{
			const Hypothesis *hypo = *step;
			for (unsigned int wordPos = 0; wordPos < hypo->GetSize(); ++wordPos)
			{
				target.push_back(hypo->m_targetPhrase.GetFactorArray(wordPos));
			}
		}
	}

	if (m_ptid > -1)
	{
		// merge with existing factors: the target phrase must already be full length;
		// the phrases are assumed to have been checked for compatibility already
		target.MergeFactorsPartial(genph, 0);
		cerr << "\t* " << "generated hyp: " << m_targetPhrase << " by adding " << genph << " " << m_currSourceWordsRange << "\n";
	}

	m_scoreBreakdown.PlusEquals(generationScore);
	_hash_computed = false;
}
|
||||
|
||||
Hypothesis::~Hypothesis()
|
||||
{
|
||||
if (m_arcList)
|
||||
@ -134,25 +226,25 @@ void Hypothesis::AddArc(Hypothesis *loserHypo)
|
||||
*/
|
||||
Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt) const
|
||||
{
|
||||
return Create(*this, transOpt);
|
||||
return Create(*this, transOpt, m_ptid);
|
||||
}
|
||||
|
||||
/***
|
||||
* return the subclass of Hypothesis most appropriate to the given translation option
|
||||
*/
|
||||
Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
||||
Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid)
|
||||
{
|
||||
Hypothesis *ptr = s_objectPool.getPtr();
|
||||
return new(ptr) Hypothesis(prevHypo, transOpt);
|
||||
return new(ptr) Hypothesis(prevHypo, transOpt, ptid);
|
||||
}
|
||||
/***
|
||||
* return the subclass of Hypothesis most appropriate to the given target phrase
|
||||
*/
|
||||
|
||||
Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget)
|
||||
Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget, int ptid)
|
||||
{
|
||||
Hypothesis *ptr = s_objectPool.getPtr();
|
||||
return new(ptr) Hypothesis(m_source, emptyTarget);
|
||||
return new(ptr) Hypothesis(m_source, emptyTarget, ptid);
|
||||
}
|
||||
|
||||
#if 0
|
||||
@ -216,13 +308,19 @@ void Hypothesis::CalcLMScore(const LMList &languageModels)
|
||||
{
|
||||
const size_t startPos = m_currTargetWordsRange.GetStartPos();
|
||||
LMList::const_iterator iterLM;
|
||||
unsigned long index = 0x1;
|
||||
size_t lmIdx = 0;
|
||||
|
||||
// already have LM scores from previous and trigram score of poss trans.
|
||||
// just need trigram score of the words of the start of current phrase
|
||||
for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx)
|
||||
for (iterLM = languageModels.begin() ; iterLM != languageModels.end() ; ++iterLM,++lmIdx, index <<= 0x1)
|
||||
{
|
||||
const LanguageModel &languageModel = **iterLM;
|
||||
|
||||
if (index & maskedLMs || !languageModel.Useable(m_targetPhrase))
|
||||
continue;
|
||||
|
||||
scoredLMs |= index;
|
||||
size_t nGramOrder = languageModel.GetNGramOrder();
|
||||
size_t currEndPos = m_currTargetWordsRange.GetEndPos();
|
||||
float lmScore;
|
||||
@ -317,7 +415,8 @@ void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &fut
|
||||
CalcLMScore(staticData.GetAllLM());
|
||||
|
||||
// WORD PENALTY
|
||||
m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetWordsCount());
|
||||
if (m_ptid != -1)
|
||||
m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetWordsCount());
|
||||
|
||||
// FUTURE COST
|
||||
CalcFutureScore(futureScore);
|
||||
@ -415,6 +514,7 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
|
||||
hypothesis.ToStream(out);
|
||||
// words bitmap
|
||||
out << "[" << hypothesis.m_sourceCompleted << "] ";
|
||||
out << "tlen: " << hypothesis.m_targetLen << " ";
|
||||
|
||||
// scores
|
||||
out << " [total=" << hypothesis.GetTotalScore() << "]";
|
||||
|
@ -66,6 +66,8 @@ protected:
|
||||
const Phrase &m_targetPhrase; /**< target phrase being created at the current decoding step */
|
||||
Phrase const* m_sourcePhrase; /**< input sentence */
|
||||
WordsBitmap m_sourceCompleted; /**< keeps track of which words have been translated so far */
|
||||
int m_ptid;
|
||||
int m_targetLen;
|
||||
//TODO: how to integrate this into confusion network framework; what if
|
||||
//it's a confusion network in the end???
|
||||
InputType const& m_sourceInput;
|
||||
@ -78,6 +80,8 @@ protected:
|
||||
std::vector<LanguageModelSingleFactor::State> m_languageModelStates; /**< relevant history for language model scoring -- used for recombination */
|
||||
const Hypothesis *m_mainHypo;
|
||||
ArcList *m_arcList; /**< all arcs that end at the same lattice point as this hypothesis */
|
||||
static unsigned long maskedLMs;
|
||||
static unsigned long scoredLMs;
|
||||
|
||||
void CalcFutureScore(const SquareMatrix &futureScore);
|
||||
//void CalcFutureScore(float futureScore[256][256]);
|
||||
@ -95,23 +99,31 @@ public:
|
||||
return s_objectPool;
|
||||
}
|
||||
|
||||
|
||||
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
|
||||
int m_id; /**< numeric ID of this hypothesis, used for logging */
|
||||
|
||||
// for masking lms
|
||||
inline unsigned long &GetMaskedLMs() { return maskedLMs; }
|
||||
/** Writable access to the static bit mask of language models that CalcLMScore
 *  has scored. The mask is shared by all hypotheses. */
inline unsigned long &GetScoredLMs()
{
	return scoredLMs;
}
|
||||
/** Returns m_targetLen.
 *  NOTE(review): presumably the number of target words covered so far in a
 *  secondary pass -- confirm against the constructors. */
inline int GetTargetLen() const
{
	return m_targetLen;
}
|
||||
/** Returns the pass id this hypothesis was created with; the constructors and
 *  Create() overloads default it to -1. */
inline int GetPTID() const
{
	return m_ptid;
}
|
||||
/** used by initial seeding of the translation process */
|
||||
Hypothesis(InputType const& source, const TargetPhrase &emptyTarget);
|
||||
Hypothesis(InputType const& source, const TargetPhrase &emptyTarget, int ptid = -1);
|
||||
/** used when creating a new hypothesis using a translation option (phrase translation) */
|
||||
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
|
||||
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid = -1);
|
||||
/** copy constructor for new pt restart*/
|
||||
Hypothesis(const Hypothesis &orig, int ptid);
|
||||
/** copy constructor for generation options */
|
||||
Hypothesis(const Hypothesis &orig, Phrase& genph, ScoreComponentCollection2& generationScore, int ptid);
|
||||
~Hypothesis();
|
||||
|
||||
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
||||
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt);
|
||||
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, int ptid = -1);
|
||||
|
||||
static Hypothesis* Create(const WordsBitmap &initialCoverage);
|
||||
static Hypothesis* Create(const WordsBitmap &initialCoverage, int ptid = -1);
|
||||
|
||||
/** return the subclass of Hypothesis most appropriate to the given target phrase */
|
||||
static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget);
|
||||
static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget, int ptid = -1);
|
||||
|
||||
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
||||
Hypothesis* CreateNext(const TranslationOption &transOpt) const;
|
||||
@ -221,7 +233,8 @@ public:
|
||||
|
||||
void ToStream(std::ostream& out) const
|
||||
{
|
||||
if (m_prevHypo != NULL)
|
||||
if (m_ptid >= 0) { out << " ::: "; }
|
||||
else if (m_prevHypo != NULL)
|
||||
{
|
||||
m_prevHypo->ToStream(out);
|
||||
}
|
||||
|
@ -101,7 +101,6 @@ protected:
|
||||
// if returns false, hypothesis not used
|
||||
// caller must take care to delete unused hypo to avoid leak
|
||||
// used by Add(Hypothesis *hypothesis, float beamThreshold);
|
||||
void RemoveAll();
|
||||
|
||||
/** destroy all instances of Hypothesis in this collection */
|
||||
inline void Detach(const HypothesisCollection::iterator &iter)
|
||||
@ -115,6 +114,8 @@ protected:
|
||||
pool.freeObject(*iter);
|
||||
Detach(iter);
|
||||
}
|
||||
void RemoveAll();
|
||||
|
||||
/** add Hypothesis to the collection, without pruning */
|
||||
inline void AddNoPrune(Hypothesis *hypothesis)
|
||||
{
|
||||
@ -129,6 +130,39 @@ public:
|
||||
|
||||
HypothesisCollection();
|
||||
|
||||
void Reset()
|
||||
{
|
||||
m_hypos.clear();
|
||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||
m_worstScore = -std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
/** destroy all instances of Hypothesis in this collection */
|
||||
inline void erase(const HypothesisCollection::iterator &iter)
|
||||
{
|
||||
m_hypos.erase(iter);
|
||||
}
|
||||
|
||||
/** Inserts a pointer to `h` into the underlying container.
 *  `iter` is accepted but not used. If the container rejects the insertion
 *  (an equivalent entry is already present) nothing is reported. */
inline void insert(const HypothesisCollection::iterator &iter, Hypothesis& h)
{
	m_hypos.insert(&h);
}
|
||||
|
||||
/** Passes every entry of `other` to Add(), in iteration order. `other` itself
 *  is not modified by this function. */
inline void insertset(HypothesisCollection &other)
{
	HypothesisCollection::const_iterator cursor = other.begin();
	while (cursor != other.end())
	{
		Add(*cursor);
		++cursor;
	}
}
|
||||
|
||||
void FreeHypPool()
|
||||
{
|
||||
ObjectPool<Hypothesis> &pool = Hypothesis::GetObjectPool();
|
||||
pool.reset();
|
||||
}
|
||||
|
||||
// this function will recombine hypotheses silently! There is no record
|
||||
// (could affect n-best list generation...TODO)
|
||||
void AddPrune(Hypothesis *hypothesis);
|
||||
@ -136,7 +170,8 @@ public:
|
||||
|
||||
inline ~HypothesisCollection()
|
||||
{
|
||||
RemoveAll();
|
||||
// Don't do this any more
|
||||
// RemoveAll();
|
||||
}
|
||||
/** set maximum number of hypotheses in the collection
|
||||
* /param maxHypoStackSize maximum number (typical number: 100) */
|
||||
|
@ -38,7 +38,7 @@ public:
|
||||
~LatticePathCollection()
|
||||
{
|
||||
// clean up
|
||||
RemoveAllInColl<LatticePathCollection::iterator> (*this);
|
||||
RemoveAllInColl(*this);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -38,10 +38,12 @@ libmoses_a_SOURCES = \
|
||||
PhraseDictionaryNode.cpp \
|
||||
PhraseDictionaryTree.cpp \
|
||||
PhraseDictionaryTreeAdaptor.cpp \
|
||||
PhraseReference.cpp \
|
||||
ScoreComponentCollection.cpp \
|
||||
ScoreIndexManager.cpp \
|
||||
ScoreProducer.cpp \
|
||||
Sentence.cpp \
|
||||
SentenceStats.cpp \
|
||||
StaticData.cpp \
|
||||
TargetPhrase.cpp \
|
||||
TranslationOption.cpp \
|
||||
|
@ -35,11 +35,23 @@ using namespace std;
|
||||
|
||||
Manager::Manager(InputType const& source, StaticData &staticData)
|
||||
:m_source(source)
|
||||
,m_hypoStack(source.GetSize() + 1)
|
||||
,m_hypoStack(source.GetSize() + 1)
|
||||
,m_staticData(staticData)
|
||||
,m_possibleTranslations(*source.CreateTranslationOptionCollection())
|
||||
,m_initialTargetPhrase(Output)
|
||||
{
|
||||
int pts = m_staticData.GetPhraseDictionaries().size();
|
||||
|
||||
// Need to allocate space for additional collections
|
||||
if (m_staticData.GetInputType() == 0)
|
||||
{
|
||||
m_secondaryOptions = (TranslationOptionCollection **) malloc(sizeof(TranslationOptionCollection *) * pts);
|
||||
for (int c = 0; c < pts; c++)
|
||||
m_secondaryOptions[c] = source.CreateTranslationOptionCollection();
|
||||
}
|
||||
m_scoredLMs = 0x0;
|
||||
assert(m_staticData.GetAllLM().size() < sizeof(int));
|
||||
|
||||
std::vector < HypothesisCollection >::iterator iterStack;
|
||||
for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
|
||||
{
|
||||
@ -51,6 +63,15 @@ Manager::Manager(InputType const& source, StaticData &staticData)
|
||||
|
||||
/** Releases what the manager allocated:
 *  - for input type 0, each secondary TranslationOptionCollection (one per
 *    phrase dictionary) and the malloc'ed array that holds them;
 *  - the Hypothesis object pool, reset through stack 0;
 *  - the primary translation option collection.
 */
Manager::~Manager()
{
	const bool hasSecondaryOptions = (m_staticData.GetInputType() == 0);
	if (hasSecondaryOptions)
	{
		const size_t numTables = m_staticData.GetPhraseDictionaries().size();
		for (size_t idx = 0; idx < numTables; ++idx)
		{
			delete m_secondaryOptions[idx];
		}
		free(m_secondaryOptions);
	}

	// Clear Hyps before the collections go away
	m_hypoStack[0].FreeHypPool();

	delete &m_possibleTranslations;
}
|
||||
|
||||
@ -67,40 +88,224 @@ void Manager::ProcessSentence()
|
||||
// 1. generation of source sentence is not done 1st
|
||||
// 2. initial hypothesis factors are given in the sentence
|
||||
//CreateTranslationOptions(m_source, phraseDictionary, lmListInitial);
|
||||
m_possibleTranslations.CreateTranslationOptions(decodeStepList
|
||||
, m_staticData.GetFactorCollection());
|
||||
//m_possibleTranslations.CreateTranslationOptions(decodeStepList
|
||||
//, m_staticData.GetFactorCollection());
|
||||
|
||||
list < DecodeStep* >::const_iterator iterStep = decodeStepList.begin();
|
||||
|
||||
list<DecodeStep *> b;
|
||||
assert((*iterStep)->GetType() == 0);
|
||||
b.push_back(*iterStep); // better not be a generation step!
|
||||
m_possibleTranslations.CreateTranslationOptions(b, m_staticData.GetFactorCollection());
|
||||
|
||||
//
|
||||
// Create Secondary options
|
||||
//
|
||||
|
||||
//for (int c = 1; c < m_staticData.GetPhraseDictionaries().size(); c++)
|
||||
unsigned int c;
|
||||
for (c = 0, ++iterStep ; iterStep != decodeStepList.end() ; ++iterStep)
|
||||
if ((*iterStep)->GetType() == 0)
|
||||
{
|
||||
list<DecodeStep *> b;
|
||||
b.push_back(*iterStep);
|
||||
m_secondaryOptions[c++]->CreateTranslationOptions(b, m_staticData.GetFactorCollection());
|
||||
}
|
||||
|
||||
// initial seed hypothesis: nothing translated, no words produced
|
||||
{
|
||||
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
|
||||
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase, -1); // initial PTID
|
||||
m_hypoStack[0].AddPrune(hypo);
|
||||
}
|
||||
|
||||
// go through each stack
|
||||
std::vector < HypothesisCollection >::iterator iterStack;
|
||||
for (iterStack = m_hypoStack.begin() ; iterStack != m_hypoStack.end() ; ++iterStack)
|
||||
{
|
||||
HypothesisCollection &sourceHypoColl = *iterStack;
|
||||
|
||||
// the stack is pruned before processing (lazy pruning):
|
||||
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
|
||||
//c = 0;
|
||||
int pt = 1;
|
||||
|
||||
sourceHypoColl.InitializeArcs();
|
||||
// Process first PT as if it were normal (this is used to compute distortion)
|
||||
// This should be identical
|
||||
for (iterStack = m_hypoStack.begin(); iterStack != m_hypoStack.end(); ++iterStack)
|
||||
{
|
||||
HypothesisCollection &sourceHypoColl = *iterStack;
|
||||
|
||||
// go through each hypothesis on the stack and try to expand it
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
|
||||
{
|
||||
Hypothesis &hypothesis = **iterHypo;
|
||||
ProcessOneHypothesis(hypothesis); // expand the hypothesis
|
||||
// the stack is pruned before processing (lazy pruning):
|
||||
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
|
||||
|
||||
sourceHypoColl.InitializeArcs();
|
||||
|
||||
// go through each hypothesis on the stack and try to expand it
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
|
||||
{
|
||||
Hypothesis &hypothesis = **iterHypo;
|
||||
cerr << "Processing Hypo " << hypothesis << endl;
|
||||
ProcessOneHypothesis(hypothesis, &m_possibleTranslations, -1); // expand the hypothesis
|
||||
}
|
||||
// some logging
|
||||
if (m_staticData.GetVerboseLevel() > 0) {
|
||||
//OutputHypoStack();
|
||||
OutputHypoStackSize();
|
||||
}
|
||||
// some logging
|
||||
if (m_staticData.GetVerboseLevel() > 0) {
|
||||
//OutputHypoStack();
|
||||
OutputHypoStackSize();
|
||||
}
|
||||
|
||||
}
|
||||
if (m_staticData.GetInputType()) return;
|
||||
|
||||
// Process remaining steps
|
||||
int gt = 0, id = 1;
|
||||
iterStep = decodeStepList.begin();
|
||||
for (++iterStep; iterStep != decodeStepList.end(); ++iterStep)
|
||||
{
|
||||
if ((*iterStep)->GetType() == 0)
|
||||
{
|
||||
int i = 0;
|
||||
int firsthyp = 1;
|
||||
// Create new start(s)
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
HypothesisCollection &currHypoColl = m_hypoStack.back();
|
||||
|
||||
// clear stack 0
|
||||
m_hypoStack[0].Reset();
|
||||
|
||||
for (iterHypo = currHypoColl.begin() ; iterHypo != currHypoColl.end() ; ++iterHypo, i++)
|
||||
{
|
||||
Hypothesis &hypothesis = **iterHypo;
|
||||
if (firsthyp)
|
||||
{
|
||||
m_scoredLMs |= hypothesis.GetScoredLMs();
|
||||
hypothesis.GetScoredLMs() = 0x0;
|
||||
|
||||
firsthyp = 0;
|
||||
}
|
||||
hypothesis.GetMaskedLMs() = m_scoredLMs;
|
||||
cerr << "\t[Transfer Step] Hypo " << i << ": " << hypothesis << endl;
|
||||
Hypothesis *restartHypo = new Hypothesis(hypothesis, id);
|
||||
cerr << "\t\t* [transfer result] " << *restartHypo << endl;
|
||||
m_hypoStack[0].AddPrune(restartHypo);
|
||||
}
|
||||
// clear remaining stacks
|
||||
for (iterStack = m_hypoStack.begin()+1; iterStack != m_hypoStack.end(); ++iterStack)
|
||||
(*iterStack).Reset();
|
||||
|
||||
// Now decode the current PT
|
||||
fprintf(stderr, "Starting PT processing for %d, %d items in starting stack\n", pt, m_hypoStack[0].size());
|
||||
for (iterStack = m_hypoStack.begin(); iterStack != m_hypoStack.end(); ++iterStack)
|
||||
{
|
||||
HypothesisCollection &sourceHypoColl = *iterStack;
|
||||
|
||||
// the stack is pruned before processing (lazy pruning):
|
||||
sourceHypoColl.PruneToSize(m_staticData.GetMaxHypoStackSize());
|
||||
|
||||
sourceHypoColl.InitializeArcs();
|
||||
|
||||
// go through each hypothesis on the stack and try to expand it
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo)
|
||||
{
|
||||
Hypothesis &hypothesis = **iterHypo;
|
||||
cerr << "\t[PT Processing] Hypo " << i << ": " << hypothesis << endl;
|
||||
ProcessOneHypothesis(hypothesis, m_secondaryOptions[pt-1]); // expand the hypothesis
|
||||
}
|
||||
// some logging
|
||||
if (m_staticData.GetVerboseLevel() > 0) {
|
||||
//OutputHypoStack();
|
||||
OutputHypoStackSize();
|
||||
}
|
||||
}
|
||||
pt++;
|
||||
}
|
||||
else // do a generation step
|
||||
{
|
||||
int firsthyp = 1;
|
||||
// NOTE: This should be done elsewhere, like a GenerationDecodeStep
|
||||
// but that code has been mostly rewritten to handle TranslationOptions
|
||||
// so this hack is easier for now...
|
||||
|
||||
// normal generation step
|
||||
HypothesisCollection::const_iterator iterHypo;
|
||||
HypothesisCollection &currHypoColl = m_hypoStack.back();
|
||||
HypothesisCollection tmp;
|
||||
|
||||
for (iterHypo = currHypoColl.begin() ; iterHypo != currHypoColl.end() ; ++iterHypo)
|
||||
{
|
||||
Hypothesis &hypothesis = **iterHypo;
|
||||
if (firsthyp)
|
||||
{
|
||||
m_scoredLMs |= hypothesis.GetScoredLMs();
|
||||
hypothesis.GetScoredLMs() = 0x0;
|
||||
|
||||
firsthyp = 0;
|
||||
}
|
||||
hypothesis.GetMaskedLMs() = m_scoredLMs;
|
||||
cerr << "Generating from Hypo: " << hypothesis << endl;
|
||||
|
||||
// Actual Generation
|
||||
const Phrase &targetPhrase = hypothesis.GetTargetPhrase();
|
||||
size_t targetLength = targetPhrase.GetSize();
|
||||
vector< WordList > wordListVector(targetLength);
|
||||
|
||||
(*iterStep)->GenerateOptions(wordListVector, targetPhrase);
|
||||
|
||||
// use generation list (wordList)
|
||||
// set up iterators (total number of expansions)
|
||||
size_t numIteration = 1;
|
||||
vector< WordListIterator > wordListIterVector(targetLength);
|
||||
vector< const Word* > mergeWords(targetLength);
|
||||
for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
|
||||
{
|
||||
wordListIterVector[currPos] = wordListVector[currPos].begin();
|
||||
numIteration *= wordListVector[currPos].size();
|
||||
}
|
||||
//fprintf(stderr, "INFO: numiteration == %d\n", numIteration);
|
||||
|
||||
// go thru each possible factor for each word & create hypothesis
|
||||
for (size_t currIter = 0 ; currIter < numIteration ; currIter++)
|
||||
{
|
||||
ScoreComponentCollection2 generationScore; // total score for this string of words
|
||||
|
||||
// create vector of words with new factors for last phrase
|
||||
for (size_t currPos = 0 ; currPos < targetLength ; currPos++)
|
||||
{
|
||||
const WordPair &wordPair = *wordListIterVector[currPos];
|
||||
mergeWords[currPos] = &(wordPair.first);
|
||||
generationScore.PlusEquals(wordPair.second);
|
||||
}
|
||||
|
||||
// merge with existing trans opt
|
||||
Phrase genPhrase(Output, mergeWords);
|
||||
//TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore);
|
||||
//cerr << "INFO: " << genPhrase << std::endl;
|
||||
Hypothesis *newh = new Hypothesis(hypothesis, genPhrase, generationScore, id);
|
||||
if (newh != NULL)
|
||||
{
|
||||
//outputPartialTranslOptColl.Add(newTransOpt);
|
||||
cerr << "\t\t+ Generating: " << *newh << std::endl;
|
||||
tmp.insert(iterHypo, *newh);
|
||||
}
|
||||
|
||||
// increment iterators
|
||||
for (size_t currPos = 0 ; currPos < wordListVector.size() ; currPos++)
|
||||
{
|
||||
WordListIterator &iter = wordListIterVector[currPos];
|
||||
iter++;
|
||||
if (iter != wordListVector[currPos].end())
|
||||
{ // eg. 4 -> 5
|
||||
}
|
||||
else
|
||||
{ // eg 9 -> 10
|
||||
iter = wordListVector[currPos].begin();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
currHypoColl.Reset();
|
||||
currHypoColl.insertset(tmp);
|
||||
fprintf(stderr, "After procssing generation step #%d, %d items in hyp collection\n", gt, currHypoColl.size());
|
||||
gt++;
|
||||
}
|
||||
id++;
|
||||
}
|
||||
|
||||
// some more logging
|
||||
if (m_staticData.GetVerboseLevel() > 0) {
|
||||
@ -116,10 +321,14 @@ void Manager::ProcessSentence()
|
||||
* violation of reordering limits.
|
||||
* \param hypothesis hypothesis to be expanded upon
|
||||
*/
|
||||
void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis,
|
||||
TranslationOptionCollection *options,
|
||||
int index)
|
||||
{
|
||||
// since we check for reordering limits, its good to have that limit handy
|
||||
int maxDistortion = m_staticData.GetMaxDistortion();
|
||||
|
||||
// Changed this, secondary processing wants monotone decoding
|
||||
int maxDistortion = m_staticData.GetMaxDistortion(); // : 0;
|
||||
|
||||
// no limit of reordering: only check for overlap
|
||||
if (maxDistortion < 0)
|
||||
@ -135,7 +344,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
if (!hypoBitmap.Overlap(WordsRange(startPos, endPos)))
|
||||
{
|
||||
ExpandAllHypotheses(hypothesis
|
||||
, m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
|
||||
, options->GetTranslationOptionList(WordsRange(startPos, endPos))
|
||||
, index
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -164,7 +375,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
)
|
||||
{
|
||||
ExpandAllHypotheses(hypothesis
|
||||
,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
|
||||
, options->GetTranslationOptionList(WordsRange(startPos, endPos))
|
||||
, index
|
||||
);
|
||||
}
|
||||
}
|
||||
// filling in gap => just check for overlap
|
||||
@ -174,7 +387,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
&& !hypoBitmap.Overlap(WordsRange(startPos, endPos)))
|
||||
{
|
||||
ExpandAllHypotheses(hypothesis
|
||||
,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
|
||||
, options->GetTranslationOptionList(WordsRange(startPos, endPos))
|
||||
, index
|
||||
);
|
||||
}
|
||||
}
|
||||
// ignoring, continuing forward => be limited by start of gap
|
||||
@ -184,7 +399,9 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
&& !hypoBitmap.Overlap(WordsRange(startPos, endPos)))
|
||||
{
|
||||
ExpandAllHypotheses(hypothesis
|
||||
,m_possibleTranslations.GetTranslationOptionList(WordsRange(startPos, endPos)));
|
||||
, options->GetTranslationOptionList(WordsRange(startPos, endPos))
|
||||
, index
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -197,12 +414,12 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
|
||||
* \param transOptList list of translation options to be applied
|
||||
*/
|
||||
|
||||
void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList)
|
||||
void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList, int index)
|
||||
{
|
||||
TranslationOptionList::const_iterator iter;
|
||||
for (iter = transOptList.begin() ; iter != transOptList.end() ; ++iter)
|
||||
{
|
||||
ExpandHypothesis(hypothesis, **iter);
|
||||
ExpandHypothesis(hypothesis, **iter, index);
|
||||
}
|
||||
}
|
||||
|
||||
@ -213,11 +430,21 @@ void Manager::ExpandAllHypotheses(const Hypothesis &hypothesis,const Translation
|
||||
* \param transOpt translation option (phrase translation)
|
||||
* that is applied to create the new hypothesis
|
||||
*/
|
||||
void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt)
|
||||
void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, int index)
|
||||
{
|
||||
cerr << "Trying to expand: " << hypothesis << " with " << transOpt << std::endl;
|
||||
if (index > -1 &&
|
||||
(transOpt.GetTargetPhrase().GetSize() + hypothesis.GetTargetLen() > hypothesis.GetTargetPhrase().GetSize()
|
||||
|| !hypothesis.GetTargetPhrase().IsCompatiblePartial(transOpt.GetTargetPhrase(), hypothesis.GetTargetLen())
|
||||
)
|
||||
)
|
||||
return;
|
||||
// create hypothesis and calculate all its scores
|
||||
Hypothesis *newHypo = hypothesis.CreateNext(transOpt);
|
||||
newHypo->CalcScore(m_staticData, m_possibleTranslations.GetFutureScore());
|
||||
newHypo->CalcScore(m_staticData,
|
||||
(index == -1 ? m_possibleTranslations.GetFutureScore()
|
||||
: m_secondaryOptions[index]->GetFutureScore())
|
||||
);
|
||||
|
||||
// logging for the curious
|
||||
if(m_staticData.GetVerboseLevel() > 2)
|
||||
@ -227,6 +454,7 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
|
||||
|
||||
// add to hypothesis stack
|
||||
size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
|
||||
cerr << "\t+ Adding: " << *newHypo << "\n";
|
||||
m_hypoStack[wordsTranslated].AddPrune(newHypo);
|
||||
}
|
||||
|
||||
|
@ -32,6 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "LatticePathList.h"
|
||||
#include "SquareMatrix.h"
|
||||
#include "WordsBitmap.h"
|
||||
#include "DecodeStep_Generation.h"
|
||||
//#include "UnknownWordHandler.h"
|
||||
|
||||
class LatticePath;
|
||||
@ -79,12 +80,16 @@ protected:
|
||||
// no of elements = no of words in source + 1
|
||||
StaticData &m_staticData; /**< holds various kinds of constants, counters, and global data structures */
|
||||
TranslationOptionCollection &m_possibleTranslations; /**< pre-computed list of translation options for the phrases in this sentence */
|
||||
TranslationOptionCollection **m_secondaryOptions; /**< Lists per factor */
|
||||
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
|
||||
|
||||
// vector of scored lms
|
||||
unsigned long m_scoredLMs;
|
||||
|
||||
// functions for creating hypotheses
|
||||
void ProcessOneHypothesis(const Hypothesis &hypothesis);
|
||||
void ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList);
|
||||
void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt);
|
||||
void ProcessOneHypothesis(const Hypothesis &hypothesis, TranslationOptionCollection *options = NULL, int index = 0);
|
||||
void ExpandAllHypotheses(const Hypothesis &hypothesis,const TranslationOptionList &transOptList, int index = 0);
|
||||
void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, int index = 0);
|
||||
|
||||
// logging
|
||||
void OutputHypoStack(int stack = -1);
|
||||
|
@ -52,7 +52,7 @@ public:
|
||||
/** destructor, cleans out list */
|
||||
~PartialTranslOptColl()
|
||||
{
|
||||
RemoveAllInColl<std::vector<TranslationOption*>::iterator>( m_list );
|
||||
RemoveAllInColl( m_list );
|
||||
}
|
||||
|
||||
void AddNoPrune(TranslationOption *partialTranslOpt);
|
||||
|
@ -128,6 +128,21 @@ void Phrase::MergeFactors(const Phrase &copy)
|
||||
}
|
||||
}
|
||||
|
||||
void Phrase::MergeFactorsPartial(const Phrase ©, int start)
|
||||
{
|
||||
size_t size = copy.GetSize();
|
||||
for (size_t currPos = start; currPos < start + size; currPos++)
|
||||
{
|
||||
for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
const Factor *factor = copy.GetFactor(currPos - start, factorType);
|
||||
if (factor != NULL)
|
||||
SetFactor(currPos, factorType, factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Phrase::MergeFactors(const Phrase &copy, FactorType factorType)
|
||||
{
|
||||
assert(GetSize() == copy.GetSize());
|
||||
@ -357,6 +372,28 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase) const
|
||||
|
||||
}
|
||||
|
||||
bool Phrase::IsCompatiblePartial(const Phrase &inputPhrase, int start) const
|
||||
{
|
||||
const size_t size = inputPhrase.GetSize();
|
||||
|
||||
for (size_t currPos = start; currPos < start + size; currPos++)
|
||||
{
|
||||
for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
const Factor *thisFactor = GetFactor(currPos, factorType)
|
||||
,*inputFactor = inputPhrase.GetFactor(currPos - start, factorType);
|
||||
if (thisFactor != NULL && inputFactor != NULL && thisFactor != inputFactor)
|
||||
return false;
|
||||
//cerr << "\t* " << thisFactor << " is compatible with " << inputFactor << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
//cerr << inputPhrase << " is compatible with " << *this << "\n";
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
bool Phrase::IsCompatible(const Phrase &inputPhrase, FactorType factorType) const
|
||||
{
|
||||
if (inputPhrase.GetSize() != GetSize()) { return false; }
|
||||
|
@ -64,12 +64,14 @@ public:
|
||||
, FactorCollection &factorCollection);
|
||||
|
||||
void MergeFactors(const Phrase ©);
|
||||
void MergeFactorsPartial(const Phrase ©, int start = 0);
|
||||
//! copy a single factor (specified by factorType)
|
||||
void MergeFactors(const Phrase ©, FactorType factorType);
|
||||
//! copy all factors specified in factorVec and none others
|
||||
void MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec);
|
||||
|
||||
// must run IsCompatible() to ensure incompatible factors aren't being overwritten
|
||||
bool IsCompatiblePartial(const Phrase &inputPhrase, int start) const;
|
||||
bool IsCompatible(const Phrase &inputPhrase) const;
|
||||
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
|
||||
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
|
||||
|
@ -124,8 +124,10 @@ private:
|
||||
off_t startPos;
|
||||
FILE* f;
|
||||
public:
|
||||
#if 0
|
||||
#ifdef DEBUG
|
||||
DECLAREMEMSTAT(Self);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
PrefixTreeF(FILE* f_=0) : f(f_) {if(f) read();}
|
||||
@ -271,7 +273,7 @@ public:
|
||||
|
||||
};
|
||||
template<typename T,typename D> D PrefixTreeF<T,D>::def;
|
||||
#ifdef DEBUG
|
||||
#if 0 //def DEBUG
|
||||
template<typename T,typename D> MemoryStatsPrinter< PrefixTreeF<T,D> > PrefixTreeF<T,D>::memStat("PrefixTreeF<T,D>",0);
|
||||
#endif
|
||||
#endif
|
||||
|
@ -19,6 +19,7 @@ License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include "TranslationOptionCollection.h"
|
||||
#include "Sentence.h"
|
||||
#include "DecodeStep.h"
|
||||
@ -59,7 +60,7 @@ TranslationOptionCollection::~TranslationOptionCollection()
|
||||
{
|
||||
for (size_t endPos = startPos ; endPos < size ; ++endPos)
|
||||
{
|
||||
RemoveAllInColl<TranslationOptionList::iterator>(GetTranslationOptionList(startPos, endPos));
|
||||
RemoveAllInColl(GetTranslationOptionList(startPos, endPos));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -70,20 +71,39 @@ bool CompareTranslationOption(const TranslationOption *a, const TranslationOptio
|
||||
return a->GetFutureScore() > b->GetFutureScore();
|
||||
}
|
||||
|
||||
void TranslationOptionCollection::ProcessUnknownWord()
|
||||
{
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
size_t size = m_source.GetSize();
|
||||
vector<bool> process(size);
|
||||
fill(process.begin(), process.end(), true);
|
||||
|
||||
for (size_t startPos = 0 ; startPos < size ; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos ; endPos < size ; ++endPos)
|
||||
{
|
||||
TranslationOptionList &fullList = GetTranslationOptionList(startPos, endPos);
|
||||
size_t s = fullList.size();
|
||||
if (s > 0)
|
||||
{
|
||||
fill(process.begin() + startPos, process.begin() + endPos + 1, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t currPos = 0 ; currPos < size ; ++currPos)
|
||||
{
|
||||
if (process[currPos])
|
||||
ProcessUnknownWord(currPos, *m_factorCollection);
|
||||
}
|
||||
}
|
||||
|
||||
/** pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
|
||||
void TranslationOptionCollection::Prune()
|
||||
{
|
||||
size_t size = m_source.GetSize();
|
||||
ProcessUnknownWord();
|
||||
|
||||
// create unknown words for 1 word coverage where we don't have any trans options
|
||||
for (size_t startPos = 0 ; startPos < size ; ++startPos)
|
||||
{
|
||||
TranslationOptionList &fullList = GetTranslationOptionList(startPos, startPos);
|
||||
if (fullList.size() == 0)
|
||||
{
|
||||
ProcessUnknownWord(startPos, *m_factorCollection);
|
||||
}
|
||||
}
|
||||
size_t size = m_source.GetSize();
|
||||
|
||||
// prune to max no. of trans opt
|
||||
if (m_maxNoTransOptPerCoverage == 0)
|
||||
@ -210,6 +230,7 @@ void TranslationOptionCollection::CreateTranslationOptions(
|
||||
const list < DecodeStep* > &decodeStepList
|
||||
, FactorCollection &factorCollection)
|
||||
{
|
||||
m_dstep = (DecodeStep *) &decodeStepList.front();
|
||||
m_factorCollection = &factorCollection;
|
||||
|
||||
for (size_t startPos = 0 ; startPos < m_source.GetSize() ; startPos++)
|
||||
@ -325,13 +346,17 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
|
||||
|
||||
for (unsigned int currFactor = 0 ; currFactor < NUM_FACTORS ; currFactor++)
|
||||
{
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
if (m_dstep->GetDictionaryPtr()->GetOutputFactorMask().test(currFactor)) // only set bits for this pt
|
||||
{
|
||||
|
||||
const Factor *sourceFactor = sourceWord[currFactor];
|
||||
if (sourceFactor == NULL)
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
|
||||
else
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
|
||||
FactorType factorType = static_cast<FactorType>(currFactor);
|
||||
|
||||
const Factor *sourceFactor = sourceWord[currFactor];
|
||||
if (sourceFactor == NULL)
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, UNKNOWN_FACTOR);
|
||||
else
|
||||
targetWord[factorType] = factorCollection.AddFactor(Output, factorType, sourceFactor->GetString());
|
||||
}
|
||||
}
|
||||
|
||||
targetPhrase.SetScore();
|
||||
@ -399,11 +424,13 @@ void TranslationOptionCollection::ProcessInitialTranslation(
|
||||
TRACE_ERR(endl);
|
||||
}
|
||||
}
|
||||
#if 0 // do this elsewhere now
|
||||
// handling unknown words
|
||||
else if (wordsRange.GetWordsCount() == 1)
|
||||
{
|
||||
|
||||
ProcessUnknownWord(startPos, factorCollection);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/** add translation option to the list
|
||||
|
@ -54,6 +54,7 @@ class TranslationOptionCollection
|
||||
friend std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll);
|
||||
TranslationOptionCollection(const TranslationOptionCollection&); /*< no copy constructor */
|
||||
protected:
|
||||
DecodeStep *m_dstep;
|
||||
std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */
|
||||
InputType const &m_source;
|
||||
SquareMatrix m_futureScore; /*< matrix of future costs for parts of the sentence */
|
||||
@ -70,6 +71,7 @@ protected:
|
||||
, PartialTranslOptColl &outputPartialTranslOptColl
|
||||
, size_t startPos, size_t endPos );
|
||||
|
||||
void ProcessUnknownWord();
|
||||
virtual void ProcessOneUnknownWord(const FactorArray &sourceWord
|
||||
, size_t sourcePos
|
||||
, FactorCollection &factorCollection);
|
||||
|
@ -144,7 +144,7 @@ std::string Join(const std::string& delimiter, const std::vector<T>& items)
|
||||
std::ostringstream outstr;
|
||||
if(items.size() == 0) return "";
|
||||
outstr << items[0];
|
||||
for(unsigned int i = 1; i < items.size(); i++) outstr << " " << items[i];
|
||||
for(unsigned int i = 1; i < items.size(); i++) outstr << delimiter << items[i];
|
||||
return outstr.str();
|
||||
}
|
||||
|
||||
@ -215,11 +215,10 @@ inline float CalcTranslationScore(const std::vector<float> &scoreVector,
|
||||
return out.str(); \
|
||||
} \
|
||||
|
||||
template<class ITER, class COLL>
|
||||
template<class COLL>
|
||||
void RemoveAllInColl(COLL &coll)
|
||||
{
|
||||
ITER iter;
|
||||
for (iter = coll.begin() ; iter != coll.end() ; ++iter)
|
||||
for (typename COLL::iterator iter = coll.begin() ; iter != coll.end() ; ++iter)
|
||||
{
|
||||
delete (*iter);
|
||||
}
|
||||
@ -237,7 +236,9 @@ template<typename T> inline void ShrinkToFit(T& v) {
|
||||
/***
|
||||
* include checks for null return value, and helpful print statements
|
||||
*/
|
||||
/*
|
||||
void* xmalloc(unsigned int numBytes);
|
||||
void* xrealloc(void* ptr, unsigned int numBytes);
|
||||
#define malloc(x) xmalloc(x)
|
||||
#define realloc(x, n) xrealloc(x, n)
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user