- add assignment operator for Phrase class

(the default implementation segfaults because of the mempool)
- store source phrase pointer in the confusion net case


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@465 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
zens 2006-08-02 19:31:50 +00:00
parent 6ff37224f1
commit 5deccd7744
12 changed files with 131 additions and 37 deletions

View File

@ -21,7 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cassert>
#include <iostream>
#include <limits>
#include <assert.h>
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"
@ -43,6 +42,7 @@ ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
: m_prevHypo(NULL)
, m_targetPhrase(emptyTarget)
, m_sourcePhrase(0)
, m_sourceCompleted(source.GetSize())
, m_sourceInput(source)
, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
@ -65,6 +65,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
: m_prevHypo(&prevHypo)
, m_targetPhrase(transOpt.GetTargetPhrase())
, m_sourcePhrase(0)
, m_sourceCompleted (prevHypo.m_sourceCompleted )
, m_sourceInput (prevHypo.m_sourceInput)
, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
@ -453,3 +454,17 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
}
// String form of the source side of this hypothesis.
// With no source phrase pointer set, the current source words range of the
// source input is rendered; otherwise the pointed-to phrase's ToString() is returned.
std::string Hypothesis::GetSourcePhraseStringRep() const
{
	if(!m_sourcePhrase)
		return m_sourceInput.GetStringRep(m_currSourceWordsRange);

	// sanity check: ToString() must agree with the string rep over the full word range
	assert(m_sourcePhrase->ToString()==m_sourcePhrase->GetStringRep(WordsRange(0,m_sourcePhrase->GetSize()-1)));
	return m_sourcePhrase->ToString();
}
// String form of the target phrase, taken over the current target words range.
std::string Hypothesis::GetTargetPhraseStringRep() const
{
	std::string targetRep = m_targetPhrase.GetStringRep(m_currTargetWordsRange);
	return targetRep;
}

View File

@ -56,6 +56,7 @@ protected:
const Hypothesis* m_prevHypo;
const Phrase &m_targetPhrase; //target phrase being created at the current decoding step
Phrase const* m_sourcePhrase;
WordsBitmap m_sourceCompleted;
//TODO: how to integrate this into confusion network framework; what if
//it's a confusion network in the end???
@ -176,6 +177,9 @@ public:
return m_sourceInput;
}
std::string GetSourcePhraseStringRep() const;
std::string GetTargetPhraseStringRep() const;
// curr - pos is relative to the CURRENT hypothesis's starting index
// (i.e., start of sentence would be some negative number, which is
// not allowed - USE WITH CAUTION)

View File

@ -19,7 +19,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "assert.h"
#include <cassert>
#include <algorithm>
#include <sstream>
#include <string>
@ -38,16 +38,41 @@ Phrase::Phrase(const Phrase &copy)
,m_arraySize(copy.m_arraySize)
,m_memPoolIndex(copy.m_memPoolIndex)
{
assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
}
// Copy-assignment operator.
// Each Phrase holds its own factor array taken from the pool selected by
// m_memPoolIndex and hands it back in the destructor, so a member-wise copy
// would leave two objects releasing the same array. This releases the current
// array (if any), copies the scalar members from x, takes a fresh array from
// x's pool and copies the m_phraseSize entries across.
Phrase& Phrase::operator=(const Phrase& x)
{
	if(this==&x)
		return *this;	// self-assignment: freeing first would destroy the data to copy

	if(m_factorArray)
	{
		assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
		s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
	}

	m_direction=x.m_direction;
	m_phraseSize=x.m_phraseSize;
	m_arraySize=x.m_arraySize;
	m_memPoolIndex=x.m_memPoolIndex;

	// the pool index just changed, so validate it again before allocating
	assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
	m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();

	// x may have no array at all (default-constructed Phrase); memcpy from a
	// null pointer is undefined even when the byte count is zero
	if(x.m_factorArray && m_phraseSize>0)
		memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));

	return *this;
}
// Creates an empty phrase (size 0) for the given direction, with its factor
// array taken from memory pool 0 and the array size recorded as ARRAY_SIZE_INCR.
Phrase::Phrase(FactorDirection direction)
: m_direction(direction)
, m_phraseSize(0)
, m_arraySize(ARRAY_SIZE_INCR)
, m_memPoolIndex(0)
{
	// same check as the copy constructor: the pool slot must exist and be non-null
	assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
	m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
}
@ -74,7 +99,17 @@ Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWord
// Hands the factor array back to its memory pool.
Phrase::~Phrase()
{
	// RZ:
	// an unconditional free will segFault if Phrase was default constructed and
	// AddWord was never called (m_factorArray is NULL in that case).
	// not sure if this is really the intended behaviour;
	// an assertion failure would be better than a segFault, but skipping the
	// release when there is no array seems more appropriate.
	if(m_factorArray)
	{
		assert(m_memPoolIndex<s_memPool.size());
		s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
	}
}
void Phrase::MergeFactors(const Phrase &copy)

View File

@ -34,7 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
protected:
private:
static std::vector<mempool*> s_memPool;
FactorDirection m_direction;
@ -50,10 +50,12 @@ public:
// Default constructor: size 0, array size 0, pool index 0 and no factor array
// (m_factorArray is NULL).
// NOTE(review): m_direction does not appear in this initializer list, so it is
// not set here - confirm nothing reads it from a default-constructed Phrase.
inline Phrase()
: m_phraseSize(0)
, m_arraySize(0)
, m_memPoolIndex(0)
, m_factorArray(NULL)
{ // shouldn't be used. only for map
}
Phrase(const Phrase &copy);
Phrase& operator=(const Phrase&);
Phrase(FactorDirection direction);
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
@ -110,6 +112,8 @@ public:
std::string GetStringRep(const WordsRange &wordsRange) const;
// Passes the result of AddWord() and w's factor array to Word::Copy -
// presumably appending a copy of w as a new word (AddWord and Word::Copy are
// defined outside this view; confirm).
void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());}
TO_STRING;
// used to insert & find phrase in dictionary

View File

@ -19,6 +19,10 @@ class PPimp;
class PhraseDictionaryTree : public Dictionary {
PDTimp *imp; //implementation
PhraseDictionaryTree(); // not implemented
PhraseDictionaryTree(const PhraseDictionaryTree&); //not implemented
void operator=(const PhraseDictionaryTree&); //not implemented
public:
PhraseDictionaryTree(size_t noScoreComponent);

View File

@ -9,6 +9,7 @@
#include "Input.h"
#include "ConfusionNet.h"
#include "StaticData.h"
#include "UniqueObject.h"
inline bool existsFile(const char* filename) {
struct stat mystat;
@ -56,6 +57,7 @@ struct PDTAimp {
m_tgtColls.clear();
m_cache.clear();
m_rangeCache.clear();
Phrase dummy(Input); uniqueObject(dummy,1);
}
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
@ -175,10 +177,11 @@ struct PDTAimp {
Range range;
float score;
unsigned realWords;
Phrase src;
// Constructors. Every State starts with src as an empty Input-direction
// phrase; code that needs the accumulated source phrase assigns src after
// construction. Only one constructor per signature is kept - the variants
// that did not initialise src are dropped.
State() : range(0,0),score(0.0),realWords(0),src(Input) {}
State(size_t b,size_t e,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(b,e),score(sc),realWords(rw),src(Input) {}
State(Range const& r,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(r),score(sc),realWords(rw),src(Input) {}
size_t begin() const {return range.first;}
size_t end() const {return range.second;}
@ -188,7 +191,8 @@ struct PDTAimp {
void CreateTargetPhrase(TargetPhrase& targetPhrase,
StringTgtCand::first_type const& factorStrings,
StringTgtCand::second_type const& scoreVector) const
StringTgtCand::second_type const& scoreVector,
Phrase const* srcPtr=0) const
{
for(size_t k=0;k<factorStrings.size();++k)
@ -199,6 +203,7 @@ struct PDTAimp {
fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
}
targetPhrase.SetScore(m_obj, scoreVector, m_weights, *m_languageModels, m_weightWP);
targetPhrase.SetSourcePhrase(srcPtr);
}
@ -213,7 +218,7 @@ struct PDTAimp {
}
std::sort(costs.begin(),nth,std::greater<std::pair<float,size_t> >());
// convert into TargerPhraseCollection
// convert into TargetPhraseCollection
TargetPhraseCollection *rv=new TargetPhraseCollection;
for(std::vector<std::pair<float,size_t> >::iterator it=costs.begin();it!=nth;++it)
rv->push_back(tCands[it->second]);
@ -224,8 +229,9 @@ struct PDTAimp {
// Score record kept per target candidate: the total score, the translation
// score vector, and a pointer to the source phrase the scores belong to
// (null until one is stored).
struct TScores {
	float total;
	StringTgtCand::second_type trans;
	Phrase const* src;

	// single default constructor; src must start out null rather than uninitialised
	TScores() : total(0.0),src(0) {}
};
void CacheSource(ConfusionNet const& src)
@ -260,19 +266,23 @@ struct PDTAimp {
{
Range newRange(curr.begin(),curr.end()+1);
float newScore=curr.GetScore()+currCol[colidx].second;
Phrase newSrc(curr.src);
newSrc.push_back(w);
if(newRange.second<src.GetSize())
stack.push_back(State(newRange,nextP,newScore,newRealWords));
{
stack.push_back(State(newRange,nextP,newScore,newRealWords));
stack.back().src=newSrc;
}
std::vector<StringTgtCand> tcands;
m_dict->GetTargetCandidates(nextP,tcands);
if(tcands.size())
{
E2Costs& e2costs=cov2cand[newRange];
Phrase const* srcPtr=uniqueObject(newSrc);
for(size_t i=0;i<tcands.size();++i)
{
std::vector<float> nscores(tcands[i].second.size()+m_numInputScores,0.0);
std::transform(tcands[i].second.begin(),tcands[i].second.end(),nscores.begin(),TransformScore);
switch(m_numInputScores)
@ -295,6 +305,7 @@ struct PDTAimp {
{
scores.total=score;
scores.trans=nscores;
scores.src=srcPtr;
}
}
}
@ -318,7 +329,7 @@ struct PDTAimp {
{
TScores const & scores=j->second;
TargetPhrase targetPhrase(Output);
CreateTargetPhrase(targetPhrase,j->first,scores.trans);
CreateTargetPhrase(targetPhrase,j->first,scores.trans,scores.src);
costs.push_back(std::make_pair(targetPhrase.GetFutureScore(),tCands.size()));
tCands.push_back(targetPhrase);
}

View File

@ -16,6 +16,9 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionaryBase {
typedef PhraseDictionaryBase MyBase;
PDTAimp *imp;
friend class PDTAimp;
PhraseDictionaryTreeAdaptor();
PhraseDictionaryTreeAdaptor(const PhraseDictionaryTreeAdaptor&);
void operator=(const PhraseDictionaryTreeAdaptor&);
public:
PhraseDictionaryTreeAdaptor(size_t noScoreComponent,unsigned numInputScores);
virtual ~PhraseDictionaryTreeAdaptor();

View File

@ -31,7 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
// Creates an empty target phrase for the given direction with all scores at
// zero and no source phrase attached.
TargetPhrase::TargetPhrase(FactorDirection direction)
	:Phrase(direction)
	,m_transScore(0.0)
	,m_ngramScore(0.0)
	,m_fullScore(0.0)
	,m_sourcePhrase(0)	// set later through SetSourcePhrase()
{
}

View File

@ -40,9 +40,10 @@ protected:
float m_transScore, m_ngramScore, m_fullScore;
ScoreComponentCollection2 m_scoreBreakdown;
// in case of confusion net, ptr to source phrase
Phrase const* m_sourcePhrase;
public:
TargetPhrase(FactorDirection direction);
TargetPhrase(FactorDirection direction=Output);
//! used by the unknown word handler- these targets
//! don't have a translation score, so wp is the only thing used
@ -95,6 +96,15 @@ public:
return m_scoreBreakdown;
}
// Stores the given source phrase pointer (may be null); the pointer is only
// stored, nothing is copied.
void SetSourcePhrase(Phrase const* p)
{
	this->m_sourcePhrase = p;
}
// Returns the stored source phrase pointer; null when none has been set.
Phrase const* GetSourcePhrase() const
{
	return this->m_sourcePhrase;
}
TO_STRING;
};

View File

@ -29,10 +29,10 @@ using namespace std;
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
: m_phrase(targetPhrase)
,m_sourceWordsRange (wordsRange)
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
,m_sourceWordsRange (wordsRange)
{ // used by initial translation step
// set score
m_scoreGen = 0;
m_scoreTrans = targetPhrase.GetTranslationScore();
@ -40,9 +40,10 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetP
}
TranslationOption::TranslationOption(const TranslationOption &copy, const TargetPhrase &targetPhrase)
: m_phrase(targetPhrase)
,m_sourceWordsRange (copy.m_sourceWordsRange)
,m_scoreBreakdown(copy.m_scoreBreakdown)
: m_targetPhrase(targetPhrase)
,m_sourcePhrase(copy.m_sourcePhrase) // take source phrase pointer from initial translation option
,m_sourceWordsRange (copy.m_sourceWordsRange)
,m_scoreBreakdown(copy.m_scoreBreakdown)
{ // used in creating the next translation step
m_scoreGen = copy.GetGenerationScore();
m_scoreTrans = copy.GetTranslationScore() + targetPhrase.GetTranslationScore();
@ -55,7 +56,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy
, const GenerationDictionary *generationDictionary
, float generationScore
, float weight)
: m_phrase (inputPhrase)
: m_targetPhrase (inputPhrase),m_sourcePhrase(copy.m_sourcePhrase)
, m_sourceWordsRange (copy.m_sourceWordsRange)
, m_scoreBreakdown(copy.m_scoreBreakdown)
{ // used in creating the next generation step
@ -67,7 +68,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy
}
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
: m_phrase(targetPhrase)
: m_targetPhrase(targetPhrase)
,m_sourceWordsRange (wordsRange)
,m_scoreTrans(0)
,m_scoreGen(0)
@ -78,10 +79,10 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetP
TranslationOption *TranslationOption::MergeTranslation(const TargetPhrase &targetPhrase) const
{
if (m_phrase.IsCompatible(targetPhrase))
if (m_targetPhrase.IsCompatible(targetPhrase))
{
TargetPhrase mergePhrase(targetPhrase);
mergePhrase.MergeFactors(m_phrase);
mergePhrase.MergeFactors(m_targetPhrase);
TranslationOption *newTransOpt = new TranslationOption(*this, mergePhrase);
return newTransOpt;
}
@ -96,10 +97,10 @@ TranslationOption *TranslationOption::MergeGeneration(const Phrase &inputPhrase
, float generationScore
, float weight) const
{
if (m_phrase.IsCompatible(inputPhrase))
if (m_targetPhrase.IsCompatible(inputPhrase))
{
Phrase mergePhrase(inputPhrase);
mergePhrase.MergeFactors(m_phrase);
mergePhrase.MergeFactors(m_targetPhrase);
TranslationOption *newTransOpt = new TranslationOption(*this, mergePhrase, generationDictionary, generationScore, weight);
return newTransOpt;
}

View File

@ -40,11 +40,12 @@ class GenerationDictionary;
*/
class TranslationOption
{
friend std::ostream& operator<<(std::ostream& out, const TranslationOption& possibleTranslation);
friend std::ostream& operator<<(std::ostream& out, const TranslationOption& possibleTranslation);
protected:
const Phrase m_phrase;
const Phrase m_targetPhrase;
Phrase const* m_sourcePhrase;
const WordsRange m_sourceWordsRange;
float m_scoreTrans, m_scoreGen, m_futureScore, m_ngramScore;
@ -75,12 +76,16 @@ public:
// Returns the target phrase of this translation option.
inline const Phrase &GetTargetPhrase() const
{
	return m_targetPhrase;
}
// Returns the source words range of this translation option.
inline const WordsRange &GetSourceWordsRange() const
{
	return this->m_sourceWordsRange;
}
// Returns the stored source phrase pointer (may be null).
Phrase const* GetSourcePhrase() const
{
	return this->m_sourcePhrase;
}
bool Overlap(const Hypothesis &hypothesis) const;
/***
@ -139,7 +144,7 @@ public:
*/
// True when the target phrase of this option has size 0.
inline bool IsDeletionOption() const
{
	return m_targetPhrase.GetSize() == 0;
}
void CalcScore(const LMList &allLM, float weightWordPenalty);

View File

@ -94,15 +94,17 @@ char * mempool::alloc(){
ptr = free_list = block_list->block;
for (int i=0;i<block_size-1;i++) {
*(char **)ptr = ptr + item_size;
ptr = ptr + item_size;
*(char **)ptr = ptr + item_size;
ptr = ptr + item_size;
}
*(char **)ptr=NULL;
blocknum++;
}
assert(free_list);
ptr = free_list;
free_list=*(char **)ptr;