mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 15:48:05 +03:00
- add assignment operator for Phrase class
(default implementation segfaults because of mempool) - store source phrase pointer in confusion net case git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@465 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
6ff37224f1
commit
5deccd7744
@ -21,7 +21,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <assert.h>
|
||||
#include "TranslationOption.h"
|
||||
#include "TranslationOptionCollection.h"
|
||||
#include "DummyScoreProducers.h"
|
||||
@ -43,6 +42,7 @@ ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
|
||||
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
: m_prevHypo(NULL)
|
||||
, m_targetPhrase(emptyTarget)
|
||||
, m_sourcePhrase(0)
|
||||
, m_sourceCompleted(source.GetSize())
|
||||
, m_sourceInput(source)
|
||||
, m_currSourceWordsRange(NOT_FOUND, NOT_FOUND)
|
||||
@ -65,6 +65,7 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
||||
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
||||
: m_prevHypo(&prevHypo)
|
||||
, m_targetPhrase(transOpt.GetTargetPhrase())
|
||||
, m_sourcePhrase(0)
|
||||
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
||||
, m_sourceInput (prevHypo.m_sourceInput)
|
||||
, m_currSourceWordsRange (transOpt.GetSourceWordsRange())
|
||||
@ -453,3 +454,17 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
|
||||
}
|
||||
|
||||
|
||||
std::string Hypothesis::GetSourcePhraseStringRep() const
|
||||
{
|
||||
if(m_sourcePhrase) {
|
||||
assert(m_sourcePhrase->ToString()==m_sourcePhrase->GetStringRep(WordsRange(0,m_sourcePhrase->GetSize()-1)));
|
||||
return m_sourcePhrase->ToString();
|
||||
}
|
||||
else
|
||||
return m_sourceInput.GetStringRep(m_currSourceWordsRange);
|
||||
|
||||
}
|
||||
std::string Hypothesis::GetTargetPhraseStringRep() const
|
||||
{
|
||||
return m_targetPhrase.GetStringRep(m_currTargetWordsRange);
|
||||
}
|
||||
|
@ -56,6 +56,7 @@ protected:
|
||||
|
||||
const Hypothesis* m_prevHypo;
|
||||
const Phrase &m_targetPhrase; //target phrase being created at the current decoding step
|
||||
Phrase const* m_sourcePhrase;
|
||||
WordsBitmap m_sourceCompleted;
|
||||
//TODO: how to integrate this into confusion network framework; what if
|
||||
//it's a confusion network in the end???
|
||||
@ -176,6 +177,9 @@ public:
|
||||
return m_sourceInput;
|
||||
}
|
||||
|
||||
std::string GetSourcePhraseStringRep() const;
|
||||
std::string GetTargetPhraseStringRep() const;
|
||||
|
||||
// curr - pos is relative from CURRENT hypothesis's starting index
|
||||
// (ie, start of sentence would be some negative number, which is
|
||||
// not allowed- USE WITH CAUTION)
|
||||
|
@ -19,7 +19,7 @@ License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "assert.h"
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@ -38,16 +38,41 @@ Phrase::Phrase(const Phrase &copy)
|
||||
,m_arraySize(copy.m_arraySize)
|
||||
,m_memPoolIndex(copy.m_memPoolIndex)
|
||||
{
|
||||
assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
|
||||
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
|
||||
memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
|
||||
}
|
||||
|
||||
/**
 * Copy assignment.
 *
 * The compiler-generated version would copy the m_factorArray pointer itself,
 * so two phrases would end up releasing the same pool slot. This version
 * returns the current buffer to its pool, copies the scalar members, and then
 * takes a fresh slot from the pool selected by the new m_memPoolIndex and
 * copies the factor data into it.
 *
 * @param x phrase to copy from; self-assignment is a no-op
 * @return reference to *this
 */
Phrase& Phrase::operator=(const Phrase& x)
{
	if (this == &x)
		return *this;

	// Give the old buffer back first: m_memPoolIndex must still be the index
	// of the pool it was allocated from.
	if (m_factorArray)
	{
		assert(m_memPoolIndex<s_memPool.size());
		s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
		m_factorArray = NULL;
	}

	m_direction    = x.m_direction;
	m_phraseSize   = x.m_phraseSize;
	m_arraySize    = x.m_arraySize;
	m_memPoolIndex = x.m_memPoolIndex;

	// A default-constructed source has no buffer; mirror that state instead of
	// allocating a slot and calling memcpy with a null source pointer.
	if (x.m_factorArray)
	{
		// same precondition the copy constructor checks before allocating
		assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
		m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
		memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));
	}

	return *this;
}
|
||||
|
||||
|
||||
/**
 * Creates an empty phrase for the given factor direction.
 *
 * The phrase starts with zero words, a capacity of ARRAY_SIZE_INCR, and a
 * factor buffer taken from the first memory pool (index 0).
 */
Phrase::Phrase(FactorDirection direction)
	: m_direction(direction), m_phraseSize(0), m_arraySize(ARRAY_SIZE_INCR), m_memPoolIndex(0)
{
	// the pool for this index has to exist before a slot can be requested
	assert(m_memPoolIndex<s_memPool.size());

	mempool *pool = s_memPool[m_memPoolIndex];
	m_factorArray = (FactorArray*) pool->alloc();
}
|
||||
|
||||
@ -74,7 +99,17 @@ Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWord
|
||||
|
||||
/**
 * Returns the factor buffer to the memory pool it was allocated from.
 *
 * A default-constructed Phrase has a null m_factorArray, so the release is
 * guarded by a null check rather than performed unconditionally; the buffer
 * is released exactly once.
 */
Phrase::~Phrase()
{
	if (m_factorArray)
	{
		assert(m_memPoolIndex<s_memPool.size());
		s_memPool[m_memPoolIndex]->free((char*)m_factorArray);
	}
}
|
||||
|
||||
void Phrase::MergeFactors(const Phrase &copy)
|
||||
|
@ -34,7 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
class Phrase
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const Phrase&);
|
||||
protected:
|
||||
private:
|
||||
static std::vector<mempool*> s_memPool;
|
||||
|
||||
FactorDirection m_direction;
|
||||
@ -50,10 +50,12 @@ public:
|
||||
inline Phrase()
|
||||
: m_phraseSize(0)
|
||||
, m_arraySize(0)
|
||||
, m_memPoolIndex(0)
|
||||
, m_factorArray(NULL)
|
||||
{ // shouldn't be used. only for map
|
||||
}
|
||||
Phrase(const Phrase &copy);
|
||||
Phrase& operator=(const Phrase&);
|
||||
|
||||
Phrase(FactorDirection direction);
|
||||
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
|
||||
@ -110,6 +112,8 @@ public:
|
||||
|
||||
std::string GetStringRep(const WordsRange &wordsRange) const;
|
||||
|
||||
// Appends a copy of word w: AddWord() supplies the destination and
// Word::Copy fills it from w's factor array.
void push_back(Word const& w)
{
	Word::Copy(AddWord(), w.GetFactorArray());
}
|
||||
|
||||
TO_STRING;
|
||||
|
||||
// used to insert & find phrase in dictionary
|
||||
|
@ -19,6 +19,10 @@ class PPimp;
|
||||
|
||||
class PhraseDictionaryTree : public Dictionary {
|
||||
PDTimp *imp; //implementation
|
||||
|
||||
PhraseDictionaryTree(); // not implemented
|
||||
PhraseDictionaryTree(const PhraseDictionaryTree&); //not implemented
|
||||
void operator=(const PhraseDictionaryTree&); //not implemented
|
||||
public:
|
||||
PhraseDictionaryTree(size_t noScoreComponent);
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "Input.h"
|
||||
#include "ConfusionNet.h"
|
||||
#include "StaticData.h"
|
||||
#include "UniqueObject.h"
|
||||
|
||||
inline bool existsFile(const char* filename) {
|
||||
struct stat mystat;
|
||||
@ -56,6 +57,7 @@ struct PDTAimp {
|
||||
m_tgtColls.clear();
|
||||
m_cache.clear();
|
||||
m_rangeCache.clear();
|
||||
Phrase dummy(Input); uniqueObject(dummy,1);
|
||||
}
|
||||
|
||||
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)
|
||||
@ -175,10 +177,11 @@ struct PDTAimp {
|
||||
Range range;
|
||||
float score;
|
||||
unsigned realWords;
|
||||
Phrase src;
|
||||
|
||||
State() : range(0,0),score(0.0),realWords(0) {}
|
||||
State(size_t b,size_t e,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(b,e),score(sc),realWords(rw) {}
|
||||
State(Range const& r,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(r),score(sc),realWords(rw) {}
|
||||
State() : range(0,0),score(0.0),realWords(0),src(Input) {}
|
||||
State(size_t b,size_t e,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(b,e),score(sc),realWords(rw),src(Input) {}
|
||||
State(Range const& r,const PPtr& v,float sc=0.0,unsigned rw=0) : ptr(v),range(r),score(sc),realWords(rw),src(Input) {}
|
||||
|
||||
size_t begin() const {return range.first;}
|
||||
size_t end() const {return range.second;}
|
||||
@ -188,7 +191,8 @@ struct PDTAimp {
|
||||
|
||||
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
||||
StringTgtCand::first_type const& factorStrings,
|
||||
StringTgtCand::second_type const& scoreVector) const
|
||||
StringTgtCand::second_type const& scoreVector,
|
||||
Phrase const* srcPtr=0) const
|
||||
{
|
||||
|
||||
for(size_t k=0;k<factorStrings.size();++k)
|
||||
@ -199,6 +203,7 @@ struct PDTAimp {
|
||||
fa[m_output[l]]=m_factorCollection->AddFactor(Output, m_output[l], factors[l]);
|
||||
}
|
||||
targetPhrase.SetScore(m_obj, scoreVector, m_weights, *m_languageModels, m_weightWP);
|
||||
targetPhrase.SetSourcePhrase(srcPtr);
|
||||
}
|
||||
|
||||
|
||||
@ -213,7 +218,7 @@ struct PDTAimp {
|
||||
}
|
||||
std::sort(costs.begin(),nth,std::greater<std::pair<float,size_t> >());
|
||||
|
||||
// convert into TargerPhraseCollection
|
||||
// convert into TargetPhraseCollection
|
||||
TargetPhraseCollection *rv=new TargetPhraseCollection;
|
||||
for(std::vector<std::pair<float,size_t> >::iterator it=costs.begin();it!=nth;++it)
|
||||
rv->push_back(tCands[it->second]);
|
||||
@ -224,8 +229,9 @@ struct PDTAimp {
|
||||
struct TScores {
|
||||
float total;
|
||||
StringTgtCand::second_type trans;
|
||||
Phrase const* src;
|
||||
|
||||
TScores() : total(0.0) {}
|
||||
TScores() : total(0.0),src(0) {}
|
||||
};
|
||||
|
||||
void CacheSource(ConfusionNet const& src)
|
||||
@ -260,19 +266,23 @@ struct PDTAimp {
|
||||
{
|
||||
Range newRange(curr.begin(),curr.end()+1);
|
||||
float newScore=curr.GetScore()+currCol[colidx].second;
|
||||
Phrase newSrc(curr.src);
|
||||
newSrc.push_back(w);
|
||||
if(newRange.second<src.GetSize())
|
||||
stack.push_back(State(newRange,nextP,newScore,newRealWords));
|
||||
|
||||
{
|
||||
stack.push_back(State(newRange,nextP,newScore,newRealWords));
|
||||
stack.back().src=newSrc;
|
||||
}
|
||||
|
||||
std::vector<StringTgtCand> tcands;
|
||||
m_dict->GetTargetCandidates(nextP,tcands);
|
||||
|
||||
if(tcands.size())
|
||||
{
|
||||
E2Costs& e2costs=cov2cand[newRange];
|
||||
|
||||
Phrase const* srcPtr=uniqueObject(newSrc);
|
||||
for(size_t i=0;i<tcands.size();++i)
|
||||
{
|
||||
|
||||
std::vector<float> nscores(tcands[i].second.size()+m_numInputScores,0.0);
|
||||
std::transform(tcands[i].second.begin(),tcands[i].second.end(),nscores.begin(),TransformScore);
|
||||
switch(m_numInputScores)
|
||||
@ -295,6 +305,7 @@ struct PDTAimp {
|
||||
{
|
||||
scores.total=score;
|
||||
scores.trans=nscores;
|
||||
scores.src=srcPtr;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -318,7 +329,7 @@ struct PDTAimp {
|
||||
{
|
||||
TScores const & scores=j->second;
|
||||
TargetPhrase targetPhrase(Output);
|
||||
CreateTargetPhrase(targetPhrase,j->first,scores.trans);
|
||||
CreateTargetPhrase(targetPhrase,j->first,scores.trans,scores.src);
|
||||
costs.push_back(std::make_pair(targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
}
|
||||
|
@ -16,6 +16,9 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionaryBase {
|
||||
typedef PhraseDictionaryBase MyBase;
|
||||
PDTAimp *imp;
|
||||
friend class PDTAimp;
|
||||
PhraseDictionaryTreeAdaptor();
|
||||
PhraseDictionaryTreeAdaptor(const PhraseDictionaryTreeAdaptor&);
|
||||
void operator=(const PhraseDictionaryTreeAdaptor&);
|
||||
public:
|
||||
PhraseDictionaryTreeAdaptor(size_t noScoreComponent,unsigned numInputScores);
|
||||
virtual ~PhraseDictionaryTreeAdaptor();
|
||||
|
@ -31,7 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
using namespace std;
|
||||
|
||||
/**
 * Creates an empty target phrase for the given factor direction, with all
 * scores set to zero and no source phrase attached.
 */
TargetPhrase::TargetPhrase(FactorDirection direction)
	: Phrase(direction)
	, m_transScore(0.0)
	, m_ngramScore(0.0)
	, m_fullScore(0.0)
	, m_sourcePhrase(0)
{
}
|
||||
|
||||
|
@ -40,9 +40,10 @@ protected:
|
||||
float m_transScore, m_ngramScore, m_fullScore;
|
||||
ScoreComponentCollection2 m_scoreBreakdown;
|
||||
|
||||
// in case of confusion net, ptr to source phrase
|
||||
Phrase const* m_sourcePhrase;
|
||||
public:
|
||||
|
||||
TargetPhrase(FactorDirection direction);
|
||||
TargetPhrase(FactorDirection direction=Output);
|
||||
|
||||
//! used by the unknown word handler- these targets
|
||||
//! don't have a translation score, so wp is the only thing used
|
||||
@ -95,6 +96,15 @@ public:
|
||||
return m_scoreBreakdown;
|
||||
}
|
||||
|
||||
//! stores the given source phrase pointer (may be null); the pointer is
//! only stored here, the pointed-to phrase is not copied
void SetSourcePhrase(Phrase const* p)
{
	m_sourcePhrase = p;
}
|
||||
//! returns the stored source phrase pointer (null if none was set)
Phrase const* GetSourcePhrase() const
{
	return m_sourcePhrase;
}
|
||||
|
||||
TO_STRING;
|
||||
};
|
||||
|
||||
|
@ -29,10 +29,10 @@ using namespace std;
|
||||
|
||||
|
||||
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
|
||||
: m_phrase(targetPhrase)
|
||||
,m_sourceWordsRange (wordsRange)
|
||||
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
|
||||
,m_sourceWordsRange (wordsRange)
|
||||
{ // used by initial translation step
|
||||
|
||||
|
||||
// set score
|
||||
m_scoreGen = 0;
|
||||
m_scoreTrans = targetPhrase.GetTranslationScore();
|
||||
@ -40,9 +40,10 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetP
|
||||
}
|
||||
|
||||
TranslationOption::TranslationOption(const TranslationOption &copy, const TargetPhrase &targetPhrase)
|
||||
: m_phrase(targetPhrase)
|
||||
,m_sourceWordsRange (copy.m_sourceWordsRange)
|
||||
,m_scoreBreakdown(copy.m_scoreBreakdown)
|
||||
: m_targetPhrase(targetPhrase)
|
||||
,m_sourcePhrase(copy.m_sourcePhrase) // take source phrase pointer from initial translation option
|
||||
,m_sourceWordsRange (copy.m_sourceWordsRange)
|
||||
,m_scoreBreakdown(copy.m_scoreBreakdown)
|
||||
{ // used in creating the next translation step
|
||||
m_scoreGen = copy.GetGenerationScore();
|
||||
m_scoreTrans = copy.GetTranslationScore() + targetPhrase.GetTranslationScore();
|
||||
@ -55,7 +56,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy
|
||||
, const GenerationDictionary *generationDictionary
|
||||
, float generationScore
|
||||
, float weight)
|
||||
: m_phrase (inputPhrase)
|
||||
: m_targetPhrase (inputPhrase),m_sourcePhrase(copy.m_sourcePhrase)
|
||||
, m_sourceWordsRange (copy.m_sourceWordsRange)
|
||||
, m_scoreBreakdown(copy.m_scoreBreakdown)
|
||||
{ // used in creating the next generation step
|
||||
@ -67,7 +68,7 @@ TranslationOption::TranslationOption(const TranslationOption &copy
|
||||
}
|
||||
|
||||
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
|
||||
: m_phrase(targetPhrase)
|
||||
: m_targetPhrase(targetPhrase)
|
||||
,m_sourceWordsRange (wordsRange)
|
||||
,m_scoreTrans(0)
|
||||
,m_scoreGen(0)
|
||||
@ -78,10 +79,10 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetP
|
||||
|
||||
TranslationOption *TranslationOption::MergeTranslation(const TargetPhrase &targetPhrase) const
|
||||
{
|
||||
if (m_phrase.IsCompatible(targetPhrase))
|
||||
if (m_targetPhrase.IsCompatible(targetPhrase))
|
||||
{
|
||||
TargetPhrase mergePhrase(targetPhrase);
|
||||
mergePhrase.MergeFactors(m_phrase);
|
||||
mergePhrase.MergeFactors(m_targetPhrase);
|
||||
TranslationOption *newTransOpt = new TranslationOption(*this, mergePhrase);
|
||||
return newTransOpt;
|
||||
}
|
||||
@ -96,10 +97,10 @@ TranslationOption *TranslationOption::MergeGeneration(const Phrase &inputPhrase
|
||||
, float generationScore
|
||||
, float weight) const
|
||||
{
|
||||
if (m_phrase.IsCompatible(inputPhrase))
|
||||
if (m_targetPhrase.IsCompatible(inputPhrase))
|
||||
{
|
||||
Phrase mergePhrase(inputPhrase);
|
||||
mergePhrase.MergeFactors(m_phrase);
|
||||
mergePhrase.MergeFactors(m_targetPhrase);
|
||||
TranslationOption *newTransOpt = new TranslationOption(*this, mergePhrase, generationDictionary, generationScore, weight);
|
||||
return newTransOpt;
|
||||
}
|
||||
|
@ -40,11 +40,12 @@ class GenerationDictionary;
|
||||
*/
|
||||
class TranslationOption
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream& out, const TranslationOption& possibleTranslation);
|
||||
friend std::ostream& operator<<(std::ostream& out, const TranslationOption& possibleTranslation);
|
||||
|
||||
protected:
|
||||
|
||||
const Phrase m_phrase;
|
||||
const Phrase m_targetPhrase;
|
||||
Phrase const* m_sourcePhrase;
|
||||
const WordsRange m_sourceWordsRange;
|
||||
float m_scoreTrans, m_scoreGen, m_futureScore, m_ngramScore;
|
||||
|
||||
@ -75,12 +76,16 @@ public:
|
||||
|
||||
/** returns the target phrase held by this translation option */
inline const Phrase &GetTargetPhrase() const
{
	return m_targetPhrase;
}
|
||||
/** returns the source words range stored in this translation option */
inline const WordsRange &GetSourceWordsRange() const
{
	return m_sourceWordsRange;
}
|
||||
/** returns the source phrase pointer stored in this translation option (may be null) */
Phrase const* GetSourcePhrase() const
{
	return m_sourcePhrase;
}
|
||||
|
||||
bool Overlap(const Hypothesis &hypothesis) const;
|
||||
/***
|
||||
@ -139,7 +144,7 @@ public:
|
||||
*/
|
||||
inline bool IsDeletionOption() const
|
||||
{
|
||||
return m_phrase.GetSize() == 0;
|
||||
return m_targetPhrase.GetSize() == 0;
|
||||
}
|
||||
void CalcScore(const LMList &allLM, float weightWordPenalty);
|
||||
|
||||
|
@ -94,15 +94,17 @@ char * mempool::alloc(){
|
||||
ptr = free_list = block_list->block;
|
||||
|
||||
for (int i=0;i<block_size-1;i++) {
|
||||
*(char **)ptr = ptr + item_size;
|
||||
ptr = ptr + item_size;
|
||||
*(char **)ptr = ptr + item_size;
|
||||
ptr = ptr + item_size;
|
||||
}
|
||||
|
||||
*(char **)ptr=NULL;
|
||||
|
||||
blocknum++;
|
||||
}
|
||||
|
||||
|
||||
assert(free_list);
|
||||
|
||||
ptr = free_list;
|
||||
|
||||
free_list=*(char **)ptr;
|
||||
|
Loading…
Reference in New Issue
Block a user