mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-05 02:22:21 +03:00
148 lines
3.9 KiB
C++
148 lines
3.9 KiB
C++
// -*- c++ -*-
|
|
// $Id$
|
|
// vim:tabstop=2
|
|
|
|
#ifndef moses_PDTAimp_h
|
|
#define moses_PDTAimp_h
|
|
|
|
#include "StaticData.h" // needed for factor splitter
|
|
#include "moses/TranslationModel/PhraseDictionaryTree.h"
|
|
#include "UniqueObject.h"
|
|
#include "InputFileStream.h"
|
|
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
|
|
#include "Util.h"
|
|
#include "util/tokenize_piece.hh"
|
|
#include "util/exception.hh"
|
|
#include "moses/FF/InputFeature.h"
|
|
#include "util/exception.hh"
|
|
|
|
namespace Moses
|
|
{
|
|
|
|
/** Adds two quantities that are given in natural-log space.
 *
 * Computes log(exp(x)+exp(y)) without leaving log space. The larger of the
 * two arguments is factored out, so exp() is only ever called with a
 * non-positive exponent and cannot overflow, no matter how far apart the
 * arguments are.
 *
 * \param x first log-scale value
 * \param y second log-scale value
 * \return log(exp(x)+exp(y)); NaN if either argument is NaN
 */
inline double addLogScale(double x,double y)
{
  // Equal arguments: the sum is exactly twice either term. Handling this
  // first also covers two infinities of the same sign, for which x-y is NaN.
  if(x==y) return x+log(2.0);
  // Factor out the larger argument so that the exponent passed to exp() is <= 0.
  if(x<y) return y+log(1.0+exp(x-y));
  return x+log(1.0+exp(y-x));
}
|
|
|
|
/** Forwards to exp().
 *
 * \param x exponent
 * \return the value exp() yields for x
 */
inline double Exp(double x)
{
  const double result = exp(x);
  return result;
}
|
|
|
|
/** Implementation of the binary phrase table for the phrase-based decoder.
 *  Used by PhraseDictionaryTreeAdaptor.
 */
|
|
class PDTAimp
|
|
{
|
|
// only these classes are allowed to instantiate this class
|
|
friend class PhraseDictionaryTreeAdaptor;
|
|
|
|
protected:
|
|
PDTAimp(PhraseDictionaryTreeAdaptor *p);
|
|
|
|
public:
|
|
std::vector<FactorType> m_input,m_output;
|
|
PhraseDictionaryTree *m_dict;
|
|
const InputFeature *m_inputFeature;
|
|
typedef std::vector<TargetPhraseCollectionWithSourcePhrase const*> vTPC;
|
|
mutable vTPC m_tgtColls;
|
|
|
|
typedef std::map<Phrase,TargetPhraseCollectionWithSourcePhrase const*> MapSrc2Tgt;
|
|
mutable MapSrc2Tgt m_cache;
|
|
PhraseDictionaryTreeAdaptor *m_obj;
|
|
int useCache;
|
|
|
|
std::vector<vTPC> m_rangeCache;
|
|
unsigned m_numInputScores;
|
|
|
|
UniqueObjectManager<Phrase> uniqSrcPhr;
|
|
|
|
size_t totalE,distinctE;
|
|
std::vector<size_t> path1Best,pathExplored;
|
|
std::vector<double> pathCN;
|
|
|
|
~PDTAimp();
|
|
|
|
void Factors2String(Word const& w,std::string& s) const {
|
|
s=w.GetString(m_input,false);
|
|
}
|
|
|
|
void CleanUp();
|
|
|
|
TargetPhraseCollectionWithSourcePhrase const*
|
|
GetTargetPhraseCollection(Phrase const &src) const;
|
|
|
|
void Create(const std::vector<FactorType> &input
|
|
, const std::vector<FactorType> &output
|
|
, const std::string &filePath
|
|
, const std::vector<float> &weight);
|
|
|
|
|
|
typedef PhraseDictionaryTree::PrefixPtr PPtr;
|
|
typedef unsigned short Position;
|
|
typedef std::pair<Position,Position> Range;
|
|
struct State {
|
|
PPtr ptr;
|
|
Range range;
|
|
std::vector<float> scores;
|
|
Phrase src;
|
|
|
|
State() : range(0,0),scores(0),src(ARRAY_SIZE_INCR) {}
|
|
State(Position b,Position e,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
|
|
: ptr(v),range(b,e),scores(sv),src(ARRAY_SIZE_INCR) {}
|
|
State(Range const& r,const PPtr& v,const std::vector<float>& sv=std::vector<float>(0))
|
|
: ptr(v),range(r),scores(sv),src(ARRAY_SIZE_INCR) {}
|
|
|
|
Position begin() const {
|
|
return range.first;
|
|
}
|
|
Position end() const {
|
|
return range.second;
|
|
}
|
|
std::vector<float> GetScores() const {
|
|
return scores;
|
|
}
|
|
|
|
friend std::ostream& operator<<(std::ostream& out,State const& s) {
|
|
out<<" R=("<<s.begin()<<","<<s.end()<<"),";
|
|
for(std::vector<float>::const_iterator scoreIterator = s.GetScores().begin(); scoreIterator<s.GetScores().end(); scoreIterator++) {
|
|
out<<", "<<*scoreIterator;
|
|
}
|
|
out<<")";
|
|
return out;
|
|
}
|
|
|
|
};
|
|
|
|
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
|
StringTgtCand::Tokens const& factorStrings,
|
|
Scores const& transVector,
|
|
Scores const& inputVector,
|
|
const std::string *alignmentString,
|
|
Phrase const* srcPtr=0) const;
|
|
|
|
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
|
|
(const std::vector<TargetPhrase> & tCands,
|
|
std::vector<std::pair<float,size_t> >& costs,
|
|
const std::vector<Phrase> &sourcePhrases) const;
|
|
|
|
|
|
// POD for target phrase scores
|
|
struct TScores {
|
|
float total;
|
|
Scores transScore, inputScores;
|
|
Phrase const* src;
|
|
|
|
TScores() : total(0.0),src(0) {}
|
|
};
|
|
|
|
void CacheSource(ConfusionNet const& src);
|
|
|
|
size_t GetNumInputScores() const {
|
|
return m_numInputScores;
|
|
}
|
|
};
|
|
|
|
}
|
|
#endif
|