mosesdecoder/moses/src/PrefixTreeMap.h

139 lines
3.1 KiB
C
Raw Normal View History

#ifndef PREFIX_TREE_MAP_H
#define PREFIX_TREE_MAP_H
#include<vector>
#include<climits>
#include<iostream>
#include <map>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
#endif
#include "PrefixTree.h"
#include "File.h"
#include "LVoc.h"
#include "ObjectPool.h"
namespace Moses
{
// Prefix tree type used throughout this header: PrefixTreeF instantiated
// with LabelId and OFF_T (both declared in the included project headers).
typedef PrefixTreeF<LabelId,OFF_T> PTF;
// FilePtr wrapper around a PTF.
typedef FilePtr<PTF> CPT;
// Sequence of CPT handles; this is the type of PrefixTreeMap::m_Data.
typedef std::vector<CPT> Data;
// Vocabulary type: LVoc instantiated for std::string.
typedef LVoc<std::string> WordVoc;
/**
 * Value type bundling a list of phrases with a list of score vectors.
 *
 * The two lists are stored independently; nothing in this class forces them
 * to have the same length, so query NumPhrases() and NumScores() separately.
 *
 * Copy construction, copy assignment and destruction are deliberately left to
 * the compiler. Both members are standard containers that manage their own
 * storage, so the generated operations are correct, copy assignment stays
 * consistent with copy construction, and compilers with move support can
 * move instead of copy (e.g. when a std::vector<GenericCandidate> grows).
 */
class GenericCandidate {
public:
  typedef std::vector<IPhrase> PhraseList;
  typedef std::vector< std::vector<float> > ScoreList;

public:
  /** Creates a candidate with no phrases and no scores. */
  GenericCandidate() {}

  /** Creates a candidate holding copies of the phrase list p and score list s. */
  GenericCandidate(const PhraseList& p, const ScoreList& s)
    : m_PhraseList(p), m_ScoreList(s) {}

public:
  /** Number of stored phrases. */
  size_t NumPhrases() const {
    return m_PhraseList.size();
  }

  /** Number of stored score vectors. */
  size_t NumScores() const {
    return m_ScoreList.size();
  }

  /**
   * Returns the i-th phrase.
   * Bounds-checked via std::vector::at, which throws std::out_of_range
   * when i >= NumPhrases().
   */
  const IPhrase& GetPhrase(unsigned int i) const {
    return m_PhraseList.at(i);
  }

  /**
   * Returns the i-th score vector.
   * Bounds-checked via std::vector::at, which throws std::out_of_range
   * when i >= NumScores().
   */
  const std::vector<float>& GetScore(unsigned int i) const {
    return m_ScoreList.at(i);
  }

  // Binary (de)serialisation hooks; defined out of line. Presumably they
  // read/write this candidate from/to the stream f -- see the implementation
  // file for the actual format.
  void readBin(FILE* f);
  void writeBin(FILE* f) const;

private:
  PhraseList m_PhraseList;
  ScoreList m_ScoreList;
};
/*
class PPtr {
public:
typedef unsigned IndexType;
public:
PPtr(PTF const* p, IndexType i, bool isRoot)
: m_Ptr(p), m_Index(i), m_IsRoot(isRoot){
};
~PPtr(){
};
};
*/
/**
 * Small handle made of a prefix-tree pointer, an index and a root flag.
 * No destructor is declared, so the pointed-to tree is never released here.
 */
struct PPimp {
  PTF const* p;   // tree this handle refers to; may be null
  unsigned idx;   // index, checked against p->size() in isValid()
  bool root;      // when set, the handle is valid regardless of p and idx

  /** Stores the three values as given; no validation is performed. */
  PPimp(PTF const* tree, unsigned index, bool rootFlag)
    : p(tree), idx(index), root(rootFlag) {}

  /**
   * A handle is valid if it is flagged as root, or if it has a non-null
   * tree pointer and idx lies below that tree's size().
   */
  bool isValid() const {
    if (root) {
      return true;
    }
    if (!p) {
      return false;
    }
    return idx < p->size();
  }

  /** Returns the root flag. */
  bool isRoot() const {
    return root;
  }

  /** Returns the stored tree pointer (possibly null). */
  PTF const* ptr() const {
    return p;
  }
};
/**
 * A std::vector of GenericCandidate extended with binary (de)serialisation
 * hooks. All vector operations are available through public inheritance.
 */
class Candidates : public std::vector<GenericCandidate> {
  typedef std::vector<GenericCandidate> MyBase;

public:
  /** Creates an empty candidate list. */
  Candidates() {}

  // Defined out of line; presumably read/write the whole list from/to the
  // stream f -- see the implementation file for the actual format.
  void readBin(FILE* f);
  void writeBin(FILE* f) const;
};
/**
 * Map built on prefix trees (PTF) with per-index word vocabularies.
 *
 * The object owns two FILE handles, which its destructor closes, and whatever
 * FreeMemory() releases. Because of that ownership the class is non-copyable:
 * an implicit copy would make two objects close the same handles.
 */
class PrefixTreeMap {
public:
  /**
   * Starts with both file handles null and calls the static
   * PTF::setDefault(InvalidOffT) (this happens on every construction).
   */
  PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
    PTF::setDefault(InvalidOffT);
  }

  /** Closes each file handle that is non-null, then calls FreeMemory(). */
  ~PrefixTreeMap() {
    if (m_FileSrc) {
      fClose(m_FileSrc);
    }
    if (m_FileTgt) {
      fClose(m_FileTgt);
    }
    FreeMemory();
  }

public:
  // Defined out of line; its value and meaning are not visible in this header.
  static const LabelId MagicWord;

public:
  // The members below are declared here and defined out of line; the notes
  // describe only what the signatures show.

  /** Releases resources held by this object; also invoked by the destructor. */
  void FreeMemory();

  /**
   * Loads data identified by fileNameStem. numVocs defaults to -1.
   * NOTE(review): the meaning of the return value and of a negative numVocs
   * is defined by the implementation -- confirm there.
   */
  int Read(const std::string& fileNameStem, int numVocs = -1);

  /** Looks up candidates for a phrase key; results are delivered via cands. */
  void GetCandidates(const IPhrase& key, Candidates* cands);
  /** Looks up candidates for a tree position p; results are delivered via cands. */
  void GetCandidates(const PPimp& p, Candidates* cands);

  // Conversions between words/phrases and their LabelId form. The voc
  // argument presumably selects an entry of m_Voc -- confirm in the
  // implementation.
  std::vector< std::string const * > ConvertPhrase(const IPhrase& p, unsigned int voc) const;
  IPhrase ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const;
  LabelId ConvertWord(const std::string& w, unsigned int voc) const;
  std::string ConvertWord(LabelId w, unsigned int voc) const;

public: //low level
  /** Returns a handle for the root position. */
  PPimp* GetRoot();

  /** Extends position p by the label wi and returns the resulting handle. */
  PPimp* Extend(PPimp* p, LabelId wi);

  /**
   * Convenience overload: converts the word w to a LabelId using
   * ConvertWord(w, voc) and forwards to Extend(PPimp*, LabelId).
   * The word is taken by const reference to avoid copying the string on
   * every call; voc is narrowed explicitly to the unsigned int that
   * ConvertWord expects.
   */
  PPimp* Extend(PPimp* p, const std::string& w, size_t voc) {
    return Extend(p, ConvertWord(w, static_cast<unsigned int>(voc)));
  }

private:
  // Non-copyable: declared but intentionally never defined. Copying would
  // duplicate the raw FILE handles and lead to both objects closing them.
  PrefixTreeMap(const PrefixTreeMap&);
  PrefixTreeMap& operator=(const PrefixTreeMap&);

private:
  Data m_Data;
  FILE* m_FileSrc;              // closed in the destructor when non-null
  FILE* m_FileTgt;              // closed in the destructor when non-null
  std::vector<WordVoc*> m_Voc;  // raw pointers; presumably released by FreeMemory() -- confirm
  ObjectPool<PPimp> m_PtrPool;  // presumably backs the PPimp* returned by GetRoot()/Extend() -- confirm
};
}
#endif //PREFIX_TREE_MAP_H