2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_PhraseDictionaryTree_h
|
|
|
|
#define moses_PhraseDictionaryTree_h
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <iostream>
|
2009-08-07 20:47:54 +04:00
|
|
|
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
#include <boost/thread/mutex.hpp>
|
|
|
|
#endif
|
|
|
|
|
2012-11-27 19:08:31 +04:00
|
|
|
#include "moses/TypeDef.h"
|
|
|
|
#include "moses/PrefixTree.h"
|
|
|
|
#include "moses/File.h"
|
|
|
|
#include "moses/ObjectPool.h"
|
|
|
|
#include "moses/LexicalReorderingTable.h"
|
|
|
|
#include "moses/LVoc.h"
|
|
|
|
#include "moses/TypeDef.h"
|
|
|
|
#include "moses/Util.h"
|
2008-09-12 22:09:06 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
// Forward declarations.
// PDTimp is used further down in this header only as a pointer member of
// PhraseDictionaryTree and in a friend declaration of PrefixPtr, so an
// incomplete type is sufficient here.
// NOTE(review): Phrase, Word and ConfusionNet are not referenced anywhere in
// the visible part of this header - presumably kept for including files; confirm
// before removing.
class Phrase;
|
|
|
|
class Word;
|
|
|
|
class ConfusionNet;
|
2010-01-28 15:12:57 +03:00
|
|
|
class PDTimp;
|
2008-09-12 22:09:06 +04:00
|
|
|
|
|
|
|
// Shorthand for the PrefixTreeF<LabelId,OFF_T> instantiation.
// NOTE(review): PrefixTreeF is presumably declared in moses/PrefixTree.h, and
// PTF is not used again in the visible part of this header - confirm its users.
typedef PrefixTreeF<LabelId,OFF_T> PTF;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-09-30 00:35:52 +04:00
|
|
|
//typedef std::pair<std::vector<std::string const*>,Scores > StringTgtCand;
|
2013-05-29 21:16:15 +04:00
|
|
|
struct StringTgtCand {
|
2011-09-30 00:35:52 +04:00
|
|
|
typedef std::vector<std::string const*> Tokens;
|
|
|
|
Tokens tokens;
|
|
|
|
Scores scores;
|
|
|
|
Tokens fnames;
|
|
|
|
std::vector<FValue> fvalues;
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
/** A phrase table for phrase-based decoding that is held on disk, rather than in memory
|
|
|
|
* Wrapper around a PDTimp class
|
|
|
|
*/
|
2013-02-06 15:29:54 +04:00
|
|
|
class PhraseDictionaryTree
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
|
|
|
PDTimp *imp; //implementation
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
PhraseDictionaryTree(const PhraseDictionaryTree&); //not implemented
|
|
|
|
void operator=(const PhraseDictionaryTree&); //not implemented
|
2008-06-11 14:52:57 +04:00
|
|
|
public:
|
2013-02-06 15:29:54 +04:00
|
|
|
PhraseDictionaryTree();
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2012-11-14 23:48:08 +04:00
|
|
|
void NeedAlignmentInfo(bool a);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
void PrintWordAlignment(bool a);
|
|
|
|
bool PrintWordAlignment();
|
|
|
|
|
|
|
|
|
|
|
|
virtual ~PhraseDictionaryTree();
|
|
|
|
|
|
|
|
size_t GetSize() const {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// convert from ascii phrase table format
|
|
|
|
// note: only creates table, does not keep it in memory
|
|
|
|
// -> use Read(outFileNamePrefix);
|
|
|
|
int Create(std::istream& in,const std::string& outFileNamePrefix);
|
|
|
|
|
|
|
|
int Read(const std::string& fileNamePrefix);
|
|
|
|
|
|
|
|
// free memory used by the prefix tree etc.
|
|
|
|
void FreeMemory() const;
|
|
|
|
|
|
|
|
|
|
|
|
/**************************************
|
|
|
|
* access with full source phrase *
|
|
|
|
**************************************/
|
|
|
|
// print target candidates for a given phrase, mainly for debugging
|
|
|
|
void PrintTargetCandidates(const std::vector<std::string>& src,
|
|
|
|
std::ostream& out) const;
|
|
|
|
|
|
|
|
// get the target candidates for a given phrase
|
|
|
|
void GetTargetCandidates(const std::vector<std::string>& src,
|
|
|
|
std::vector<StringTgtCand>& rv) const;
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
// get the target candidates for a given phrase
|
|
|
|
void GetTargetCandidates(const std::vector<std::string>& src,
|
|
|
|
std::vector<StringTgtCand>& rv,
|
|
|
|
std::vector<std::string>& wa) const;
|
|
|
|
|
|
|
|
/*****************************
|
|
|
|
* access to prefix tree *
|
|
|
|
*****************************/
|
|
|
|
|
|
|
|
// 'pointer' into prefix tree
|
|
|
|
// the only permitted direct operation is a check for NULL,
|
|
|
|
// e.g. PrefixPtr p; if(p) ...
|
|
|
|
// other usage only through PhraseDictionaryTree-functions below
|
|
|
|
|
|
|
|
class PrefixPtr
|
|
|
|
{
|
|
|
|
PPimp* imp;
|
|
|
|
friend class PDTimp;
|
|
|
|
public:
|
|
|
|
PrefixPtr(PPimp* x=0) : imp(x) {}
|
|
|
|
operator bool() const;
|
|
|
|
};
|
|
|
|
|
|
|
|
// return pointer to root node
|
|
|
|
PrefixPtr GetRoot() const;
|
|
|
|
// extend pointer with a word/Factorstring and return the resulting successor
|
|
|
|
// pointer. If there is no such successor node, the result will evaluate to
|
|
|
|
// false. Requirement: the input pointer p evaluates to true.
|
|
|
|
PrefixPtr Extend(PrefixPtr p,const std::string& s) const;
|
|
|
|
|
|
|
|
// get the target candidates for a given prefix pointer
|
|
|
|
// requirement: the pointer has to evaluate to true
|
|
|
|
void GetTargetCandidates(PrefixPtr p,
|
|
|
|
std::vector<StringTgtCand>& rv) const;
|
|
|
|
void GetTargetCandidates(PrefixPtr p,
|
|
|
|
std::vector<StringTgtCand>& rv,
|
|
|
|
std::vector<std::string>& wa) const;
|
|
|
|
|
|
|
|
// print target candidates for a given prefix pointer to a stream, mainly
|
|
|
|
// for debugging
|
|
|
|
void PrintTargetCandidates(PrefixPtr p,std::ostream& out) const;
|
2012-12-07 20:05:50 +04:00
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
};
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
|
|
|
|
}
|
2010-02-24 14:15:44 +03:00
|
|
|
|
|
|
|
#endif
|