2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_DynSuffixArray_h
|
|
|
|
#define moses_DynSuffixArray_h
|
2010-02-12 14:05:43 +03:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <set>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <utility>
|
2012-11-27 20:31:42 +04:00
|
|
|
#include "moses/Util.h"
|
|
|
|
#include "moses/File.h"
|
2012-11-27 20:16:30 +04:00
|
|
|
#include "moses/TranslationModel/DynSAInclude/types.h"
|
2010-02-12 14:05:43 +03:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-06-26 20:19:09 +04:00
|
|
|
// NOTE(review): this using-directive sits at namespace scope inside a header,
// so it takes effect in every file that includes this header. Code here or in
// including files may depend on it for unqualified standard-library names;
// audit those uses before removing it.
using namespace std;
|
|
|
|
/// Shorthand for a vector of unsigned values; the classes declared in this
/// header use it for their members and for most of their parameters.
typedef std::vector<unsigned> vuint_t;
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2013-06-05 13:46:42 +04:00
|
|
|
|
2013-06-26 20:19:09 +04:00
|
|
|
/// compare position /i/ in the suffix array /m_sfa/ into corpus /m_crp/
|
|
|
|
/// against reference phrase /phrase/
|
|
|
|
// added by Ulrich Germann
|
|
|
|
class ComparePosition
|
|
|
|
{
|
|
|
|
vuint_t const& m_crp;
|
|
|
|
vuint_t const& m_sfa;
|
|
|
|
|
|
|
|
public:
|
|
|
|
ComparePosition(vuint_t const& crp, vuint_t const& sfa);
|
|
|
|
bool operator()(unsigned const& i, vector<wordID_t> const& phrase) const;
|
|
|
|
bool operator()(vector<wordID_t> const& phrase, unsigned const& i) const;
|
|
|
|
};
|
2013-06-05 13:46:42 +04:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
|
2012-06-27 03:45:02 +04:00
|
|
|
/** @todo ask Abbey Levenberg
|
|
|
|
*/
|
2013-06-26 20:19:09 +04:00
|
|
|
class DynSuffixArray
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
public:
|
|
|
|
DynSuffixArray();
|
|
|
|
DynSuffixArray(vuint_t*);
|
|
|
|
~DynSuffixArray();
|
2010-04-20 18:09:53 +04:00
|
|
|
bool GetCorpusIndex(const vuint_t*, vuint_t*);
|
|
|
|
void Load(FILE*);
|
|
|
|
void Save(FILE*);
|
2011-02-24 16:14:42 +03:00
|
|
|
void Insert(vuint_t*, unsigned);
|
2010-11-15 14:32:02 +03:00
|
|
|
void Delete(unsigned, unsigned);
|
|
|
|
void Substitute(vuint_t*, unsigned);
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2013-06-05 13:46:42 +04:00
|
|
|
size_t GetCount(vuint_t const& phrase) const;
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
private:
|
2010-04-08 18:52:35 +04:00
|
|
|
vuint_t* m_SA;
|
|
|
|
vuint_t* m_ISA;
|
|
|
|
vuint_t* m_F;
|
|
|
|
vuint_t* m_L;
|
|
|
|
vuint_t* m_corpus;
|
2010-04-20 18:09:53 +04:00
|
|
|
void BuildAuxArrays();
|
|
|
|
void Qsort(int* array, int begin, int end);
|
|
|
|
int Compare(int, int, int);
|
|
|
|
void Reorder(unsigned, unsigned);
|
2010-05-07 13:50:19 +04:00
|
|
|
int LastFirstFunc(unsigned);
|
2010-04-20 18:09:53 +04:00
|
|
|
int Rank(unsigned, unsigned);
|
2010-02-12 14:05:43 +03:00
|
|
|
int F_firstIdx(unsigned);
|
2010-04-20 18:09:53 +04:00
|
|
|
void PrintAuxArrays() {
|
2010-04-08 18:52:35 +04:00
|
|
|
std::cerr << "SA\tISA\tF\tL\n";
|
|
|
|
for(size_t i=0; i < m_SA->size(); ++i)
|
2013-06-26 20:19:09 +04:00
|
|
|
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t"
|
|
|
|
<< m_F->at(i) << "\t" << m_L->at(i) << std::endl;
|
2010-02-12 14:05:43 +03:00
|
|
|
}
|
|
|
|
};
|
|
|
|
} //end namespace
|
2010-02-24 14:15:44 +03:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
#endif
|