2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_DynSuffixArray_h
|
|
|
|
#define moses_DynSuffixArray_h
|
2010-02-12 14:05:43 +03:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <set>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <utility>
|
2012-11-27 20:31:42 +04:00
|
|
|
#include "moses/Util.h"
|
|
|
|
#include "moses/File.h"
|
2012-11-27 20:16:30 +04:00
|
|
|
#include "moses/TranslationModel/DynSAInclude/types.h"
|
2010-02-12 14:05:43 +03:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
namespace Moses
|
|
|
|
{
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
/// Sequence of unsigned integers. Every table held by DynSuffixArray in this
/// header, as well as the vectors passed to its methods, uses this type.
typedef std::vector<unsigned int> vuint_t;
|
|
|
|
|
2012-06-27 03:45:02 +04:00
|
|
|
/** @todo ask Abbey Levenberg
|
|
|
|
*/
|
2011-02-24 16:14:42 +03:00
|
|
|
class DynSuffixArray
|
|
|
|
{
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
public:
|
|
|
|
DynSuffixArray();
|
|
|
|
DynSuffixArray(vuint_t*);
|
|
|
|
~DynSuffixArray();
|
2010-04-20 18:09:53 +04:00
|
|
|
bool GetCorpusIndex(const vuint_t*, vuint_t*);
|
|
|
|
void Load(FILE*);
|
|
|
|
void Save(FILE*);
|
2011-02-24 16:14:42 +03:00
|
|
|
void Insert(vuint_t*, unsigned);
|
2010-11-15 14:32:02 +03:00
|
|
|
void Delete(unsigned, unsigned);
|
|
|
|
void Substitute(vuint_t*, unsigned);
|
2010-04-08 18:52:35 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
private:
|
2010-04-08 18:52:35 +04:00
|
|
|
vuint_t* m_SA;
|
|
|
|
vuint_t* m_ISA;
|
|
|
|
vuint_t* m_F;
|
|
|
|
vuint_t* m_L;
|
|
|
|
vuint_t* m_corpus;
|
2010-04-20 18:09:53 +04:00
|
|
|
void BuildAuxArrays();
|
|
|
|
void Qsort(int* array, int begin, int end);
|
|
|
|
int Compare(int, int, int);
|
|
|
|
void Reorder(unsigned, unsigned);
|
2010-05-07 13:50:19 +04:00
|
|
|
int LastFirstFunc(unsigned);
|
2010-04-20 18:09:53 +04:00
|
|
|
int Rank(unsigned, unsigned);
|
2010-02-12 14:05:43 +03:00
|
|
|
int F_firstIdx(unsigned);
|
2010-04-20 18:09:53 +04:00
|
|
|
void PrintAuxArrays() {
|
2010-04-08 18:52:35 +04:00
|
|
|
std::cerr << "SA\tISA\tF\tL\n";
|
|
|
|
for(size_t i=0; i < m_SA->size(); ++i)
|
|
|
|
std::cerr << m_SA->at(i) << "\t" << m_ISA->at(i) << "\t" << m_F->at(i) << "\t" << m_L->at(i) << std::endl;
|
2010-02-12 14:05:43 +03:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} //end namespace
|
2010-02-24 14:15:44 +03:00
|
|
|
|
2010-02-12 14:05:43 +03:00
|
|
|
#endif
|