2010-10-21 13:49:27 +04:00
|
|
|
#pragma once
|
|
|
|
|
2012-05-07 17:59:37 +04:00
|
|
|
#include "Vocabulary.h"
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2011-02-24 16:57:11 +03:00
|
|
|
class TargetCorpus
|
2010-10-21 13:49:27 +04:00
|
|
|
{
|
|
|
|
public:
|
2011-02-24 16:57:11 +03:00
|
|
|
typedef unsigned int INDEX;
|
2010-10-21 13:49:27 +04:00
|
|
|
|
|
|
|
private:
|
2011-02-24 16:57:11 +03:00
|
|
|
WORD_ID *m_array;
|
|
|
|
INDEX *m_sentenceEnd;
|
|
|
|
Vocabulary m_vcb;
|
|
|
|
INDEX m_size;
|
|
|
|
INDEX m_sentenceCount;
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2012-05-07 20:13:31 +04:00
|
|
|
// No copying allowed.
|
|
|
|
TargetCorpus(const TargetCorpus&);
|
|
|
|
void operator=(const TargetCorpus&);
|
|
|
|
|
2010-10-21 13:49:27 +04:00
|
|
|
public:
|
2012-05-07 20:13:31 +04:00
|
|
|
TargetCorpus();
|
2011-02-24 16:57:11 +03:00
|
|
|
~TargetCorpus();
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2012-05-07 18:26:32 +04:00
|
|
|
void Create(const std::string& fileName );
|
2011-02-24 16:57:11 +03:00
|
|
|
WORD GetWordFromId( const WORD_ID id ) const;
|
2012-05-07 19:58:44 +04:00
|
|
|
WORD GetWord( INDEX sentence, char word ) const;
|
|
|
|
WORD_ID GetWordId( INDEX sentence, char word ) const;
|
|
|
|
char GetSentenceLength( INDEX sentence ) const;
|
2012-05-07 18:26:32 +04:00
|
|
|
void Load(const std::string& fileName );
|
2012-05-07 19:58:44 +04:00
|
|
|
void Save(const std::string& fileName ) const;
|
2010-10-21 13:49:27 +04:00
|
|
|
};
|