2012-05-07 17:59:37 +04:00
|
|
|
#pragma once
|
|
|
|
|
2012-05-07 18:41:18 +04:00
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
class Alignment;
|
|
|
|
class PhrasePair;
|
|
|
|
class SuffixArray;
|
|
|
|
class TargetCorpus;
|
|
|
|
class Mismatch;
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2011-02-24 16:57:11 +03:00
|
|
|
class PhrasePairCollection
|
2010-10-21 13:49:27 +04:00
|
|
|
{
|
|
|
|
public:
|
2011-02-24 16:57:11 +03:00
|
|
|
typedef unsigned int INDEX;
|
2010-10-21 13:49:27 +04:00
|
|
|
|
|
|
|
private:
|
2011-02-24 16:57:11 +03:00
|
|
|
SuffixArray *m_suffixArray;
|
|
|
|
TargetCorpus *m_targetCorpus;
|
|
|
|
Alignment *m_alignment;
|
2012-05-07 18:41:18 +04:00
|
|
|
std::vector<std::vector<PhrasePair*> > m_collection;
|
|
|
|
std::vector< Mismatch* > m_mismatch, m_unaligned;
|
2011-02-24 16:57:11 +03:00
|
|
|
int m_size;
|
|
|
|
int m_max_lookup;
|
2013-04-05 14:26:00 +04:00
|
|
|
int m_max_translation;
|
|
|
|
int m_max_example;
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2012-05-07 20:13:31 +04:00
|
|
|
// No copying allowed.
|
|
|
|
PhrasePairCollection(const PhrasePairCollection&);
|
|
|
|
void operator=(const PhrasePairCollection&);
|
|
|
|
|
2010-10-21 13:49:27 +04:00
|
|
|
public:
|
2013-04-05 14:26:00 +04:00
|
|
|
PhrasePairCollection ( SuffixArray *, TargetCorpus *, Alignment *, int, int );
|
2011-02-24 16:57:11 +03:00
|
|
|
~PhrasePairCollection ();
|
2010-10-21 13:49:27 +04:00
|
|
|
|
2013-04-05 14:26:00 +04:00
|
|
|
int GetCollection( const std::vector<std::string >& sourceString );
|
|
|
|
void Print(bool pretty) const;
|
2012-05-07 19:58:44 +04:00
|
|
|
void PrintHTML() const;
|
2010-10-21 13:49:27 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
// sorting helper
|
2011-02-24 16:57:11 +03:00
|
|
|
// Redundant forward declaration so this helper does not depend on where it
// sits in the header; PhrasePair is only ever used through pointers here.
class PhrasePair;

/**
 * Ordering predicate over groups of PhrasePair pointers.
 *
 * Returns true when the first group holds strictly more elements than the
 * second, so sorting with it places larger groups first. Groups of equal
 * size compare as equivalent.
 */
struct CompareBySize {
  bool operator()(const std::vector<PhrasePair*>& lhs,
                  const std::vector<PhrasePair*>& rhs ) const {
    // "lhs goes before rhs" exactly when rhs is the smaller group.
    return rhs.size() < lhs.size();
  }
};
|