2012-05-29 21:38:57 +04:00
|
|
|
/*
 * HypPackCollection.h
 * kbmira - k-best Batch MIRA
 *
 * Abstracts away the mess of iterating through multiple
 * collections of k-best lists, as well as deduping
 */
|
|
|
|
|
|
|
|
#ifndef MERT_HYP_PACK_COLLECTION_H
|
|
|
|
#define MERT_HYP_PACK_COLLECTION_H
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <utility>
|
2012-05-30 15:47:20 +04:00
|
|
|
#include <stddef.h>
|
2012-05-29 21:38:57 +04:00
|
|
|
|
|
|
|
#include "FeatureDataIterator.h"
|
|
|
|
#include "ScoreDataIterator.h"
|
2012-06-27 00:33:41 +04:00
|
|
|
#include "MiraFeatureVector.h"
|
2012-05-29 21:38:57 +04:00
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
namespace MosesTuning
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
|
2012-05-29 21:38:57 +04:00
|
|
|
// Start with these abstract classes
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
class HypPackEnumerator
|
|
|
|
{
|
2012-05-29 21:38:57 +04:00
|
|
|
public:
|
2012-05-30 17:59:23 +04:00
|
|
|
virtual ~HypPackEnumerator() {}
|
|
|
|
|
2012-05-29 21:38:57 +04:00
|
|
|
virtual void reset() = 0;
|
|
|
|
virtual bool finished() = 0;
|
|
|
|
virtual void next() = 0;
|
|
|
|
|
2012-06-26 19:40:16 +04:00
|
|
|
virtual std::size_t cur_id() = 0;
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual std::size_t cur_size() = 0;
|
|
|
|
virtual std::size_t num_dense() const = 0;
|
2012-06-27 00:33:41 +04:00
|
|
|
virtual const MiraFeatureVector& featuresAt(std::size_t i) = 0;
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual const ScoreDataItem& scoresAt(std::size_t i) = 0;
|
2012-05-29 21:38:57 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
// Instantiation that streams from disk
|
|
|
|
// Low-memory, low-speed, sequential access
|
2013-05-29 21:16:15 +04:00
|
|
|
class StreamingHypPackEnumerator : public HypPackEnumerator
|
|
|
|
{
|
2012-05-29 21:38:57 +04:00
|
|
|
public:
|
2012-05-30 15:47:20 +04:00
|
|
|
StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
|
2012-05-30 18:11:09 +04:00
|
|
|
std::vector<std::string> const& scoreFiles);
|
|
|
|
|
|
|
|
virtual std::size_t num_dense() const;
|
2012-05-29 21:38:57 +04:00
|
|
|
|
|
|
|
virtual void reset();
|
|
|
|
virtual bool finished();
|
|
|
|
virtual void next();
|
|
|
|
|
2012-06-26 19:40:16 +04:00
|
|
|
virtual std::size_t cur_id();
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual std::size_t cur_size();
|
2012-06-27 00:33:41 +04:00
|
|
|
virtual const MiraFeatureVector& featuresAt(std::size_t i);
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual const ScoreDataItem& scoresAt(std::size_t i);
|
|
|
|
|
2012-05-29 21:38:57 +04:00
|
|
|
private:
|
|
|
|
void prime();
|
2012-05-30 18:11:09 +04:00
|
|
|
std::size_t m_num_lists;
|
|
|
|
std::size_t m_sentenceId;
|
2012-05-30 15:47:20 +04:00
|
|
|
std::vector<std::string> m_featureFiles;
|
|
|
|
std::vector<std::string> m_scoreFiles;
|
2012-05-29 21:38:57 +04:00
|
|
|
|
|
|
|
bool m_primed;
|
|
|
|
int m_iNumDense;
|
2012-05-30 15:47:20 +04:00
|
|
|
std::vector<FeatureDataIterator> m_featureDataIters;
|
|
|
|
std::vector<ScoreDataIterator> m_scoreDataIters;
|
2012-05-30 18:11:09 +04:00
|
|
|
std::vector<std::pair<std::size_t,std::size_t> > m_current_indexes;
|
2012-06-27 00:33:41 +04:00
|
|
|
std::vector<MiraFeatureVector> m_current_featureVectors;
|
2012-05-29 21:38:57 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
// Instantiation that reads into memory
|
|
|
|
// High-memory, high-speed, random access
|
|
|
|
// (Actually randomizes with each call to reset)
|
2013-05-29 21:16:15 +04:00
|
|
|
class RandomAccessHypPackEnumerator : public HypPackEnumerator
|
|
|
|
{
|
2012-05-29 21:38:57 +04:00
|
|
|
public:
|
2012-05-30 15:47:20 +04:00
|
|
|
RandomAccessHypPackEnumerator(std::vector<std::string> const& featureFiles,
|
|
|
|
std::vector<std::string> const& scoreFiles,
|
2012-05-29 21:38:57 +04:00
|
|
|
bool no_shuffle);
|
|
|
|
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual std::size_t num_dense() const;
|
|
|
|
|
2012-05-29 21:38:57 +04:00
|
|
|
virtual void reset();
|
|
|
|
virtual bool finished();
|
|
|
|
virtual void next();
|
|
|
|
|
2012-06-26 19:40:16 +04:00
|
|
|
virtual std::size_t cur_id();
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual std::size_t cur_size();
|
2012-06-27 00:33:41 +04:00
|
|
|
virtual const MiraFeatureVector& featuresAt(std::size_t i);
|
2012-05-30 18:11:09 +04:00
|
|
|
virtual const ScoreDataItem& scoresAt(std::size_t i);
|
2012-05-29 21:38:57 +04:00
|
|
|
|
|
|
|
private:
|
|
|
|
bool m_no_shuffle;
|
2012-05-30 18:11:09 +04:00
|
|
|
std::size_t m_cur_index;
|
|
|
|
std::size_t m_num_dense;
|
|
|
|
std::vector<std::size_t> m_indexes;
|
2012-06-27 00:33:41 +04:00
|
|
|
std::vector<std::vector<MiraFeatureVector> > m_features;
|
2012-05-30 15:47:20 +04:00
|
|
|
std::vector<std::vector<ScoreDataItem> > m_scores;
|
2012-05-29 21:38:57 +04:00
|
|
|
};
|
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
}
|
|
|
|
|
2012-05-29 21:38:57 +04:00
|
|
|
#endif // MERT_HYP_PACK_COLLECTION_H
|
|
|
|
|
|
|
|
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:
|