Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Christian Federmann 2012-05-10 09:49:57 +02:00
commit 60dde0b06b
38 changed files with 270 additions and 278 deletions

View File

@ -14,6 +14,8 @@
#include "Util.h"
#include "Vocabulary.h"
using namespace std;
namespace {
// configure regularisation

View File

@ -10,8 +10,6 @@
#include "Scorer.h"
#include "ScopedVector.h"
using namespace std;
const int kBleuNgramOrder = 4;
class NgramCounts;
@ -29,15 +27,15 @@ public:
SHORTEST
};
explicit BleuScorer(const string& config = "");
explicit BleuScorer(const std::string& config = "");
~BleuScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual float calculateScore(const vector<int>& comps) const;
virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual float calculateScore(const std::vector<int>& comps) const;
virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
int CalcReferenceLength(size_t sentence_id, size_t length);
int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
@ -47,14 +45,14 @@ public:
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
size_t CountNgrams(const string& line, NgramCounts& counts, unsigned int n);
std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n);
void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
bool OpenReference(const char* filename, size_t file_id);
bool OpenReference(const char* filename, std::size_t file_id);
// NOTE: this function is used for unit testing.
bool OpenReferenceStream(std::istream* is, size_t file_id);
bool OpenReferenceStream(std::istream* is, std::size_t file_id);
private:
ReferenceLengthType m_ref_length_type;
@ -70,6 +68,6 @@ private:
/** Computes sentence-level BLEU+1 score.
* This function is used in PRO.
*/
float sentenceLevelBleuPlusOne(const vector<float>& stats);
float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
#endif // MERT_BLEU_SCORER_H_

View File

@ -220,7 +220,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
BOOST_AUTO_TEST_CASE(calculate_actual_score) {
BOOST_REQUIRE(4 == kBleuNgramOrder);
vector<int> stats(2 * kBleuNgramOrder + 1);
std::vector<int> stats(2 * kBleuNgramOrder + 1);
BleuScorer scorer;
// unigram
@ -247,7 +247,7 @@ BOOST_AUTO_TEST_CASE(calculate_actual_score) {
BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
BOOST_REQUIRE(4 == kBleuNgramOrder);
vector<float> stats(2 * kBleuNgramOrder + 1);
std::vector<float> stats(2 * kBleuNgramOrder + 1);
// unigram
stats[0] = 6.0;

View File

@ -4,6 +4,8 @@
#include <fstream>
#include <stdexcept>
using namespace std;
namespace {
inline int CalcDistance(int word1, int word2) {

View File

@ -6,34 +6,32 @@
#include "Types.h"
#include "Scorer.h"
using namespace std;
/**
* CderScorer class can compute both CDER and WER metric.
*/
class CderScorer: public StatisticsBasedScorer {
public:
explicit CderScorer(const string& config, bool allowed_long_jumps = true);
explicit CderScorer(const std::string& config, bool allowed_long_jumps = true);
~CderScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual void prepareStatsVector(size_t sid, const string& text, vector<int>& stats);
virtual void prepareStatsVector(std::size_t sid, const std::string& text, std::vector<int>& stats);
virtual size_t NumberOfScores() const { return 2; }
virtual std::size_t NumberOfScores() const { return 2; }
virtual float calculateScore(const vector<int>& comps) const;
virtual float calculateScore(const std::vector<int>& comps) const;
private:
bool m_allowed_long_jumps;
typedef vector<int> sent_t;
vector<vector<sent_t> > m_ref_sentences;
typedef std::vector<int> sent_t;
std::vector<std::vector<sent_t> > m_ref_sentences;
void computeCD(const sent_t& cand, const sent_t& ref,
vector<int>& stats) const;
std::vector<int>& stats) const;
// no copying allowed
CderScorer(const CderScorer&);

View File

@ -30,7 +30,7 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
data.getScoreData()->add(sa3);
data.getScoreData()->add(sa4);
vector<Data> shards;
std::vector<Data> shards;
data.createShards(2,0,"",shards);
BOOST_CHECK_EQUAL(shards.size(),2);

View File

@ -6,12 +6,12 @@
#define _FDSTREAM_
#include <iostream>
#include <string>
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
#include <ext/stdio_filebuf.h>
#define BUFFER_SIZE (1024)
using namespace std;
#define BUFFER_SIZE (32768)
class _fdstream
{
@ -20,16 +20,16 @@ protected:
_file_descriptor(-1), _filebuf(NULL)
{ }
_fdstream(int file_descriptor, ios_base::openmode openmode) :
_fdstream(int file_descriptor, std::ios_base::openmode openmode) :
_file_descriptor(file_descriptor), _openmode(openmode)
{
_filebuf = NULL;
open(file_descriptor, openmode);
}
ios_base::openmode openmode() const { return _openmode; }
std::ios_base::openmode openmode() const { return _openmode; }
void open(int file_descriptor, ios_base::openmode openmode)
void open(int file_descriptor, std::ios_base::openmode openmode)
{
if (!_filebuf)
// We create a C++ stream from a file descriptor
@ -38,10 +38,10 @@ protected:
// You can also create the filebuf from a FILE* with
// FILE* f = fdopen(file_descriptor, mode);
_filebuf = new __gnu_cxx::stdio_filebuf<char> (file_descriptor,
openmode);
openmode);
}
~_fdstream()
virtual ~_fdstream()
{
close(_file_descriptor);
delete _filebuf;
@ -50,7 +50,7 @@ protected:
int _file_descriptor;
__gnu_cxx::stdio_filebuf<char>* _filebuf;
ios_base::openmode _openmode;
std::ios_base::openmode _openmode;
};
class ifdstream : public _fdstream
@ -61,41 +61,41 @@ public:
{ }
ifdstream(int file_descriptor) :
_fdstream(file_descriptor, ios_base::in)
_fdstream(file_descriptor, std::ios_base::in)
{
_stream = new istream (_filebuf);
_stream = new std::istream(_filebuf);
}
void open(int file_descriptor)
{
if (!_stream)
{
_fdstream::open(file_descriptor, ios_base::in);
_stream = new istream (_filebuf);
_fdstream::open(file_descriptor, std::ios_base::in);
_stream = new std::istream(_filebuf);
}
}
ifdstream& operator>> (string& str)
ifdstream& operator>> (std::string& str)
{
(*_stream) >> str;
return *this;
}
size_t getline(string& str)
std::size_t getline(std::string& str)
{
char tmp[BUFFER_SIZE];
size_t ret = getline(tmp, BUFFER_SIZE);
std::size_t ret = getline(tmp, BUFFER_SIZE);
str = tmp;
return ret;
}
size_t getline (char* s, streamsize n)
std::size_t getline(char* s, std::streamsize n)
{
return (getline(s, n, '\n'));
}
size_t getline (char* s, streamsize n, char delim)
std::size_t getline(char* s, std::streamsize n, char delim)
{
int i = 0;
do{
@ -115,7 +115,7 @@ public:
}
private:
istream* _stream;
std::istream* _stream;
};
class ofdstream : public _fdstream
@ -126,22 +126,22 @@ public:
{ }
ofdstream(int file_descriptor) :
_fdstream(file_descriptor, ios_base::out)
_fdstream(file_descriptor, std::ios_base::out)
{
_stream = new ostream (_filebuf);
_stream = new std::ostream(_filebuf);
}
void open(int file_descriptor)
{
if (!_stream)
{
_fdstream::open(file_descriptor, ios_base::out);
_stream = new ostream (_filebuf);
}
{
_fdstream::open(file_descriptor, std::ios_base::out);
_stream = new std::ostream(_filebuf);
}
}
ofdstream& operator<< (const string& str)
ofdstream& operator<< (const std::string& str)
{
if (_stream->good())
(*_stream) << str;
@ -157,7 +157,7 @@ public:
}
private:
ostream* _stream;
std::ostream* _stream;
};
#else

View File

@ -6,11 +6,14 @@
*
*/
#include <iostream>
#include <fstream>
#include "FeatureArray.h"
#include "FileStream.h"
#include "Util.h"
using namespace std;
FeatureArray::FeatureArray()
: m_index(""), m_num_features(0), m_sparse_flag(false) {}

View File

@ -10,11 +10,9 @@
#define MERT_FEATURE_ARRAY_H_
#include <vector>
#include <iostream>
#include <iosfwd>
#include "FeatureStats.h"
using namespace std;
const char FEATURES_TXT_BEGIN[] = "FEATURES_TXT_BEGIN_0";
const char FEATURES_TXT_END[] = "FEATURES_TXT_END_0";
const char FEATURES_BIN_BEGIN[] = "FEATURES_BIN_BEGIN_0";
@ -27,7 +25,7 @@ private:
// the index inside the vector.
std::string m_index;
featarray_t m_array;
size_t m_num_features;
std::size_t m_num_features;
std::string m_features;
bool m_sparse_flag;
@ -42,27 +40,27 @@ public:
std::string getIndex() const { return m_index; }
void setIndex(const std::string& value) { m_index = value; }
FeatureStats& get(size_t i) { return m_array.at(i); }
const FeatureStats& get(size_t i) const { return m_array.at(i); }
FeatureStats& get(std::size_t i) { return m_array.at(i); }
const FeatureStats& get(std::size_t i) const { return m_array.at(i); }
void add(FeatureStats& e) { m_array.push_back(e); }
//ADDED BY TS
void swap(size_t i, size_t j) {
void swap(std::size_t i, std::size_t j) {
std::swap(m_array[i], m_array[j]);
}
void resize(size_t new_size) {
void resize(std::size_t new_size) {
m_array.resize(std::min(new_size, m_array.size()));
}
//END_ADDED
void merge(FeatureArray& e);
size_t size() const { return m_array.size(); }
std::size_t size() const { return m_array.size(); }
size_t NumberOfFeatures() const { return m_num_features; }
void NumberOfFeatures(size_t v) { m_num_features = v; }
std::size_t NumberOfFeatures() const { return m_num_features; }
void NumberOfFeatures(std::size_t v) { m_num_features = v; }
std::string Features() const { return m_features; }
void Features(const std::string& f) { m_features = f; }
@ -73,8 +71,8 @@ public:
void save(const std::string &file, bool bin=false);
void save(bool bin=false);
void loadtxt(std::istream* is, size_t n);
void loadbin(std::istream* is, size_t n);
void loadtxt(std::istream* is, std::size_t n);
void loadbin(std::istream* is, std::size_t n);
void load(std::istream* is);
void load(const std::string &file);

View File

@ -100,10 +100,10 @@ public:
std::string getFeatureName(std::size_t idx) const {
if (idx >= m_index_to_feature_name.size())
throw runtime_error("Error: you required an too big index");
throw std::runtime_error("Error: you required an too big index");
std::map<std::size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
if (it == m_index_to_feature_name.end()) {
throw runtime_error("Error: specified id is unknown: " + idx);
throw std::runtime_error("Error: specified id is unknown: " + idx);
} else {
return it->second;
}
@ -113,8 +113,9 @@ public:
std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.find(name);
if (it == m_feature_name_to_index.end()) {
std::string msg = "Error: feature " + name + " is unknown. Known features: ";
for (std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.begin(); it != m_feature_name_to_index.end(); it++) {
msg += it->first;
for (std::map<std::string, std::size_t>::const_iterator cit = m_feature_name_to_index.begin();
cit != m_feature_name_to_index.end(); cit++) {
msg += cit->first;
msg += ", ";
}

View File

@ -12,7 +12,7 @@ void CheckFeatureMap(const FeatureData* feature_data,
for (int i = 0; i < num_feature; ++i) {
std::stringstream ss;
ss << str << "_" << i;
const string& s = ss.str();
const std::string& s = ss.str();
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
++(*cnt);

View File

@ -12,6 +12,8 @@
#include <cmath>
#include "Util.h"
using namespace std;
namespace {
const int kAvailableSize = 8;
} // namespace

View File

@ -16,20 +16,18 @@
#include <vector>
#include "Types.h"
using namespace std;
// Minimal sparse vector
class SparseVector {
public:
typedef std::map<size_t,FeatureStatsType> fvector_t;
typedef std::map<std::string, size_t> name2id_t;
typedef std::map<std::size_t,FeatureStatsType> fvector_t;
typedef std::map<std::string, std::size_t> name2id_t;
typedef std::vector<std::string> id2name_t;
FeatureStatsType get(const std::string& name) const;
FeatureStatsType get(size_t id) const;
FeatureStatsType get(std::size_t id) const;
void set(const std::string& name, FeatureStatsType value);
void clear();
size_t size() const { return m_fvector.size(); }
std::size_t size() const { return m_fvector.size(); }
void write(std::ostream& out, const std::string& sep = " ") const;
@ -46,8 +44,8 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
class FeatureStats
{
private:
size_t m_available_size;
size_t m_entries;
std::size_t m_available_size;
std::size_t m_entries;
// TODO: Use smart pointer for exceptional-safety.
featstats_t m_array;
@ -55,7 +53,7 @@ private:
public:
FeatureStats();
explicit FeatureStats(const size_t size);
explicit FeatureStats(const std::size_t size);
explicit FeatureStats(std::string &theString);
~FeatureStats();
@ -69,7 +67,7 @@ public:
bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
void expand();
void add(FeatureStatsType v);
void addSparse(const string& name, FeatureStatsType v);
void addSparse(const std::string& name, FeatureStatsType v);
void clear() {
memset((void*)m_array, 0, GetArraySizeWithBytes());
@ -81,23 +79,23 @@ public:
clear();
}
FeatureStatsType get(size_t i) { return m_array[i]; }
FeatureStatsType get(size_t i)const { return m_array[i]; }
FeatureStatsType get(std::size_t i) { return m_array[i]; }
FeatureStatsType get(std::size_t i)const { return m_array[i]; }
featstats_t getArray() const { return m_array; }
const SparseVector& getSparse() const { return m_map; }
void set(std::string &theString);
inline size_t bytes() const { return GetArraySizeWithBytes(); }
inline std::size_t bytes() const { return GetArraySizeWithBytes(); }
size_t GetArraySizeWithBytes() const {
std::size_t GetArraySizeWithBytes() const {
return m_entries * sizeof(FeatureStatsType);
}
size_t size() const { return m_entries; }
std::size_t size() const { return m_entries; }
size_t available() const { return m_available_size; }
std::size_t available() const { return m_available_size; }
void savetxt(const std::string &file);
void savetxt(std::ostream* os);
@ -111,7 +109,7 @@ public:
/**
* Write the whole object to a stream.
*/
friend ostream& operator<<(ostream& o, const FeatureStats& e);
friend std::ostream& operator<<(std::ostream& o, const FeatureStats& e);
};
//ADDED_BY_TS

View File

@ -15,17 +15,17 @@ class InterpolatedScorer : public Scorer
{
public:
// name would be: "HAMMING,BLEU" or similar
InterpolatedScorer(const string& name, const string& config);
InterpolatedScorer(const std::string& name, const std::string& config);
virtual ~InterpolatedScorer() {}
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores) const;
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual size_t NumberOfScores() const {
size_t sz = 0;
virtual std::size_t NumberOfScores() const {
std::size_t sz = 0;
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
sz += (*itsc)->NumberOfScores();
@ -38,9 +38,9 @@ public:
/**
* Set the factors, which should be used for this metric
*/
virtual void setFactors(const string& factors);
virtual void setFactors(const std::string& factors);
virtual void setFilter(const string& filterCommand);
virtual void setFilter(const std::string& filterCommand);
protected:
ScopedVector<Scorer> m_scorers;
@ -49,7 +49,7 @@ protected:
// by Scorer objects.
ScopedVector<ScoreData> m_scorers_score_data;
vector<float> m_scorer_weights;
std::vector<float> m_scorer_weights;
};
#endif // MERT_INTERPOLATED_SCORER_H_

View File

@ -11,6 +11,7 @@
#include "TER/tercalc.h"
#include "TER/terAlignment.h"
using namespace std;
using namespace TERCpp;
MergeScorer::MergeScorer(const string& config)

View File

@ -1,15 +1,11 @@
#ifndef MERT_MERGE_SCORER_H_
#define MERT_MERGE_SCORER_H_
#include <iostream>
#include <set>
#include <string>
#include <vector>
#include "Scorer.h"
using namespace std;
class PerScorer;
class ScoreStats;
@ -20,16 +16,16 @@ const int kMergeScorerLength = 4;
*/
class MergeScorer: public StatisticsBasedScorer {
public:
explicit MergeScorer(const string& config = "");
explicit MergeScorer(const std::string& config = "");
~MergeScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual size_t NumberOfScores() const { return 0; }
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual std::size_t NumberOfScores() const { return 0; }
protected:
friend class PerScorer;
virtual float calculateScore(const vector<int>& comps) const;
virtual float calculateScore(const std::vector<int>& comps) const;
private:
// no copying allowed

View File

@ -17,7 +17,7 @@ BOOST_AUTO_TEST_CASE(ngram_basic) {
NgramCounts::const_iterator it = counts.find(key);
BOOST_CHECK(it != counts.end());
BOOST_CHECK_EQUAL(it->first.size(), key.size());
for (size_t i = 0; i < key.size(); ++i) {
for (std::size_t i = 0; i < key.size(); ++i) {
BOOST_CHECK_EQUAL(it->first[i], key[i]);
}
BOOST_CHECK_EQUAL(it->second, 1);

View File

@ -8,9 +8,7 @@
#include "Scorer.h"
#include "Types.h"
using namespace std;
static const float kMaxFloat = numeric_limits<float>::max();
static const float kMaxFloat = std::numeric_limits<float>::max();
class Point;
@ -24,10 +22,10 @@ protected:
FeatureDataHandle m_feature_data; // no accessor for them only child can use them
unsigned int m_num_random_directions;
const vector<bool>& m_positive;
const std::vector<bool>& m_positive;
public:
Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& positive, const vector<parameter_t>& start, unsigned int nrandom);
Optimizer(unsigned Pd, const std::vector<unsigned>& i2O, const std::vector<bool>& positive, const std::vector<parameter_t>& start, unsigned int nrandom);
void SetScorer(Scorer *scorer) { m_scorer = scorer; }
void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
@ -50,18 +48,18 @@ public:
/**
* Given a set of lambdas, get the nbest for each sentence.
*/
void Get1bests(const Point& param,vector<unsigned>& bests) const;
void Get1bests(const Point& param,std::vector<unsigned>& bests) const;
/**
* Given a set of nbests, get the Statistical score.
*/
statscore_t GetStatScore(const vector<unsigned>& nbests) const {
statscore_t GetStatScore(const std::vector<unsigned>& nbests) const {
return m_scorer->score(nbests);
}
statscore_t GetStatScore(const Point& param) const;
vector<statscore_t> GetIncStatScore(const vector<unsigned>& ref, const vector<vector<pair<unsigned,unsigned> > >& diffs) const;
std::vector<statscore_t> GetIncStatScore(const std::vector<unsigned>& ref, const std::vector<std::vector<std::pair<unsigned,unsigned> > >& diffs) const;
/**
* Get the optimal Lambda and the best score in a particular direction from a given Point.
@ -79,9 +77,9 @@ class SimpleOptimizer : public Optimizer
private:
const float kEPS;
public:
SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001) {}
SimpleOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
const std::vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001f) {}
virtual statscore_t TrueRun(Point&) const;
};
@ -93,9 +91,9 @@ class RandomDirectionOptimizer : public Optimizer
private:
const float kEPS;
public:
RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001) {}
RandomDirectionOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
const std::vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001f) {}
virtual statscore_t TrueRun(Point&) const;
};
@ -105,8 +103,8 @@ public:
class RandomOptimizer : public Optimizer
{
public:
RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
const vector<parameter_t>& start, unsigned int nrandom)
RandomOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
const std::vector<parameter_t>& start, unsigned int nrandom)
: Optimizer(dim, i2O, positive, start, nrandom) {}
virtual statscore_t TrueRun(Point&) const;
};

View File

@ -17,7 +17,7 @@ class OptimizerFactory
NOPTIMIZER
};
static std::vector<string> GetTypeNames();
static std::vector<std::string> GetTypeNames();
// Setup optimization types.
static void SetTypeNames();
@ -27,7 +27,7 @@ class OptimizerFactory
static Optimizer* BuildOptimizer(unsigned dim,
const std::vector<unsigned>& to_optimize,
const std::vector<bool>& positive,
const std::vector<bool>& positive,
const std::vector<parameter_t>& start,
const std::string& type,
unsigned int nrandom);
@ -36,7 +36,7 @@ class OptimizerFactory
OptimizerFactory() {}
~OptimizerFactory() {}
static vector<string> m_type_names;
static std::vector<std::string> m_type_names;
};
#endif // MERT_OPTIMIZER_FACTORY_H_

View File

@ -8,10 +8,10 @@
namespace {
inline bool CheckBuildOptimizer(unsigned dim,
const vector<unsigned>& to_optimize,
const vector<bool>& positive,
const vector<parameter_t>& start,
const string& type,
const std::vector<unsigned>& to_optimize,
const std::vector<bool>& positive,
const std::vector<parameter_t>& start,
const std::string& type,
unsigned int num_random) {
boost::scoped_ptr<Optimizer> optimizer(OptimizerFactory::BuildOptimizer(dim, to_optimize, positive, start, type, num_random));
return optimizer.get() != NULL;

View File

@ -7,8 +7,6 @@
#include "Types.h"
#include "Scorer.h"
using namespace std;
class ScoreStats;
/**
@ -20,13 +18,13 @@ class ScoreStats;
class PerScorer: public StatisticsBasedScorer
{
public:
explicit PerScorer(const string& config = "");
explicit PerScorer(const std::string& config = "");
~PerScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual size_t NumberOfScores() const { return 3; }
virtual float calculateScore(const vector<int>& comps) const;
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual std::size_t NumberOfScores() const { return 3; }
virtual float calculateScore(const std::vector<int>& comps) const;
private:
// no copying allowed
@ -34,8 +32,8 @@ private:
PerScorer& operator=(const PerScorer&);
// data extracted from reference files
vector<size_t> m_ref_lengths;
vector<multiset<int> > m_ref_tokens;
std::vector<std::size_t> m_ref_lengths;
std::vector<std::multiset<int> > m_ref_tokens;
};
#endif // MERT_PER_SCORER_H_

View File

@ -13,7 +13,7 @@ class Optimizer;
* A class that handles the subset of the Feature weight on which
* we run the optimization.
*/
class Point : public vector<parameter_t>
class Point : public std::vector<parameter_t>
{
friend class Optimizer;
@ -21,7 +21,7 @@ private:
/**
* The indices over which we optimize.
*/
static vector<unsigned int> m_opt_indices;
static std::vector<unsigned int> m_opt_indices;
/**
* Dimension of m_opt_indices and of the parent vector.
@ -31,7 +31,7 @@ private:
/**
* Fixed weights in case of partial optimization.
*/
static map<unsigned int,parameter_t> m_fixed_weights;
static std::map<unsigned int,parameter_t> m_fixed_weights;
/**
* Total size of the parameter space; we have
@ -43,23 +43,23 @@ private:
/**
* The limits for randomization, both vectors are of full length, m_pdim.
*/
static vector<parameter_t> m_min;
static vector<parameter_t> m_max;
static std::vector<parameter_t> m_min;
static std::vector<parameter_t> m_max;
statscore_t m_score;
public:
static unsigned int getdim() { return m_dim; }
static void setdim(size_t d) { m_dim = d; }
static void setdim(std::size_t d) { m_dim = d; }
static unsigned int getpdim() { return m_pdim; }
static void setpdim(size_t pd) { m_pdim = pd; }
static void setpdim(std::size_t pd) { m_pdim = pd; }
static void set_optindices(const vector<unsigned int>& indices) {
static void set_optindices(const std::vector<unsigned int>& indices) {
m_opt_indices = indices;
}
static const vector<unsigned int>& get_optindices() {
static const std::vector<unsigned int>& get_optindices() {
return m_opt_indices;
}
@ -68,9 +68,9 @@ public:
}
Point();
Point(const vector<parameter_t>& init,
const vector<parameter_t>& min,
const vector<parameter_t>& max);
Point(const std::vector<parameter_t>& init,
const std::vector<parameter_t>& min,
const std::vector<parameter_t>& max);
~Point();
void Randomize();
@ -84,7 +84,7 @@ public:
/**
* Write the Whole featureweight to a stream (ie m_pdim float).
*/
friend ostream& operator<<(ostream& o,const Point& P);
friend std::ostream& operator<<(std::ostream& o,const Point& P);
void Normalize() { NormalizeL2(); }
void NormalizeL2();
@ -94,7 +94,7 @@ public:
* Return a vector of size m_pdim where all weights have been
* put (including fixed ones).
*/
void GetAllWeights(vector<parameter_t>& w) const;
void GetAllWeights(std::vector<parameter_t>& w) const;
statscore_t GetScore() const { return m_score; }
void SetScore(statscore_t score) { m_score = score; }

View File

@ -5,6 +5,8 @@
#include <unistd.h>
#include <csignal>
#include "Fdstream.h"
using namespace std;
#define CHILD_STDIN_READ pipefds_input[0]

View File

@ -3,7 +3,8 @@
#include <string>
#include "Fdstream.h"
class ofdstream;
class ifdstream;
/*
* This class runs the filter command in a child process and
@ -12,13 +13,13 @@
class PreProcessFilter
{
public:
PreProcessFilter(const string& filterCommand);
string ProcessSentence(const string& sentence);
~PreProcessFilter();
explicit PreProcessFilter(const std::string& filterCommand);
std::string ProcessSentence(const std::string& sentence);
~PreProcessFilter();
private:
ofdstream* m_toFilter;
ifdstream* m_fromFilter;
ifdstream* m_fromFilter;
};
#endif // MERT_PREPROCESSFILTER_H_

View File

@ -14,8 +14,8 @@
class Reference {
public:
// for m_length
typedef std::vector<size_t>::iterator iterator;
typedef std::vector<size_t>::const_iterator const_iterator;
typedef std::vector<std::size_t>::iterator iterator;
typedef std::vector<std::size_t>::const_iterator const_iterator;
Reference() : m_counts(new NgramCounts) { }
~Reference() { delete m_counts; }
@ -28,36 +28,36 @@ class Reference {
iterator end() { return m_length.end(); }
const_iterator end() const { return m_length.end(); }
void push_back(size_t len) { m_length.push_back(len); }
void push_back(std::size_t len) { m_length.push_back(len); }
size_t num_references() const { return m_length.size(); }
std::size_t num_references() const { return m_length.size(); }
int CalcAverage() const;
int CalcClosest(size_t length) const;
int CalcClosest(std::size_t length) const;
int CalcShortest() const;
private:
NgramCounts* m_counts;
// multiple reference lengths
std::vector<size_t> m_length;
std::vector<std::size_t> m_length;
};
// TODO(tetsuok): fix this function and related stuff.
// "average" reference length should not be calculated at sentence-level unlike "closest".
inline int Reference::CalcAverage() const {
int total = 0;
for (size_t i = 0; i < m_length.size(); ++i) {
for (std::size_t i = 0; i < m_length.size(); ++i) {
total += m_length[i];
}
return static_cast<int>(
static_cast<float>(total) / m_length.size());
}
inline int Reference::CalcClosest(size_t length) const {
inline int Reference::CalcClosest(std::size_t length) const {
int min_diff = INT_MAX;
int closest_ref_id = 0; // an index of the closest reference translation
for (size_t i = 0; i < m_length.size(); ++i) {
for (std::size_t i = 0; i < m_length.size(); ++i) {
const int ref_length = m_length[i];
const int length_diff = abs(ref_length - static_cast<int>(length));
const int abs_min_diff = abs(min_diff);

View File

@ -23,10 +23,10 @@ class ScopedVector {
m_vec.clear();
}
void reserve(size_t capacity) { m_vec.reserve(capacity); }
void resize(size_t size) { m_vec.resize(size); }
void reserve(std::size_t capacity) { m_vec.reserve(capacity); }
void resize(std::size_t size) { m_vec.resize(size); }
size_t size() const {return m_vec.size(); }
std::size_t size() const {return m_vec.size(); }
iterator begin() { return m_vec.begin(); }
const_iterator begin() const { return m_vec.begin(); }
@ -40,8 +40,8 @@ class ScopedVector {
std::vector<T*>* operator->() { return &m_vec; }
const std::vector<T*>* operator->() const { return &m_vec; }
T*& operator[](size_t i) { return m_vec[i]; }
const T* operator[](size_t i) const { return m_vec[i]; }
T*& operator[](std::size_t i) { return m_vec[i]; }
const T* operator[](std::size_t i) const { return m_vec[i]; }
private:
std::vector<T*> m_vec;

View File

@ -8,11 +8,14 @@
#include "ScoreData.h"
#include <iostream>
#include <fstream>
#include "Scorer.h"
#include "Util.h"
#include "FileStream.h"
using namespace std;
ScoreData::ScoreData(Scorer* scorer) :
m_scorer(scorer)
{

View File

@ -9,15 +9,13 @@
#ifndef MERT_SCORE_DATA_H_
#define MERT_SCORE_DATA_H_
#include <iostream>
#include <iosfwd>
#include <vector>
#include <stdexcept>
#include <string>
#include "ScoreArray.h"
#include "ScoreStats.h"
using namespace std;
class Scorer;
class ScoreData
@ -32,7 +30,7 @@ private:
Scorer* m_scorer;
std::string m_score_type;
size_t m_num_scores;
std::size_t m_num_scores;
public:
ScoreData(Scorer* scorer);
@ -44,11 +42,11 @@ public:
return m_array.at(getIndex(idx));
}
inline ScoreArray& get(size_t idx) {
inline ScoreArray& get(std::size_t idx) {
return m_array.at(idx);
}
inline const ScoreArray& get(size_t idx) const {
inline const ScoreArray& get(std::size_t idx) const {
return m_array.at(idx);
}
@ -60,11 +58,11 @@ public:
return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
}
inline ScoreStats& get(size_t i, size_t j) {
inline ScoreStats& get(std::size_t i, std::size_t j) {
return m_array.at(i).get(j);
}
inline const ScoreStats& get(size_t i, size_t j) const {
inline const ScoreStats& get(std::size_t i, std::size_t j) const {
return m_array.at(i).get(j);
}
@ -77,8 +75,8 @@ public:
void add(ScoreArray& e);
void add(const ScoreStats& e, const std::string& sent_idx);
size_t NumberOfScores() const { return m_num_scores; }
size_t size() const { return m_array.size(); }
std::size_t NumberOfScores() const { return m_num_scores; }
std::size_t size() const { return m_array.size(); }
void save(const std::string &file, bool bin=false);
void save(std::ostream* os, bool bin=false);
@ -99,10 +97,10 @@ public:
return -1;
}
inline std::string getIndex(size_t idx) const {
inline std::string getIndex(std::size_t idx) const {
idx2name::const_iterator i = m_index_to_array_name.find(idx);
if (i != m_index_to_array_name.end())
throw runtime_error("there is no entry at index " + idx);
throw std::runtime_error("there is no entry at index " + idx);
return i->second;
}
};

View File

@ -8,6 +8,10 @@
#include "Util.h"
#include "ScoreStats.h"
#include <fstream>
#include <iostream>
using namespace std;
namespace {
const int kAvailableSize = 8;

View File

@ -10,27 +10,24 @@
#define MERT_SCORE_STATS_H_
#include <vector>
#include <iostream>
#include <fstream>
#include <iosfwd>
#include <cstdlib>
#include <cstring>
#include "Types.h"
using namespace std;
class ScoreStats
{
private:
size_t m_available_size;
size_t m_entries;
std::size_t m_available_size;
std::size_t m_entries;
// TODO: Use smart pointer for exceptional-safety.
scorestats_t m_array;
public:
ScoreStats();
explicit ScoreStats(const size_t size);
explicit ScoreStats(const std::size_t size);
~ScoreStats();
@ -46,7 +43,7 @@ public:
void add(ScoreStatsType v);
void clear() {
memset((void*)m_array, 0, GetArraySizeWithBytes());
std::memset((void*)m_array, 0, GetArraySizeWithBytes());
}
void reset() {
@ -54,8 +51,8 @@ public:
clear();
}
ScoreStatsType get(size_t i) { return m_array[i]; }
ScoreStatsType get(size_t i) const { return m_array[i]; }
ScoreStatsType get(std::size_t i) { return m_array[i]; }
ScoreStatsType get(std::size_t i) const { return m_array[i]; }
scorestats_t getArray() const { return m_array; }
void set(const std::string& str);
@ -63,34 +60,34 @@ public:
// Much more efficient than the above.
void set(const std::vector<ScoreStatsType>& stats) {
reset();
for (size_t i = 0; i < stats.size(); ++i) {
for (std::size_t i = 0; i < stats.size(); ++i) {
add(stats[i]);
}
}
size_t bytes() const { return GetArraySizeWithBytes(); }
std::size_t bytes() const { return GetArraySizeWithBytes(); }
size_t GetArraySizeWithBytes() const {
std::size_t GetArraySizeWithBytes() const {
return m_entries * sizeof(ScoreStatsType);
}
size_t size() const { return m_entries; }
std::size_t size() const { return m_entries; }
size_t available() const { return m_available_size; }
std::size_t available() const { return m_available_size; }
void savetxt(const std::string &file);
void savetxt(ostream* os);
void savebin(ostream* os);
void savetxt(std::ostream* os);
void savebin(std::ostream* os);
void savetxt();
void loadtxt(const std::string &file);
void loadtxt(istream* is);
void loadbin(istream* is);
void loadtxt(std::istream* is);
void loadbin(std::istream* is);
/**
* Write the whole object to a stream.
*/
friend ostream& operator<<(ostream& o, const ScoreStats& e);
friend std::ostream& operator<<(std::ostream& o, const ScoreStats& e);
};
//ADDED_BY_TS

View File

@ -4,6 +4,9 @@
#include "Vocabulary.h"
#include "Util.h"
#include "Singleton.h"
#include "PreProcessFilter.h"
using namespace std;
namespace {
@ -38,9 +41,9 @@ inline float score_average(const statscores_t& scores, size_t start, size_t end)
Scorer::Scorer(const string& name, const string& config)
: m_name(name),
m_vocab(mert::VocabularyFactory::GetVocabulary()),
m_score_data(0),
m_enable_preserve_case(true),
m_filter(NULL) {
m_filter(NULL),
m_score_data(NULL),
m_enable_preserve_case(true) {
InitConfig(config);
}

View File

@ -8,10 +8,8 @@
#include <vector>
#include "Types.h"
#include "ScoreData.h"
#include "PreProcessFilter.h"
using namespace std;
class PreProcessFilter;
class ScoreStats;
namespace mert {
@ -29,18 +27,18 @@ class Vocabulary;
class Scorer
{
public:
Scorer(const string& name, const string& config);
Scorer(const std::string& name, const std::string& config);
virtual ~Scorer();
/**
* Return the number of statistics needed for the computation of the score.
*/
virtual size_t NumberOfScores() const = 0;
virtual std::size_t NumberOfScores() const = 0;
/**
* Set the reference files. This must be called before prepareStats().
*/
virtual void setReferenceFiles(const vector<string>& referenceFiles) {
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles) {
// do nothing
}
@ -48,12 +46,12 @@ class Scorer
* Process the given guessed text, corresponding to the given reference sindex
* and add the appropriate statistics to the entry.
*/
virtual void prepareStats(size_t sindex, const string& text, ScoreStats& entry) {
virtual void prepareStats(std::size_t sindex, const std::string& text, ScoreStats& entry) {
// do nothing.
}
virtual void prepareStats(const string& sindex, const string& text, ScoreStats& entry) {
this->prepareStats(static_cast<size_t>(atoi(sindex.c_str())), text, entry);
virtual void prepareStats(const std::string& sindex, const std::string& text, ScoreStats& entry) {
this->prepareStats(static_cast<std::size_t>(atoi(sindex.c_str())), text, entry);
}
/**
@ -69,7 +67,7 @@ class Scorer
throw runtime_error("score data not loaded");
}
scores.push_back(0);
for (size_t i = 0; i < diffs.size(); ++i) {
for (std::size_t i = 0; i < diffs.size(); ++i) {
scores.push_back(0);
}
}
@ -86,11 +84,11 @@ class Scorer
return scores[0];
}
const string& getName() const {
const std::string& getName() const {
return m_name;
}
size_t getReferenceSize() const {
std::size_t getReferenceSize() const {
if (m_score_data) {
return m_score_data->size();
}
@ -107,33 +105,33 @@ class Scorer
/**
* Set the factors, which should be used for this metric
*/
virtual void setFactors(const string& factors);
virtual void setFactors(const std::string& factors);
mert::Vocabulary* GetVocab() const { return m_vocab; }
/**
* Set unix filter, which will be used to preprocess the sentences
*/
virtual void setFilter(const string& filterCommand);
virtual void setFilter(const std::string& filterCommand);
private:
void InitConfig(const string& config);
void InitConfig(const std::string& config);
/**
* Take the factored sentence and return the desired factors
*/
string applyFactors(const string& sentece) const;
std::string applyFactors(const std::string& sentece) const;
/**
* Preprocess the sentence with the filter (if given)
*/
string applyFilter(const string& sentence) const;
std::string applyFilter(const std::string& sentence) const;
string m_name;
std::string m_name;
mert::Vocabulary* m_vocab;
map<string, string> m_config;
vector<int> m_factors;
PreProcessFilter* m_filter;
std::map<std::string, std::string> m_config;
std::vector<int> m_factors;
PreProcessFilter* m_filter;
protected:
ScoreData* m_score_data;
@ -142,8 +140,8 @@ class Scorer
/**
* Get value of config variable. If not provided, return default.
*/
string getConfig(const string& key, const string& def="") const {
map<string,string>::const_iterator i = m_config.find(key);
std::string getConfig(const std::string& key, const std::string& def="") const {
std::map<std::string,std::string>::const_iterator i = m_config.find(key);
if (i == m_config.end()) {
return def;
} else {
@ -155,12 +153,12 @@ class Scorer
* Tokenise line and encode.
* Note: We assume that all tokens are separated by whitespaces.
*/
void TokenizeAndEncode(const string& line, vector<int>& encoded);
void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded);
/**
* Every inherited scorer should call this function for each sentence
*/
string preprocessSentence(const string& sentence) const
std::string preprocessSentence(const std::string& sentence) const
{
return applyFactors(applyFilter(sentence));
}
@ -174,7 +172,7 @@ class Scorer
class StatisticsBasedScorer : public Scorer
{
public:
StatisticsBasedScorer(const string& name, const string& config);
StatisticsBasedScorer(const std::string& name, const std::string& config);
virtual ~StatisticsBasedScorer() {}
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
statscores_t& scores) const;
@ -184,17 +182,17 @@ class StatisticsBasedScorer : public Scorer
enum RegularisationType {
NONE,
AVERAGE,
MINIMUM,
MINIMUM
};
/**
* Calculate the actual score.
*/
virtual statscore_t calculateScore(const vector<int>& totals) const = 0;
virtual statscore_t calculateScore(const std::vector<int>& totals) const = 0;
// regularisation
RegularisationType m_regularization_type;
size_t m_regularization_window;
std::size_t m_regularization_window;
};
#endif // MERT_SCORER_H_

View File

@ -54,7 +54,7 @@ private:
std::map<int, float> weightsMap;
void loadWeights(const string& weightsfile);
void loadWeights(const std::string& weightsfile);
// no copying allowed.
SemposScorer(const SemposScorer&);

View File

@ -9,6 +9,7 @@
#include "TER/terAlignment.h"
#include "Util.h"
using namespace std;
using namespace TERCpp;
TerScorer::TerScorer(const string& config)

View File

@ -1,7 +1,6 @@
#ifndef MERT_TER_SCORER_H_
#define MERT_TER_SCORER_H_
#include <iostream>
#include <set>
#include <string>
#include <vector>
@ -9,8 +8,6 @@
#include "Types.h"
#include "Scorer.h"
using namespace std;
class ScoreStats;
/**
@ -19,35 +16,31 @@ class ScoreStats;
class TerScorer: public StatisticsBasedScorer
{
public:
explicit TerScorer(const string& config = "");
explicit TerScorer(const std::string& config = "");
~TerScorer();
virtual void setReferenceFiles(const vector<string>& referenceFiles);
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual size_t NumberOfScores() const {
virtual std::size_t NumberOfScores() const {
// cerr << "TerScorer: " << (LENGTH + 1) << endl;
return kLENGTH + 1;
}
virtual float calculateScore(const vector<int>& comps) const;
void whoami() const {
cerr << "I AM TerScorer" << std::endl;
}
virtual float calculateScore(const std::vector<int>& comps) const;
private:
const int kLENGTH;
string m_java_env;
string m_ter_com_env;
std::string m_java_env;
std::string m_ter_com_env;
// data extracted from reference files
vector<size_t> m_ref_lengths;
vector<multiset<int> > m_ref_tokens;
vector<vector<int> > m_references;
vector<vector<vector<int> > > m_multi_references;
string m_pid;
std::vector<std::size_t> m_ref_lengths;
std::vector<std::multiset<int> > m_ref_tokens;
std::vector<std::vector<int> > m_references;
std::vector<std::vector<std::vector<int> > > m_multi_references;
std::string m_pid;
// no copying allowed
TerScorer(const TerScorer&);

View File

@ -4,8 +4,7 @@
#include <vector>
#include <map>
#include <string>
using namespace std;
#include <utility>
class FeatureStats;
class FeatureArray;
@ -15,29 +14,29 @@ class ScoreArray;
class ScoreData;
typedef float parameter_t;
//typedef vector<parameter_t> parameters_t;confusing; use vector<parameter_t>
typedef vector<pair<unsigned int, unsigned int> > diff_t;
typedef pair<float,diff_t > threshold;
typedef vector<diff_t> diffs_t;
typedef vector<unsigned int> candidates_t;
//typedef std::vector<parameter_t> parameters_t;confusing; use std::vector<parameter_t>
typedef std::vector<std::pair<unsigned int, unsigned int> > diff_t;
typedef std::pair<float,diff_t > threshold;
typedef std::vector<diff_t> diffs_t;
typedef std::vector<unsigned int> candidates_t;
typedef float statscore_t;
typedef vector<statscore_t> statscores_t;
typedef std::vector<statscore_t> statscores_t;
typedef float FeatureStatsType;
typedef FeatureStatsType* featstats_t;
//typedef vector<FeatureStatsType> featstats_t;
typedef vector<FeatureStats> featarray_t;
typedef vector<FeatureArray> featdata_t;
//typedef std::vector<FeatureStatsType> featstats_t;
typedef std::vector<FeatureStats> featarray_t;
typedef std::vector<FeatureArray> featdata_t;
typedef int ScoreStatsType;
typedef ScoreStatsType* scorestats_t;
//typedef vector<ScoreStatsType> scorestats_t;
typedef vector<ScoreStats> scorearray_t;
typedef vector<ScoreArray> scoredata_t;
//typedef std::vector<ScoreStatsType> scorestats_t;
typedef std::vector<ScoreStats> scorearray_t;
typedef std::vector<ScoreArray> scoredata_t;
typedef map<size_t, std::string> idx2name;
typedef map<std::string, size_t> name2idx;
typedef std::map<std::size_t, std::string> idx2name;
typedef std::map<std::string, std::size_t> name2idx;
#endif // MERT_TYPE_H_

View File

@ -22,8 +22,6 @@
#include "Types.h"
using namespace std;
#ifdef TRACE_ENABLE
#define TRACE_ERR(str) { std::cerr << str; }
#else
@ -40,11 +38,11 @@ const float kEPS = 0.0001f;
template <typename T>
bool IsAlmostEqual(T expected, T actual, float round=kEPS) {
if (abs(expected - actual) < round) {
if (std::abs(expected - actual) < round) {
return true;
} else {
cerr << "Fail: expected = " << expected
<< " (actual = " << actual << ")" << endl;
std::cerr << "Fail: expected = " << expected
<< " (actual = " << actual << ")" << std::endl;
return false;
}
}

View File

@ -48,7 +48,7 @@ class Vocabulary {
bool empty() const { return m_vocab.empty(); }
size_t size() const { return m_vocab.size(); }
std::size_t size() const { return m_vocab.size(); }
iterator find(const std::string& str) { return m_vocab.find(str); }
const_iterator find(const std::string& str) const { return m_vocab.find(str); }