mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Merge remote-tracking branch 'upstream/master'
This commit is contained in:
commit
60dde0b06b
@ -14,6 +14,8 @@
|
||||
#include "Util.h"
|
||||
#include "Vocabulary.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
|
||||
// configure regularisation
|
||||
|
@ -10,8 +10,6 @@
|
||||
#include "Scorer.h"
|
||||
#include "ScopedVector.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
const int kBleuNgramOrder = 4;
|
||||
|
||||
class NgramCounts;
|
||||
@ -29,15 +27,15 @@ public:
|
||||
SHORTEST
|
||||
};
|
||||
|
||||
explicit BleuScorer(const string& config = "");
|
||||
explicit BleuScorer(const std::string& config = "");
|
||||
~BleuScorer();
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
virtual size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
virtual float calculateScore(const std::vector<int>& comps) const;
|
||||
virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
|
||||
|
||||
int CalcReferenceLength(size_t sentence_id, size_t length);
|
||||
int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
|
||||
|
||||
ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
|
||||
void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
|
||||
@ -47,14 +45,14 @@ public:
|
||||
/**
|
||||
* Count the ngrams of each type, up to the given length in the input line.
|
||||
*/
|
||||
size_t CountNgrams(const string& line, NgramCounts& counts, unsigned int n);
|
||||
std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n);
|
||||
|
||||
void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
|
||||
|
||||
bool OpenReference(const char* filename, size_t file_id);
|
||||
bool OpenReference(const char* filename, std::size_t file_id);
|
||||
|
||||
// NOTE: this function is used for unit testing.
|
||||
bool OpenReferenceStream(std::istream* is, size_t file_id);
|
||||
bool OpenReferenceStream(std::istream* is, std::size_t file_id);
|
||||
|
||||
private:
|
||||
ReferenceLengthType m_ref_length_type;
|
||||
@ -70,6 +68,6 @@ private:
|
||||
/** Computes sentence-level BLEU+1 score.
|
||||
* This function is used in PRO.
|
||||
*/
|
||||
float sentenceLevelBleuPlusOne(const vector<float>& stats);
|
||||
float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
|
||||
|
||||
#endif // MERT_BLEU_SCORER_H_
|
||||
|
@ -220,7 +220,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
|
||||
|
||||
BOOST_AUTO_TEST_CASE(calculate_actual_score) {
|
||||
BOOST_REQUIRE(4 == kBleuNgramOrder);
|
||||
vector<int> stats(2 * kBleuNgramOrder + 1);
|
||||
std::vector<int> stats(2 * kBleuNgramOrder + 1);
|
||||
BleuScorer scorer;
|
||||
|
||||
// unigram
|
||||
@ -247,7 +247,7 @@ BOOST_AUTO_TEST_CASE(calculate_actual_score) {
|
||||
|
||||
BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
|
||||
BOOST_REQUIRE(4 == kBleuNgramOrder);
|
||||
vector<float> stats(2 * kBleuNgramOrder + 1);
|
||||
std::vector<float> stats(2 * kBleuNgramOrder + 1);
|
||||
|
||||
// unigram
|
||||
stats[0] = 6.0;
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <fstream>
|
||||
#include <stdexcept>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
|
||||
inline int CalcDistance(int word1, int word2) {
|
||||
|
@ -6,34 +6,32 @@
|
||||
#include "Types.h"
|
||||
#include "Scorer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
|
||||
* CderScorer class can compute both the CDER and WER metrics.
|
||||
*/
|
||||
class CderScorer: public StatisticsBasedScorer {
|
||||
public:
|
||||
explicit CderScorer(const string& config, bool allowed_long_jumps = true);
|
||||
explicit CderScorer(const std::string& config, bool allowed_long_jumps = true);
|
||||
~CderScorer();
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
|
||||
virtual void prepareStatsVector(size_t sid, const string& text, vector<int>& stats);
|
||||
virtual void prepareStatsVector(std::size_t sid, const std::string& text, std::vector<int>& stats);
|
||||
|
||||
virtual size_t NumberOfScores() const { return 2; }
|
||||
virtual std::size_t NumberOfScores() const { return 2; }
|
||||
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
virtual float calculateScore(const std::vector<int>& comps) const;
|
||||
|
||||
private:
|
||||
bool m_allowed_long_jumps;
|
||||
|
||||
typedef vector<int> sent_t;
|
||||
vector<vector<sent_t> > m_ref_sentences;
|
||||
typedef std::vector<int> sent_t;
|
||||
std::vector<std::vector<sent_t> > m_ref_sentences;
|
||||
|
||||
void computeCD(const sent_t& cand, const sent_t& ref,
|
||||
vector<int>& stats) const;
|
||||
std::vector<int>& stats) const;
|
||||
|
||||
// no copying allowed
|
||||
CderScorer(const CderScorer&);
|
||||
|
@ -30,7 +30,7 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
|
||||
data.getScoreData()->add(sa3);
|
||||
data.getScoreData()->add(sa4);
|
||||
|
||||
vector<Data> shards;
|
||||
std::vector<Data> shards;
|
||||
data.createShards(2,0,"",shards);
|
||||
|
||||
BOOST_CHECK_EQUAL(shards.size(),2);
|
||||
|
@ -6,12 +6,12 @@
|
||||
#define _FDSTREAM_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
|
||||
#include <ext/stdio_filebuf.h>
|
||||
|
||||
#define BUFFER_SIZE (1024)
|
||||
|
||||
using namespace std;
|
||||
#define BUFFER_SIZE (32768)
|
||||
|
||||
class _fdstream
|
||||
{
|
||||
@ -20,16 +20,16 @@ protected:
|
||||
_file_descriptor(-1), _filebuf(NULL)
|
||||
{ }
|
||||
|
||||
_fdstream(int file_descriptor, ios_base::openmode openmode) :
|
||||
_fdstream(int file_descriptor, std::ios_base::openmode openmode) :
|
||||
_file_descriptor(file_descriptor), _openmode(openmode)
|
||||
{
|
||||
_filebuf = NULL;
|
||||
open(file_descriptor, openmode);
|
||||
}
|
||||
|
||||
ios_base::openmode openmode() const { return _openmode; }
|
||||
std::ios_base::openmode openmode() const { return _openmode; }
|
||||
|
||||
void open(int file_descriptor, ios_base::openmode openmode)
|
||||
void open(int file_descriptor, std::ios_base::openmode openmode)
|
||||
{
|
||||
if (!_filebuf)
|
||||
// We create a C++ stream from a file descriptor
|
||||
@ -38,10 +38,10 @@ protected:
|
||||
// You can also create the filebuf from a FILE* with
|
||||
// FILE* f = fdopen(file_descriptor, mode);
|
||||
_filebuf = new __gnu_cxx::stdio_filebuf<char> (file_descriptor,
|
||||
openmode);
|
||||
openmode);
|
||||
}
|
||||
|
||||
~_fdstream()
|
||||
virtual ~_fdstream()
|
||||
{
|
||||
close(_file_descriptor);
|
||||
delete _filebuf;
|
||||
@ -50,7 +50,7 @@ protected:
|
||||
|
||||
int _file_descriptor;
|
||||
__gnu_cxx::stdio_filebuf<char>* _filebuf;
|
||||
ios_base::openmode _openmode;
|
||||
std::ios_base::openmode _openmode;
|
||||
};
|
||||
|
||||
class ifdstream : public _fdstream
|
||||
@ -61,41 +61,41 @@ public:
|
||||
{ }
|
||||
|
||||
ifdstream(int file_descriptor) :
|
||||
_fdstream(file_descriptor, ios_base::in)
|
||||
_fdstream(file_descriptor, std::ios_base::in)
|
||||
{
|
||||
_stream = new istream (_filebuf);
|
||||
_stream = new std::istream(_filebuf);
|
||||
}
|
||||
|
||||
void open(int file_descriptor)
|
||||
{
|
||||
if (!_stream)
|
||||
{
|
||||
_fdstream::open(file_descriptor, ios_base::in);
|
||||
_stream = new istream (_filebuf);
|
||||
_fdstream::open(file_descriptor, std::ios_base::in);
|
||||
_stream = new std::istream(_filebuf);
|
||||
}
|
||||
}
|
||||
|
||||
ifdstream& operator>> (string& str)
|
||||
ifdstream& operator>> (std::string& str)
|
||||
{
|
||||
(*_stream) >> str;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
size_t getline(string& str)
|
||||
std::size_t getline(std::string& str)
|
||||
{
|
||||
char tmp[BUFFER_SIZE];
|
||||
size_t ret = getline(tmp, BUFFER_SIZE);
|
||||
std::size_t ret = getline(tmp, BUFFER_SIZE);
|
||||
str = tmp;
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t getline (char* s, streamsize n)
|
||||
std::size_t getline(char* s, std::streamsize n)
|
||||
{
|
||||
return (getline(s, n, '\n'));
|
||||
}
|
||||
|
||||
size_t getline (char* s, streamsize n, char delim)
|
||||
std::size_t getline(char* s, std::streamsize n, char delim)
|
||||
{
|
||||
int i = 0;
|
||||
do{
|
||||
@ -115,7 +115,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
istream* _stream;
|
||||
std::istream* _stream;
|
||||
};
|
||||
|
||||
class ofdstream : public _fdstream
|
||||
@ -126,22 +126,22 @@ public:
|
||||
{ }
|
||||
|
||||
ofdstream(int file_descriptor) :
|
||||
_fdstream(file_descriptor, ios_base::out)
|
||||
_fdstream(file_descriptor, std::ios_base::out)
|
||||
{
|
||||
_stream = new ostream (_filebuf);
|
||||
_stream = new std::ostream(_filebuf);
|
||||
}
|
||||
|
||||
void open(int file_descriptor)
|
||||
{
|
||||
if (!_stream)
|
||||
{
|
||||
_fdstream::open(file_descriptor, ios_base::out);
|
||||
_stream = new ostream (_filebuf);
|
||||
}
|
||||
{
|
||||
_fdstream::open(file_descriptor, std::ios_base::out);
|
||||
_stream = new std::ostream(_filebuf);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ofdstream& operator<< (const string& str)
|
||||
ofdstream& operator<< (const std::string& str)
|
||||
{
|
||||
if (_stream->good())
|
||||
(*_stream) << str;
|
||||
@ -157,7 +157,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
ostream* _stream;
|
||||
std::ostream* _stream;
|
||||
};
|
||||
|
||||
#else
|
||||
|
@ -6,11 +6,14 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "FeatureArray.h"
|
||||
#include "FileStream.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
FeatureArray::FeatureArray()
|
||||
: m_index(""), m_num_features(0), m_sparse_flag(false) {}
|
||||
|
||||
|
@ -10,11 +10,9 @@
|
||||
#define MERT_FEATURE_ARRAY_H_
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <iosfwd>
|
||||
#include "FeatureStats.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
const char FEATURES_TXT_BEGIN[] = "FEATURES_TXT_BEGIN_0";
|
||||
const char FEATURES_TXT_END[] = "FEATURES_TXT_END_0";
|
||||
const char FEATURES_BIN_BEGIN[] = "FEATURES_BIN_BEGIN_0";
|
||||
@ -27,7 +25,7 @@ private:
|
||||
// the index inside the vector.
|
||||
std::string m_index;
|
||||
featarray_t m_array;
|
||||
size_t m_num_features;
|
||||
std::size_t m_num_features;
|
||||
std::string m_features;
|
||||
bool m_sparse_flag;
|
||||
|
||||
@ -42,27 +40,27 @@ public:
|
||||
std::string getIndex() const { return m_index; }
|
||||
void setIndex(const std::string& value) { m_index = value; }
|
||||
|
||||
FeatureStats& get(size_t i) { return m_array.at(i); }
|
||||
const FeatureStats& get(size_t i) const { return m_array.at(i); }
|
||||
FeatureStats& get(std::size_t i) { return m_array.at(i); }
|
||||
const FeatureStats& get(std::size_t i) const { return m_array.at(i); }
|
||||
|
||||
void add(FeatureStats& e) { m_array.push_back(e); }
|
||||
|
||||
//ADDED BY TS
|
||||
void swap(size_t i, size_t j) {
|
||||
void swap(std::size_t i, std::size_t j) {
|
||||
std::swap(m_array[i], m_array[j]);
|
||||
}
|
||||
|
||||
void resize(size_t new_size) {
|
||||
void resize(std::size_t new_size) {
|
||||
m_array.resize(std::min(new_size, m_array.size()));
|
||||
}
|
||||
//END_ADDED
|
||||
|
||||
void merge(FeatureArray& e);
|
||||
|
||||
size_t size() const { return m_array.size(); }
|
||||
std::size_t size() const { return m_array.size(); }
|
||||
|
||||
size_t NumberOfFeatures() const { return m_num_features; }
|
||||
void NumberOfFeatures(size_t v) { m_num_features = v; }
|
||||
std::size_t NumberOfFeatures() const { return m_num_features; }
|
||||
void NumberOfFeatures(std::size_t v) { m_num_features = v; }
|
||||
|
||||
std::string Features() const { return m_features; }
|
||||
void Features(const std::string& f) { m_features = f; }
|
||||
@ -73,8 +71,8 @@ public:
|
||||
void save(const std::string &file, bool bin=false);
|
||||
void save(bool bin=false);
|
||||
|
||||
void loadtxt(std::istream* is, size_t n);
|
||||
void loadbin(std::istream* is, size_t n);
|
||||
void loadtxt(std::istream* is, std::size_t n);
|
||||
void loadbin(std::istream* is, std::size_t n);
|
||||
void load(std::istream* is);
|
||||
void load(const std::string &file);
|
||||
|
||||
|
@ -100,10 +100,10 @@ public:
|
||||
|
||||
std::string getFeatureName(std::size_t idx) const {
|
||||
if (idx >= m_index_to_feature_name.size())
|
||||
throw runtime_error("Error: you required an too big index");
|
||||
throw std::runtime_error("Error: you required an too big index");
|
||||
std::map<std::size_t, std::string>::const_iterator it = m_index_to_feature_name.find(idx);
|
||||
if (it == m_index_to_feature_name.end()) {
|
||||
throw runtime_error("Error: specified id is unknown: " + idx);
|
||||
throw std::runtime_error("Error: specified id is unknown: " + idx);
|
||||
} else {
|
||||
return it->second;
|
||||
}
|
||||
@ -113,8 +113,9 @@ public:
|
||||
std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.find(name);
|
||||
if (it == m_feature_name_to_index.end()) {
|
||||
std::string msg = "Error: feature " + name + " is unknown. Known features: ";
|
||||
for (std::map<std::string, std::size_t>::const_iterator it = m_feature_name_to_index.begin(); it != m_feature_name_to_index.end(); it++) {
|
||||
msg += it->first;
|
||||
for (std::map<std::string, std::size_t>::const_iterator cit = m_feature_name_to_index.begin();
|
||||
cit != m_feature_name_to_index.end(); cit++) {
|
||||
msg += cit->first;
|
||||
msg += ", ";
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@ void CheckFeatureMap(const FeatureData* feature_data,
|
||||
for (int i = 0; i < num_feature; ++i) {
|
||||
std::stringstream ss;
|
||||
ss << str << "_" << i;
|
||||
const string& s = ss.str();
|
||||
const std::string& s = ss.str();
|
||||
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
|
||||
BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
|
||||
++(*cnt);
|
||||
|
@ -12,6 +12,8 @@
|
||||
#include <cmath>
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
const int kAvailableSize = 8;
|
||||
} // namespace
|
||||
|
@ -16,20 +16,18 @@
|
||||
#include <vector>
|
||||
#include "Types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Minimal sparse vector
|
||||
class SparseVector {
|
||||
public:
|
||||
typedef std::map<size_t,FeatureStatsType> fvector_t;
|
||||
typedef std::map<std::string, size_t> name2id_t;
|
||||
typedef std::map<std::size_t,FeatureStatsType> fvector_t;
|
||||
typedef std::map<std::string, std::size_t> name2id_t;
|
||||
typedef std::vector<std::string> id2name_t;
|
||||
|
||||
FeatureStatsType get(const std::string& name) const;
|
||||
FeatureStatsType get(size_t id) const;
|
||||
FeatureStatsType get(std::size_t id) const;
|
||||
void set(const std::string& name, FeatureStatsType value);
|
||||
void clear();
|
||||
size_t size() const { return m_fvector.size(); }
|
||||
std::size_t size() const { return m_fvector.size(); }
|
||||
|
||||
void write(std::ostream& out, const std::string& sep = " ") const;
|
||||
|
||||
@ -46,8 +44,8 @@ SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
|
||||
class FeatureStats
|
||||
{
|
||||
private:
|
||||
size_t m_available_size;
|
||||
size_t m_entries;
|
||||
std::size_t m_available_size;
|
||||
std::size_t m_entries;
|
||||
|
||||
// TODO: Use smart pointer for exception safety.
|
||||
featstats_t m_array;
|
||||
@ -55,7 +53,7 @@ private:
|
||||
|
||||
public:
|
||||
FeatureStats();
|
||||
explicit FeatureStats(const size_t size);
|
||||
explicit FeatureStats(const std::size_t size);
|
||||
explicit FeatureStats(std::string &theString);
|
||||
|
||||
~FeatureStats();
|
||||
@ -69,7 +67,7 @@ public:
|
||||
bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
|
||||
void expand();
|
||||
void add(FeatureStatsType v);
|
||||
void addSparse(const string& name, FeatureStatsType v);
|
||||
void addSparse(const std::string& name, FeatureStatsType v);
|
||||
|
||||
void clear() {
|
||||
memset((void*)m_array, 0, GetArraySizeWithBytes());
|
||||
@ -81,23 +79,23 @@ public:
|
||||
clear();
|
||||
}
|
||||
|
||||
FeatureStatsType get(size_t i) { return m_array[i]; }
|
||||
FeatureStatsType get(size_t i)const { return m_array[i]; }
|
||||
FeatureStatsType get(std::size_t i) { return m_array[i]; }
|
||||
FeatureStatsType get(std::size_t i)const { return m_array[i]; }
|
||||
featstats_t getArray() const { return m_array; }
|
||||
|
||||
const SparseVector& getSparse() const { return m_map; }
|
||||
|
||||
void set(std::string &theString);
|
||||
|
||||
inline size_t bytes() const { return GetArraySizeWithBytes(); }
|
||||
inline std::size_t bytes() const { return GetArraySizeWithBytes(); }
|
||||
|
||||
size_t GetArraySizeWithBytes() const {
|
||||
std::size_t GetArraySizeWithBytes() const {
|
||||
return m_entries * sizeof(FeatureStatsType);
|
||||
}
|
||||
|
||||
size_t size() const { return m_entries; }
|
||||
std::size_t size() const { return m_entries; }
|
||||
|
||||
size_t available() const { return m_available_size; }
|
||||
std::size_t available() const { return m_available_size; }
|
||||
|
||||
void savetxt(const std::string &file);
|
||||
void savetxt(std::ostream* os);
|
||||
@ -111,7 +109,7 @@ public:
|
||||
/**
|
||||
* Write the whole object to a stream.
|
||||
*/
|
||||
friend ostream& operator<<(ostream& o, const FeatureStats& e);
|
||||
friend std::ostream& operator<<(std::ostream& o, const FeatureStats& e);
|
||||
};
|
||||
|
||||
//ADEED_BY_TS
|
||||
|
@ -15,17 +15,17 @@ class InterpolatedScorer : public Scorer
|
||||
{
|
||||
public:
|
||||
// name would be: "HAMMING,BLEU" or similar
|
||||
InterpolatedScorer(const string& name, const string& config);
|
||||
InterpolatedScorer(const std::string& name, const std::string& config);
|
||||
virtual ~InterpolatedScorer() {}
|
||||
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
statscores_t& scores) const;
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
|
||||
virtual size_t NumberOfScores() const {
|
||||
size_t sz = 0;
|
||||
virtual std::size_t NumberOfScores() const {
|
||||
std::size_t sz = 0;
|
||||
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
|
||||
itsc != m_scorers.end(); ++itsc) {
|
||||
sz += (*itsc)->NumberOfScores();
|
||||
@ -38,9 +38,9 @@ public:
|
||||
/**
|
||||
* Set the factors, which should be used for this metric
|
||||
*/
|
||||
virtual void setFactors(const string& factors);
|
||||
virtual void setFactors(const std::string& factors);
|
||||
|
||||
virtual void setFilter(const string& filterCommand);
|
||||
virtual void setFilter(const std::string& filterCommand);
|
||||
|
||||
protected:
|
||||
ScopedVector<Scorer> m_scorers;
|
||||
@ -49,7 +49,7 @@ protected:
|
||||
// by Scorer objects.
|
||||
ScopedVector<ScoreData> m_scorers_score_data;
|
||||
|
||||
vector<float> m_scorer_weights;
|
||||
std::vector<float> m_scorer_weights;
|
||||
};
|
||||
|
||||
#endif // MERT_INTERPOLATED_SCORER_H_
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "TER/tercalc.h"
|
||||
#include "TER/terAlignment.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace TERCpp;
|
||||
|
||||
MergeScorer::MergeScorer(const string& config)
|
||||
|
@ -1,15 +1,11 @@
|
||||
#ifndef MERT_MERGE_SCORER_H_
|
||||
#define MERT_MERGE_SCORER_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Scorer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class PerScorer;
|
||||
class ScoreStats;
|
||||
|
||||
@ -20,16 +16,16 @@ const int kMergeScorerLength = 4;
|
||||
*/
|
||||
class MergeScorer: public StatisticsBasedScorer {
|
||||
public:
|
||||
explicit MergeScorer(const string& config = "");
|
||||
explicit MergeScorer(const std::string& config = "");
|
||||
~MergeScorer();
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual size_t NumberOfScores() const { return 0; }
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
virtual std::size_t NumberOfScores() const { return 0; }
|
||||
|
||||
protected:
|
||||
friend class PerScorer;
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
virtual float calculateScore(const std::vector<int>& comps) const;
|
||||
|
||||
private:
|
||||
// no copying allowed
|
||||
|
@ -17,7 +17,7 @@ BOOST_AUTO_TEST_CASE(ngram_basic) {
|
||||
NgramCounts::const_iterator it = counts.find(key);
|
||||
BOOST_CHECK(it != counts.end());
|
||||
BOOST_CHECK_EQUAL(it->first.size(), key.size());
|
||||
for (size_t i = 0; i < key.size(); ++i) {
|
||||
for (std::size_t i = 0; i < key.size(); ++i) {
|
||||
BOOST_CHECK_EQUAL(it->first[i], key[i]);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(it->second, 1);
|
||||
|
@ -8,9 +8,7 @@
|
||||
#include "Scorer.h"
|
||||
#include "Types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
static const float kMaxFloat = numeric_limits<float>::max();
|
||||
static const float kMaxFloat = std::numeric_limits<float>::max();
|
||||
|
||||
class Point;
|
||||
|
||||
@ -24,10 +22,10 @@ protected:
|
||||
FeatureDataHandle m_feature_data; // no accessor for them only child can use them
|
||||
unsigned int m_num_random_directions;
|
||||
|
||||
const vector<bool>& m_positive;
|
||||
const std::vector<bool>& m_positive;
|
||||
|
||||
public:
|
||||
Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& positive, const vector<parameter_t>& start, unsigned int nrandom);
|
||||
Optimizer(unsigned Pd, const std::vector<unsigned>& i2O, const std::vector<bool>& positive, const std::vector<parameter_t>& start, unsigned int nrandom);
|
||||
|
||||
void SetScorer(Scorer *scorer) { m_scorer = scorer; }
|
||||
void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
|
||||
@ -50,18 +48,18 @@ public:
|
||||
/**
|
||||
* Given a set of lambdas, get the nbest for each sentence.
|
||||
*/
|
||||
void Get1bests(const Point& param,vector<unsigned>& bests) const;
|
||||
void Get1bests(const Point& param,std::vector<unsigned>& bests) const;
|
||||
|
||||
/**
|
||||
* Given a set of nbests, get the Statistical score.
|
||||
*/
|
||||
statscore_t GetStatScore(const vector<unsigned>& nbests) const {
|
||||
statscore_t GetStatScore(const std::vector<unsigned>& nbests) const {
|
||||
return m_scorer->score(nbests);
|
||||
}
|
||||
|
||||
statscore_t GetStatScore(const Point& param) const;
|
||||
|
||||
vector<statscore_t> GetIncStatScore(const vector<unsigned>& ref, const vector<vector<pair<unsigned,unsigned> > >& diffs) const;
|
||||
std::vector<statscore_t> GetIncStatScore(const std::vector<unsigned>& ref, const std::vector<std::vector<std::pair<unsigned,unsigned> > >& diffs) const;
|
||||
|
||||
/**
|
||||
* Get the optimal Lambda and the best score in a particular direction from a given Point.
|
||||
@ -79,9 +77,9 @@ class SimpleOptimizer : public Optimizer
|
||||
private:
|
||||
const float kEPS;
|
||||
public:
|
||||
SimpleOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
|
||||
const vector<parameter_t>& start, unsigned int nrandom)
|
||||
: Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001) {}
|
||||
SimpleOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
|
||||
const std::vector<parameter_t>& start, unsigned int nrandom)
|
||||
: Optimizer(dim, i2O, positive, start,nrandom), kEPS(0.0001f) {}
|
||||
virtual statscore_t TrueRun(Point&) const;
|
||||
};
|
||||
|
||||
@ -93,9 +91,9 @@ class RandomDirectionOptimizer : public Optimizer
|
||||
private:
|
||||
const float kEPS;
|
||||
public:
|
||||
RandomDirectionOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
|
||||
const vector<parameter_t>& start, unsigned int nrandom)
|
||||
: Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001) {}
|
||||
RandomDirectionOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
|
||||
const std::vector<parameter_t>& start, unsigned int nrandom)
|
||||
: Optimizer(dim, i2O, positive, start, nrandom), kEPS(0.0001f) {}
|
||||
virtual statscore_t TrueRun(Point&) const;
|
||||
};
|
||||
|
||||
@ -105,8 +103,8 @@ public:
|
||||
class RandomOptimizer : public Optimizer
|
||||
{
|
||||
public:
|
||||
RandomOptimizer(unsigned dim, const vector<unsigned>& i2O, const vector<bool>& positive,
|
||||
const vector<parameter_t>& start, unsigned int nrandom)
|
||||
RandomOptimizer(unsigned dim, const std::vector<unsigned>& i2O, const std::vector<bool>& positive,
|
||||
const std::vector<parameter_t>& start, unsigned int nrandom)
|
||||
: Optimizer(dim, i2O, positive, start, nrandom) {}
|
||||
virtual statscore_t TrueRun(Point&) const;
|
||||
};
|
||||
|
@ -17,7 +17,7 @@ class OptimizerFactory
|
||||
NOPTIMIZER
|
||||
};
|
||||
|
||||
static std::vector<string> GetTypeNames();
|
||||
static std::vector<std::string> GetTypeNames();
|
||||
|
||||
// Setup optimization types.
|
||||
static void SetTypeNames();
|
||||
@ -27,7 +27,7 @@ class OptimizerFactory
|
||||
|
||||
static Optimizer* BuildOptimizer(unsigned dim,
|
||||
const std::vector<unsigned>& to_optimize,
|
||||
const std::vector<bool>& positive,
|
||||
const std::vector<bool>& positive,
|
||||
const std::vector<parameter_t>& start,
|
||||
const std::string& type,
|
||||
unsigned int nrandom);
|
||||
@ -36,7 +36,7 @@ class OptimizerFactory
|
||||
OptimizerFactory() {}
|
||||
~OptimizerFactory() {}
|
||||
|
||||
static vector<string> m_type_names;
|
||||
static std::vector<std::string> m_type_names;
|
||||
};
|
||||
|
||||
#endif // MERT_OPTIMIZER_FACTORY_H_
|
||||
|
@ -8,10 +8,10 @@
|
||||
namespace {
|
||||
|
||||
inline bool CheckBuildOptimizer(unsigned dim,
|
||||
const vector<unsigned>& to_optimize,
|
||||
const vector<bool>& positive,
|
||||
const vector<parameter_t>& start,
|
||||
const string& type,
|
||||
const std::vector<unsigned>& to_optimize,
|
||||
const std::vector<bool>& positive,
|
||||
const std::vector<parameter_t>& start,
|
||||
const std::string& type,
|
||||
unsigned int num_random) {
|
||||
boost::scoped_ptr<Optimizer> optimizer(OptimizerFactory::BuildOptimizer(dim, to_optimize, positive, start, type, num_random));
|
||||
return optimizer.get() != NULL;
|
||||
|
@ -7,8 +7,6 @@
|
||||
#include "Types.h"
|
||||
#include "Scorer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ScoreStats;
|
||||
|
||||
/**
|
||||
@ -20,13 +18,13 @@ class ScoreStats;
|
||||
class PerScorer: public StatisticsBasedScorer
|
||||
{
|
||||
public:
|
||||
explicit PerScorer(const string& config = "");
|
||||
explicit PerScorer(const std::string& config = "");
|
||||
~PerScorer();
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual size_t NumberOfScores() const { return 3; }
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
virtual std::size_t NumberOfScores() const { return 3; }
|
||||
virtual float calculateScore(const std::vector<int>& comps) const;
|
||||
|
||||
private:
|
||||
// no copying allowed
|
||||
@ -34,8 +32,8 @@ private:
|
||||
PerScorer& operator=(const PerScorer&);
|
||||
|
||||
// data extracted from reference files
|
||||
vector<size_t> m_ref_lengths;
|
||||
vector<multiset<int> > m_ref_tokens;
|
||||
std::vector<std::size_t> m_ref_lengths;
|
||||
std::vector<std::multiset<int> > m_ref_tokens;
|
||||
};
|
||||
|
||||
#endif // MERT_PER_SCORER_H_
|
||||
|
28
mert/Point.h
28
mert/Point.h
@ -13,7 +13,7 @@ class Optimizer;
|
||||
* A class that handles the subset of the Feature weight on which
|
||||
* we run the optimization.
|
||||
*/
|
||||
class Point : public vector<parameter_t>
|
||||
class Point : public std::vector<parameter_t>
|
||||
{
|
||||
friend class Optimizer;
|
||||
|
||||
@ -21,7 +21,7 @@ private:
|
||||
/**
|
||||
* The indices over which we optimize.
|
||||
*/
|
||||
static vector<unsigned int> m_opt_indices;
|
||||
static std::vector<unsigned int> m_opt_indices;
|
||||
|
||||
/**
|
||||
* Dimension of m_opt_indices and of the parent vector.
|
||||
@ -31,7 +31,7 @@ private:
|
||||
/**
|
||||
* Fixed weights in case of partial optimization.
|
||||
*/
|
||||
static map<unsigned int,parameter_t> m_fixed_weights;
|
||||
static std::map<unsigned int,parameter_t> m_fixed_weights;
|
||||
|
||||
/**
|
||||
* Total size of the parameter space; we have
|
||||
@ -43,23 +43,23 @@ private:
|
||||
/**
|
||||
* The limits for randomization, both vectors are of full length, m_pdim.
|
||||
*/
|
||||
static vector<parameter_t> m_min;
|
||||
static vector<parameter_t> m_max;
|
||||
static std::vector<parameter_t> m_min;
|
||||
static std::vector<parameter_t> m_max;
|
||||
|
||||
statscore_t m_score;
|
||||
|
||||
public:
|
||||
static unsigned int getdim() { return m_dim; }
|
||||
static void setdim(size_t d) { m_dim = d; }
|
||||
static void setdim(std::size_t d) { m_dim = d; }
|
||||
|
||||
static unsigned int getpdim() { return m_pdim; }
|
||||
static void setpdim(size_t pd) { m_pdim = pd; }
|
||||
static void setpdim(std::size_t pd) { m_pdim = pd; }
|
||||
|
||||
static void set_optindices(const vector<unsigned int>& indices) {
|
||||
static void set_optindices(const std::vector<unsigned int>& indices) {
|
||||
m_opt_indices = indices;
|
||||
}
|
||||
|
||||
static const vector<unsigned int>& get_optindices() {
|
||||
static const std::vector<unsigned int>& get_optindices() {
|
||||
return m_opt_indices;
|
||||
}
|
||||
|
||||
@ -68,9 +68,9 @@ public:
|
||||
}
|
||||
|
||||
Point();
|
||||
Point(const vector<parameter_t>& init,
|
||||
const vector<parameter_t>& min,
|
||||
const vector<parameter_t>& max);
|
||||
Point(const std::vector<parameter_t>& init,
|
||||
const std::vector<parameter_t>& min,
|
||||
const std::vector<parameter_t>& max);
|
||||
~Point();
|
||||
|
||||
void Randomize();
|
||||
@ -84,7 +84,7 @@ public:
|
||||
/**
|
||||
* Write the whole feature weight vector to a stream (i.e., m_pdim floats).
|
||||
*/
|
||||
friend ostream& operator<<(ostream& o,const Point& P);
|
||||
friend std::ostream& operator<<(std::ostream& o,const Point& P);
|
||||
|
||||
void Normalize() { NormalizeL2(); }
|
||||
void NormalizeL2();
|
||||
@ -94,7 +94,7 @@ public:
|
||||
* Return a vector of size m_pdim where all weights have been
|
||||
* put (including fixed ones).
|
||||
*/
|
||||
void GetAllWeights(vector<parameter_t>& w) const;
|
||||
void GetAllWeights(std::vector<parameter_t>& w) const;
|
||||
|
||||
statscore_t GetScore() const { return m_score; }
|
||||
void SetScore(statscore_t score) { m_score = score; }
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include <unistd.h>
|
||||
#include <csignal>
|
||||
|
||||
#include "Fdstream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define CHILD_STDIN_READ pipefds_input[0]
|
||||
|
@ -3,7 +3,8 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "Fdstream.h"
|
||||
class ofdstream;
|
||||
class ifdstream;
|
||||
|
||||
/*
|
||||
* This class runs the filter command in a child process and
|
||||
@ -12,13 +13,13 @@
|
||||
// Declaration of PreProcessFilter as rendered by the diff page.
// Public interface: a constructor taking the filter command line,
// ProcessSentence() mapping one sentence string to another, and a destructor.
// State: two raw stream pointers, m_toFilter (ofdstream*) and m_fromFilter
// (ifdstream*); NOTE(review): ownership/cleanup is not visible in this hunk.
class PreProcessFilter
|
||||
{
|
||||
public:
|
||||
// Pre-change declarations: they rely on `string` being visible unqualified.
PreProcessFilter(const string& filterCommand);
|
||||
string ProcessSentence(const string& sentence);
|
||||
~PreProcessFilter();
|
||||
// Post-change declarations: the constructor becomes `explicit` and the
// string types are spelled std::string.
explicit PreProcessFilter(const std::string& filterCommand);
|
||||
std::string ProcessSentence(const std::string& sentence);
|
||||
~PreProcessFilter();
|
||||
|
||||
private:
|
||||
ofdstream* m_toFilter;
|
||||
// m_fromFilter is listed twice because the diff marks the row as changed;
// presumably a whitespace-only edit, since the visible text is identical.
ifdstream* m_fromFilter;
|
||||
ifdstream* m_fromFilter;
|
||||
};
|
||||
|
||||
#endif // MERT_PREPROCESSFILTER_H_
|
||||
|
@ -14,8 +14,8 @@
|
||||
class Reference {
|
||||
public:
|
||||
// for m_length
|
||||
typedef std::vector<size_t>::iterator iterator;
|
||||
typedef std::vector<size_t>::const_iterator const_iterator;
|
||||
typedef std::vector<std::size_t>::iterator iterator;
|
||||
typedef std::vector<std::size_t>::const_iterator const_iterator;
|
||||
|
||||
Reference() : m_counts(new NgramCounts) { }
|
||||
~Reference() { delete m_counts; }
|
||||
@ -28,36 +28,36 @@ class Reference {
|
||||
iterator end() { return m_length.end(); }
|
||||
const_iterator end() const { return m_length.end(); }
|
||||
|
||||
void push_back(size_t len) { m_length.push_back(len); }
|
||||
void push_back(std::size_t len) { m_length.push_back(len); }
|
||||
|
||||
size_t num_references() const { return m_length.size(); }
|
||||
std::size_t num_references() const { return m_length.size(); }
|
||||
|
||||
int CalcAverage() const;
|
||||
int CalcClosest(size_t length) const;
|
||||
int CalcClosest(std::size_t length) const;
|
||||
int CalcShortest() const;
|
||||
|
||||
private:
|
||||
NgramCounts* m_counts;
|
||||
|
||||
// multiple reference lengths
|
||||
std::vector<size_t> m_length;
|
||||
std::vector<std::size_t> m_length;
|
||||
};
|
||||
|
||||
// TODO(tetsuok): fix this function and related stuff.
|
||||
// "average" reference length should not be calculated at sentence-level unlike "closest".
|
||||
/**
 * Average of the stored reference lengths, truncated toward zero.
 *
 * @return the mean of the values in m_length converted to int, or 0 when no
 *         reference length has been stored.
 */
inline int Reference::CalcAverage() const {
  // Guard the empty case: 0.0f / 0 yields NaN, and converting NaN to int is
  // undefined behaviour.
  if (m_length.empty()) {
    return 0;
  }
  // Accumulate in the element type so the sum is not narrowed on every step.
  std::size_t total = 0;
  for (std::size_t i = 0; i < m_length.size(); ++i) {
    total += m_length[i];
  }
  return static_cast<int>(
      static_cast<float>(total) / m_length.size());
}
|
||||
|
||||
inline int Reference::CalcClosest(size_t length) const {
|
||||
inline int Reference::CalcClosest(std::size_t length) const {
|
||||
int min_diff = INT_MAX;
|
||||
int closest_ref_id = 0; // an index of the closest reference translation
|
||||
for (size_t i = 0; i < m_length.size(); ++i) {
|
||||
for (std::size_t i = 0; i < m_length.size(); ++i) {
|
||||
const int ref_length = m_length[i];
|
||||
const int length_diff = abs(ref_length - static_cast<int>(length));
|
||||
const int abs_min_diff = abs(min_diff);
|
||||
|
@ -23,10 +23,10 @@ class ScopedVector {
|
||||
m_vec.clear();
|
||||
}
|
||||
|
||||
void reserve(size_t capacity) { m_vec.reserve(capacity); }
|
||||
void resize(size_t size) { m_vec.resize(size); }
|
||||
void reserve(std::size_t capacity) { m_vec.reserve(capacity); }
|
||||
void resize(std::size_t size) { m_vec.resize(size); }
|
||||
|
||||
size_t size() const {return m_vec.size(); }
|
||||
std::size_t size() const {return m_vec.size(); }
|
||||
|
||||
iterator begin() { return m_vec.begin(); }
|
||||
const_iterator begin() const { return m_vec.begin(); }
|
||||
@ -40,8 +40,8 @@ class ScopedVector {
|
||||
std::vector<T*>* operator->() { return &m_vec; }
|
||||
const std::vector<T*>* operator->() const { return &m_vec; }
|
||||
|
||||
T*& operator[](size_t i) { return m_vec[i]; }
|
||||
const T* operator[](size_t i) const { return m_vec[i]; }
|
||||
T*& operator[](std::size_t i) { return m_vec[i]; }
|
||||
const T* operator[](std::size_t i) const { return m_vec[i]; }
|
||||
|
||||
private:
|
||||
std::vector<T*> m_vec;
|
||||
|
@ -8,11 +8,14 @@
|
||||
|
||||
#include "ScoreData.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "Scorer.h"
|
||||
#include "Util.h"
|
||||
#include "FileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
ScoreData::ScoreData(Scorer* scorer) :
|
||||
m_scorer(scorer)
|
||||
{
|
||||
|
@ -9,15 +9,13 @@
|
||||
#ifndef MERT_SCORE_DATA_H_
|
||||
#define MERT_SCORE_DATA_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <iosfwd>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include "ScoreArray.h"
|
||||
#include "ScoreStats.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class Scorer;
|
||||
|
||||
class ScoreData
|
||||
@ -32,7 +30,7 @@ private:
|
||||
|
||||
Scorer* m_scorer;
|
||||
std::string m_score_type;
|
||||
size_t m_num_scores;
|
||||
std::size_t m_num_scores;
|
||||
|
||||
public:
|
||||
ScoreData(Scorer* scorer);
|
||||
@ -44,11 +42,11 @@ public:
|
||||
return m_array.at(getIndex(idx));
|
||||
}
|
||||
|
||||
inline ScoreArray& get(size_t idx) {
|
||||
inline ScoreArray& get(std::size_t idx) {
|
||||
return m_array.at(idx);
|
||||
}
|
||||
|
||||
inline const ScoreArray& get(size_t idx) const {
|
||||
inline const ScoreArray& get(std::size_t idx) const {
|
||||
return m_array.at(idx);
|
||||
}
|
||||
|
||||
@ -60,11 +58,11 @@ public:
|
||||
return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
|
||||
}
|
||||
|
||||
inline ScoreStats& get(size_t i, size_t j) {
|
||||
inline ScoreStats& get(std::size_t i, std::size_t j) {
|
||||
return m_array.at(i).get(j);
|
||||
}
|
||||
|
||||
inline const ScoreStats& get(size_t i, size_t j) const {
|
||||
inline const ScoreStats& get(std::size_t i, std::size_t j) const {
|
||||
return m_array.at(i).get(j);
|
||||
}
|
||||
|
||||
@ -77,8 +75,8 @@ public:
|
||||
void add(ScoreArray& e);
|
||||
void add(const ScoreStats& e, const std::string& sent_idx);
|
||||
|
||||
size_t NumberOfScores() const { return m_num_scores; }
|
||||
size_t size() const { return m_array.size(); }
|
||||
std::size_t NumberOfScores() const { return m_num_scores; }
|
||||
std::size_t size() const { return m_array.size(); }
|
||||
|
||||
void save(const std::string &file, bool bin=false);
|
||||
void save(std::ostream* os, bool bin=false);
|
||||
@ -99,10 +97,10 @@ public:
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline std::string getIndex(size_t idx) const {
|
||||
inline std::string getIndex(std::size_t idx) const {
|
||||
idx2name::const_iterator i = m_index_to_array_name.find(idx);
|
||||
if (i != m_index_to_array_name.end())
|
||||
throw runtime_error("there is no entry at index " + idx);
|
||||
throw std::runtime_error("there is no entry at index " + idx);
|
||||
return i->second;
|
||||
}
|
||||
};
|
||||
|
@ -8,6 +8,10 @@
|
||||
|
||||
#include "Util.h"
|
||||
#include "ScoreStats.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
const int kAvailableSize = 8;
|
||||
|
@ -10,27 +10,24 @@
|
||||
#define MERT_SCORE_STATS_H_
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iosfwd>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ScoreStats
|
||||
{
|
||||
private:
|
||||
size_t m_available_size;
|
||||
size_t m_entries;
|
||||
std::size_t m_available_size;
|
||||
std::size_t m_entries;
|
||||
|
||||
// TODO: Use smart pointer for exceptional-safety.
|
||||
scorestats_t m_array;
|
||||
|
||||
public:
|
||||
ScoreStats();
|
||||
explicit ScoreStats(const size_t size);
|
||||
explicit ScoreStats(const std::size_t size);
|
||||
|
||||
~ScoreStats();
|
||||
|
||||
@ -46,7 +43,7 @@ public:
|
||||
void add(ScoreStatsType v);
|
||||
|
||||
// Zero-fills the first GetArraySizeWithBytes() bytes of m_array.
// Rendered diff: the unqualified memset row is the pre-change line; the
// std::memset row is the line that replaces it.
void clear() {
|
||||
memset((void*)m_array, 0, GetArraySizeWithBytes());
|
||||
std::memset((void*)m_array, 0, GetArraySizeWithBytes());
|
||||
}
|
||||
|
||||
void reset() {
|
||||
@ -54,8 +51,8 @@ public:
|
||||
clear();
|
||||
}
|
||||
|
||||
ScoreStatsType get(size_t i) { return m_array[i]; }
|
||||
ScoreStatsType get(size_t i) const { return m_array[i]; }
|
||||
ScoreStatsType get(std::size_t i) { return m_array[i]; }
|
||||
ScoreStatsType get(std::size_t i) const { return m_array[i]; }
|
||||
scorestats_t getArray() const { return m_array; }
|
||||
|
||||
void set(const std::string& str);
|
||||
@ -63,34 +60,34 @@ public:
|
||||
// Much more efficient than the above.
|
||||
// Replaces the current contents with `stats`: calls reset() first, then add()
// once per element in index order.
void set(const std::vector<ScoreStatsType>& stats) {
|
||||
reset();
|
||||
// Rendered diff: the `size_t` loop header is the pre-change row, the
// `std::size_t` one is the row that replaces it; only one loop exists.
for (size_t i = 0; i < stats.size(); ++i) {
|
||||
for (std::size_t i = 0; i < stats.size(); ++i) {
|
||||
add(stats[i]);
|
||||
}
|
||||
}
|
||||
|
||||
size_t bytes() const { return GetArraySizeWithBytes(); }
|
||||
std::size_t bytes() const { return GetArraySizeWithBytes(); }
|
||||
|
||||
// Returns the size in bytes of the m_entries stored values, i.e.
// m_entries * sizeof(ScoreStatsType). m_available_size is not involved.
// Rendered diff: first signature row is pre-change, second is post-change.
size_t GetArraySizeWithBytes() const {
|
||||
std::size_t GetArraySizeWithBytes() const {
|
||||
return m_entries * sizeof(ScoreStatsType);
|
||||
}
|
||||
|
||||
size_t size() const { return m_entries; }
|
||||
std::size_t size() const { return m_entries; }
|
||||
|
||||
size_t available() const { return m_available_size; }
|
||||
std::size_t available() const { return m_available_size; }
|
||||
|
||||
void savetxt(const std::string &file);
|
||||
void savetxt(ostream* os);
|
||||
void savebin(ostream* os);
|
||||
void savetxt(std::ostream* os);
|
||||
void savebin(std::ostream* os);
|
||||
void savetxt();
|
||||
|
||||
void loadtxt(const std::string &file);
|
||||
void loadtxt(istream* is);
|
||||
void loadbin(istream* is);
|
||||
void loadtxt(std::istream* is);
|
||||
void loadbin(std::istream* is);
|
||||
|
||||
/**
|
||||
* Write the whole object to a stream.
|
||||
*/
|
||||
friend ostream& operator<<(ostream& o, const ScoreStats& e);
|
||||
friend std::ostream& operator<<(std::ostream& o, const ScoreStats& e);
|
||||
};
|
||||
|
||||
//ADDED_BY_TS
|
||||
|
@ -4,6 +4,9 @@
|
||||
#include "Vocabulary.h"
|
||||
#include "Util.h"
|
||||
#include "Singleton.h"
|
||||
#include "PreProcessFilter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
|
||||
@ -38,9 +41,9 @@ inline float score_average(const statscores_t& scores, size_t start, size_t end)
|
||||
// Scorer constructor: stores the scorer name, fetches the vocabulary from
// mert::VocabularyFactory::GetVocabulary(), null-initialises m_filter and
// m_score_data, sets m_enable_preserve_case to true, then hands `config` to
// InitConfig().
Scorer::Scorer(const string& name, const string& config)
|
||||
: m_name(name),
|
||||
m_vocab(mert::VocabularyFactory::GetVocabulary()),
|
||||
// Pre-change initialisers (three rows): m_score_data, m_enable_preserve_case,
// m_filter.
m_score_data(0),
|
||||
m_enable_preserve_case(true),
|
||||
m_filter(NULL) {
|
||||
// Post-change initialisers (three rows): same values, but m_filter now comes
// first and m_score_data uses NULL instead of 0.
// NOTE(review): presumably reordered to follow member declaration order and
// silence a reorder warning -- confirm against Scorer.h.
m_filter(NULL),
|
||||
m_score_data(NULL),
|
||||
m_enable_preserve_case(true) {
|
||||
InitConfig(config);
|
||||
}
|
||||
|
||||
|
@ -8,10 +8,8 @@
|
||||
#include <vector>
|
||||
#include "Types.h"
|
||||
#include "ScoreData.h"
|
||||
#include "PreProcessFilter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class PreProcessFilter;
|
||||
class ScoreStats;
|
||||
|
||||
namespace mert {
|
||||
@ -29,18 +27,18 @@ class Vocabulary;
|
||||
class Scorer
|
||||
{
|
||||
public:
|
||||
Scorer(const string& name, const string& config);
|
||||
Scorer(const std::string& name, const std::string& config);
|
||||
virtual ~Scorer();
|
||||
|
||||
/**
|
||||
* Return the number of statistics needed for the computation of the score.
|
||||
*/
|
||||
virtual size_t NumberOfScores() const = 0;
|
||||
virtual std::size_t NumberOfScores() const = 0;
|
||||
|
||||
/**
|
||||
* Set the reference files. This must be called before prepareStats().
|
||||
*/
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles) {
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@ -48,12 +46,12 @@ class Scorer
|
||||
* Process the given guessed text, corresponding to the given reference sindex
|
||||
* and add the appropriate statistics to the entry.
|
||||
*/
|
||||
virtual void prepareStats(size_t sindex, const string& text, ScoreStats& entry) {
|
||||
virtual void prepareStats(std::size_t sindex, const std::string& text, ScoreStats& entry) {
|
||||
// do nothing.
|
||||
}
|
||||
|
||||
// String-index overload: converts `sindex` to an integer with atoi(), casts
// it to size_t, and forwards to the size_t overload of prepareStats with the
// same `text` and `entry`.
// NOTE(review): atoi() reports no errors (it yields 0 when no conversion can
// be performed), so a non-numeric sindex is forwarded as index 0.
// Rendered diff: rows alternate pre-change signature, pre-change body,
// post-change signature, post-change body; the function has a single
// statement.
virtual void prepareStats(const string& sindex, const string& text, ScoreStats& entry) {
|
||||
this->prepareStats(static_cast<size_t>(atoi(sindex.c_str())), text, entry);
|
||||
virtual void prepareStats(const std::string& sindex, const std::string& text, ScoreStats& entry) {
|
||||
this->prepareStats(static_cast<std::size_t>(atoi(sindex.c_str())), text, entry);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -69,7 +67,7 @@ class Scorer
|
||||
throw runtime_error("score data not loaded");
|
||||
}
|
||||
scores.push_back(0);
|
||||
for (size_t i = 0; i < diffs.size(); ++i) {
|
||||
for (std::size_t i = 0; i < diffs.size(); ++i) {
|
||||
scores.push_back(0);
|
||||
}
|
||||
}
|
||||
@ -86,11 +84,11 @@ class Scorer
|
||||
return scores[0];
|
||||
}
|
||||
|
||||
const string& getName() const {
|
||||
const std::string& getName() const {
|
||||
return m_name;
|
||||
}
|
||||
|
||||
size_t getReferenceSize() const {
|
||||
std::size_t getReferenceSize() const {
|
||||
if (m_score_data) {
|
||||
return m_score_data->size();
|
||||
}
|
||||
@ -107,33 +105,33 @@ class Scorer
|
||||
/**
|
||||
* Set the factors, which should be used for this metric
|
||||
*/
|
||||
virtual void setFactors(const string& factors);
|
||||
virtual void setFactors(const std::string& factors);
|
||||
|
||||
mert::Vocabulary* GetVocab() const { return m_vocab; }
|
||||
|
||||
/**
|
||||
* Set unix filter, which will be used to preprocess the sentences
|
||||
*/
|
||||
virtual void setFilter(const string& filterCommand);
|
||||
|
||||
virtual void setFilter(const std::string& filterCommand);
|
||||
|
||||
private:
|
||||
void InitConfig(const string& config);
|
||||
void InitConfig(const std::string& config);
|
||||
|
||||
/**
|
||||
* Take the factored sentence and return the desired factors
|
||||
*/
|
||||
string applyFactors(const string& sentece) const;
|
||||
std::string applyFactors(const std::string& sentece) const;
|
||||
|
||||
/**
|
||||
* Preprocess the sentence with the filter (if given)
|
||||
*/
|
||||
string applyFilter(const string& sentence) const;
|
||||
std::string applyFilter(const std::string& sentence) const;
|
||||
|
||||
string m_name;
|
||||
std::string m_name;
|
||||
mert::Vocabulary* m_vocab;
|
||||
map<string, string> m_config;
|
||||
vector<int> m_factors;
|
||||
PreProcessFilter* m_filter;
|
||||
std::map<std::string, std::string> m_config;
|
||||
std::vector<int> m_factors;
|
||||
PreProcessFilter* m_filter;
|
||||
|
||||
protected:
|
||||
ScoreData* m_score_data;
|
||||
@ -142,8 +140,8 @@ class Scorer
|
||||
/**
|
||||
* Get value of config variable. If not provided, return default.
|
||||
*/
|
||||
string getConfig(const string& key, const string& def="") const {
|
||||
map<string,string>::const_iterator i = m_config.find(key);
|
||||
std::string getConfig(const std::string& key, const std::string& def="") const {
|
||||
std::map<std::string,std::string>::const_iterator i = m_config.find(key);
|
||||
if (i == m_config.end()) {
|
||||
return def;
|
||||
} else {
|
||||
@ -155,12 +153,12 @@ class Scorer
|
||||
* Tokenise line and encode.
|
||||
* Note: We assume that all tokens are separated by whitespaces.
|
||||
*/
|
||||
void TokenizeAndEncode(const string& line, vector<int>& encoded);
|
||||
void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded);
|
||||
|
||||
/**
|
||||
* Every inherited scorer should call this function for each sentence
|
||||
*/
|
||||
// Runs `sentence` through applyFilter() and then passes that result to
// applyFactors(); the factor step therefore sees the filtered text.
// Rendered diff: first signature row is pre-change, second is post-change.
string preprocessSentence(const string& sentence) const
|
||||
std::string preprocessSentence(const std::string& sentence) const
|
||||
{
|
||||
return applyFactors(applyFilter(sentence));
|
||||
}
|
||||
@ -174,7 +172,7 @@ class Scorer
|
||||
class StatisticsBasedScorer : public Scorer
|
||||
{
|
||||
public:
|
||||
StatisticsBasedScorer(const string& name, const string& config);
|
||||
StatisticsBasedScorer(const std::string& name, const std::string& config);
|
||||
virtual ~StatisticsBasedScorer() {}
|
||||
virtual void score(const candidates_t& candidates, const diffs_t& diffs,
|
||||
statscores_t& scores) const;
|
||||
@ -184,17 +182,17 @@ class StatisticsBasedScorer : public Scorer
|
||||
// Enumerators for the regularisation setting; this is the type of the
// m_regularization_type member declared further down in the same class.
// NOTE(review): the exact meaning of AVERAGE / MINIMUM is defined by the
// scoring code, which is not visible here.
enum RegularisationType {
|
||||
NONE,
|
||||
AVERAGE,
|
||||
// Rendered diff: `MINIMUM,` (trailing comma) is the pre-change row and
// `MINIMUM` the post-change row; the enum has exactly three enumerators.
// Presumably the comma was dropped for pre-C++11 compilers -- confirm.
MINIMUM,
|
||||
MINIMUM
|
||||
};
|
||||
|
||||
/**
|
||||
* Calculate the actual score.
|
||||
*/
|
||||
virtual statscore_t calculateScore(const vector<int>& totals) const = 0;
|
||||
virtual statscore_t calculateScore(const std::vector<int>& totals) const = 0;
|
||||
|
||||
// regularisation
|
||||
RegularisationType m_regularization_type;
|
||||
size_t m_regularization_window;
|
||||
std::size_t m_regularization_window;
|
||||
};
|
||||
|
||||
#endif // MERT_SCORER_H_
|
||||
|
@ -54,7 +54,7 @@ private:
|
||||
|
||||
std::map<int, float> weightsMap;
|
||||
|
||||
void loadWeights(const string& weightsfile);
|
||||
void loadWeights(const std::string& weightsfile);
|
||||
|
||||
// no copying allowed.
|
||||
SemposScorer(const SemposScorer&);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "TER/terAlignment.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace TERCpp;
|
||||
|
||||
TerScorer::TerScorer(const string& config)
|
||||
|
@ -1,7 +1,6 @@
|
||||
#ifndef MERT_TER_SCORER_H_
|
||||
#define MERT_TER_SCORER_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -9,8 +8,6 @@
|
||||
#include "Types.h"
|
||||
#include "Scorer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class ScoreStats;
|
||||
|
||||
/**
|
||||
@ -19,35 +16,31 @@ class ScoreStats;
|
||||
class TerScorer: public StatisticsBasedScorer
|
||||
{
|
||||
public:
|
||||
explicit TerScorer(const string& config = "");
|
||||
explicit TerScorer(const std::string& config = "");
|
||||
~TerScorer();
|
||||
|
||||
virtual void setReferenceFiles(const vector<string>& referenceFiles);
|
||||
virtual void prepareStats(size_t sid, const string& text, ScoreStats& entry);
|
||||
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
|
||||
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
|
||||
|
||||
// Number of statistics this scorer reports: kLENGTH + 1.
// Rendered diff: first signature row is pre-change, second is post-change.
virtual size_t NumberOfScores() const {
|
||||
virtual std::size_t NumberOfScores() const {
|
||||
// Disabled debug trace, kept verbatim from the original source.
// cerr << "TerScorer: " << (LENGTH + 1) << endl;
|
||||
return kLENGTH + 1;
|
||||
}
|
||||
|
||||
virtual float calculateScore(const vector<int>& comps) const;
|
||||
|
||||
void whoami() const {
|
||||
cerr << "I AM TerScorer" << std::endl;
|
||||
}
|
||||
virtual float calculateScore(const std::vector<int>& comps) const;
|
||||
|
||||
private:
|
||||
const int kLENGTH;
|
||||
|
||||
string m_java_env;
|
||||
string m_ter_com_env;
|
||||
std::string m_java_env;
|
||||
std::string m_ter_com_env;
|
||||
|
||||
// data extracted from reference files
|
||||
vector<size_t> m_ref_lengths;
|
||||
vector<multiset<int> > m_ref_tokens;
|
||||
vector<vector<int> > m_references;
|
||||
vector<vector<vector<int> > > m_multi_references;
|
||||
string m_pid;
|
||||
std::vector<std::size_t> m_ref_lengths;
|
||||
std::vector<std::multiset<int> > m_ref_tokens;
|
||||
std::vector<std::vector<int> > m_references;
|
||||
std::vector<std::vector<std::vector<int> > > m_multi_references;
|
||||
std::string m_pid;
|
||||
|
||||
// no copying allowed
|
||||
TerScorer(const TerScorer&);
|
||||
|
31
mert/Types.h
31
mert/Types.h
@ -4,8 +4,7 @@
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
#include <utility>
|
||||
|
||||
class FeatureStats;
|
||||
class FeatureArray;
|
||||
@ -15,29 +14,29 @@ class ScoreArray;
|
||||
class ScoreData;
|
||||
|
||||
typedef float parameter_t;
|
||||
//typedef vector<parameter_t> parameters_t;confusing; use vector<parameter_t>
|
||||
typedef vector<pair<unsigned int, unsigned int> > diff_t;
|
||||
typedef pair<float,diff_t > threshold;
|
||||
typedef vector<diff_t> diffs_t;
|
||||
typedef vector<unsigned int> candidates_t;
|
||||
//typedef std::vector<parameter_t> parameters_t;confusing; use std::vector<parameter_t>
|
||||
typedef std::vector<std::pair<unsigned int, unsigned int> > diff_t;
|
||||
typedef std::pair<float,diff_t > threshold;
|
||||
typedef std::vector<diff_t> diffs_t;
|
||||
typedef std::vector<unsigned int> candidates_t;
|
||||
|
||||
typedef float statscore_t;
|
||||
typedef vector<statscore_t> statscores_t;
|
||||
typedef std::vector<statscore_t> statscores_t;
|
||||
|
||||
|
||||
typedef float FeatureStatsType;
|
||||
typedef FeatureStatsType* featstats_t;
|
||||
//typedef vector<FeatureStatsType> featstats_t;
|
||||
typedef vector<FeatureStats> featarray_t;
|
||||
typedef vector<FeatureArray> featdata_t;
|
||||
//typedef std::vector<FeatureStatsType> featstats_t;
|
||||
typedef std::vector<FeatureStats> featarray_t;
|
||||
typedef std::vector<FeatureArray> featdata_t;
|
||||
|
||||
typedef int ScoreStatsType;
|
||||
typedef ScoreStatsType* scorestats_t;
|
||||
//typedef vector<ScoreStatsType> scorestats_t;
|
||||
typedef vector<ScoreStats> scorearray_t;
|
||||
typedef vector<ScoreArray> scoredata_t;
|
||||
//typedef std::vector<ScoreStatsType> scorestats_t;
|
||||
typedef std::vector<ScoreStats> scorearray_t;
|
||||
typedef std::vector<ScoreArray> scoredata_t;
|
||||
|
||||
typedef map<size_t, std::string> idx2name;
|
||||
typedef map<std::string, size_t> name2idx;
|
||||
typedef std::map<std::size_t, std::string> idx2name;
|
||||
typedef std::map<std::string, std::size_t> name2idx;
|
||||
|
||||
#endif // MERT_TYPE_H_
|
||||
|
@ -22,8 +22,6 @@
|
||||
|
||||
#include "Types.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifdef TRACE_ENABLE
|
||||
#define TRACE_ERR(str) { std::cerr << str; }
|
||||
#else
|
||||
@ -40,11 +38,11 @@ const float kEPS = 0.0001f;
|
||||
|
||||
// Tolerance comparison helper.
// Returns true when the absolute difference between `expected` and `actual`
// is strictly smaller than `round` (default: kEPS). Otherwise writes a
// "Fail: expected = ... (actual = ...)" line to the standard error stream and
// returns false.
// Rendered diff: each changed statement appears twice, the unqualified
// abs/cerr/endl rows being pre-change and the std:: rows post-change; the
// function contains one `if` and one stream statement.
template <typename T>
|
||||
bool IsAlmostEqual(T expected, T actual, float round=kEPS) {
|
||||
if (abs(expected - actual) < round) {
|
||||
if (std::abs(expected - actual) < round) {
|
||||
return true;
|
||||
} else {
|
||||
cerr << "Fail: expected = " << expected
|
||||
<< " (actual = " << actual << ")" << endl;
|
||||
std::cerr << "Fail: expected = " << expected
|
||||
<< " (actual = " << actual << ")" << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ class Vocabulary {
|
||||
|
||||
bool empty() const { return m_vocab.empty(); }
|
||||
|
||||
size_t size() const { return m_vocab.size(); }
|
||||
std::size_t size() const { return m_vocab.size(); }
|
||||
|
||||
iterator find(const std::string& str) { return m_vocab.find(str); }
|
||||
const_iterator find(const std::string& str) const { return m_vocab.find(str); }
|
||||
|
Loading…
Reference in New Issue
Block a user