mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
Use util::TokenIter to tokenize n-best lists.
Also reduce the number of std::string objects that get created. In both the ScoreArray and FeatureArray classes, the private member that tracks the sentence index (namely, "m_index") was unnecessarily declared as std::string; it is better to declare it directly as 'int'.
This commit is contained in:
parent
cd3fb3b831
commit
38e145e556
@ -17,6 +17,10 @@
|
||||
#include "Util.h"
|
||||
#include "util/check.hh"
|
||||
|
||||
#include "util/tokenize_piece.hh"
|
||||
#include "util/string_piece.hh"
|
||||
#include "FeatureDataIterator.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
@ -137,24 +141,32 @@ void Data::loadNBest(const string &file)
|
||||
throw runtime_error("Unable to open: " + file);
|
||||
|
||||
ScoreStats scoreentry;
|
||||
string line, sentence_index, sentence, feature_str, alignment;
|
||||
string line, sentence, feature_str, alignment;
|
||||
int sentence_index;
|
||||
|
||||
while (getline(inp, line, '\n')) {
|
||||
if (line.empty()) continue;
|
||||
// adding statistics for error measures
|
||||
scoreentry.clear();
|
||||
|
||||
getNextPound(line, sentence_index, "|||"); // first field
|
||||
getNextPound(line, sentence, "|||"); // second field
|
||||
getNextPound(line, feature_str, "|||"); // third field
|
||||
util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||"));
|
||||
|
||||
if (line.length() > 0) {
|
||||
string temp;
|
||||
getNextPound(line, temp, "|||"); //fourth field sentence score
|
||||
if (line.length() > 0) {
|
||||
getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
|
||||
if (line.length() > 0) {
|
||||
getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
|
||||
sentence_index = ParseInt(*it);
|
||||
++it;
|
||||
sentence = it->as_string();
|
||||
++it;
|
||||
feature_str = it->as_string();
|
||||
++it;
|
||||
|
||||
if (it) {
|
||||
++it; // skip model score.
|
||||
|
||||
if (it) {
|
||||
++it;
|
||||
alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment
|
||||
if (it) {
|
||||
++it;
|
||||
alignment = it->as_string(); //sixth field (if present) is word alignment
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -216,7 +228,7 @@ void Data::InitFeatureMap(const string& str) {
|
||||
}
|
||||
|
||||
void Data::AddFeatures(const string& str,
|
||||
const string& sentence_index) {
|
||||
int sentence_index) {
|
||||
string buf = str;
|
||||
string substr;
|
||||
FeatureStats feature_entry;
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
class Scorer;
|
||||
|
||||
typedef boost::shared_ptr<ScoreData> ScoreDataHandle;
|
||||
@ -91,7 +91,7 @@ public:
|
||||
// Helper functions for loadnbest();
|
||||
void InitFeatureMap(const std::string& str);
|
||||
void AddFeatures(const std::string& str,
|
||||
const std::string& sentence_index);
|
||||
int sentence_index);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -15,14 +15,14 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
|
||||
Data data(scorer.get());
|
||||
FeatureArray fa1, fa2, fa3, fa4;
|
||||
ScoreArray sa1, sa2, sa3, sa4;
|
||||
fa1.setIndex("1");
|
||||
fa2.setIndex("2");
|
||||
fa3.setIndex("3");
|
||||
fa4.setIndex("4");
|
||||
sa1.setIndex("1");
|
||||
sa2.setIndex("2");
|
||||
sa3.setIndex("3");
|
||||
sa4.setIndex("4");
|
||||
fa1.setIndex(1);
|
||||
fa2.setIndex(2);
|
||||
fa3.setIndex(3);
|
||||
fa4.setIndex(4);
|
||||
sa1.setIndex(1);
|
||||
sa2.setIndex(2);
|
||||
sa3.setIndex(3);
|
||||
sa4.setIndex(4);
|
||||
data.getFeatureData()->add(fa1);
|
||||
data.getFeatureData()->add(fa2);
|
||||
data.getFeatureData()->add(fa3);
|
||||
|
@ -16,10 +16,10 @@ using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
FeatureArray::FeatureArray()
|
||||
: m_index(""), m_num_features(0){}
|
||||
: m_index(0), m_num_features(0){}
|
||||
|
||||
// Destructor: the body is empty, so no explicit cleanup is done here.
FeatureArray::~FeatureArray()
{
}
|
||||
|
||||
@ -115,7 +115,7 @@ void FeatureArray::load(istream* is, const SparseVector& sparseWeights)
|
||||
}
|
||||
getNextPound(stringBuf, substring);
|
||||
getNextPound(stringBuf, substring);
|
||||
m_index = substring;
|
||||
m_index = atoi(substring.c_str());
|
||||
getNextPound(stringBuf, substring);
|
||||
number_of_entries = atoi(substring.c_str());
|
||||
getNextPound(stringBuf, substring);
|
||||
@ -160,4 +160,3 @@ bool FeatureArray::check_consistency() const
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
const char FEATURES_TXT_BEGIN[] = "FEATURES_TXT_BEGIN_0";
|
||||
const char FEATURES_TXT_END[] = "FEATURES_TXT_END_0";
|
||||
@ -27,8 +27,7 @@ class FeatureArray
|
||||
private:
|
||||
// idx to identify the utterance. It can differ from
|
||||
// the index inside the vector.
|
||||
|
||||
std::string m_index;
|
||||
int m_index;
|
||||
featarray_t m_array;
|
||||
std::size_t m_num_features;
|
||||
std::string m_features;
|
||||
@ -40,8 +39,8 @@ public:
|
||||
// Empties the feature array by calling clear() on m_array.
// Only m_array is touched by this call.
void clear()
{
  m_array.clear();
}
|
||||
|
||||
|
||||
std::string getIndex() const { return m_index; }
|
||||
void setIndex(const std::string& value) { m_index = value; }
|
||||
int getIndex() const { return m_index; }
|
||||
void setIndex(const int value) { m_index = value; }
|
||||
|
||||
// Returns a mutable reference to the i-th FeatureStats entry.
// Access goes through m_array.at(i); presumably bounds-checked if
// featarray_t is a std::vector -- confirm against its typedef.
FeatureStats& get(std::size_t i)
{
  return m_array.at(i);
}
|
||||
// Const overload: returns a read-only reference to the i-th FeatureStats
// entry, looked up with m_array.at(i).
const FeatureStats& get(std::size_t i) const
{
  return m_array.at(i);
}
|
||||
|
@ -16,7 +16,7 @@ using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
FeatureData::FeatureData()
|
||||
@ -90,7 +90,7 @@ void FeatureData::add(FeatureArray& e)
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureData::add(FeatureStats& e, const string& sent_idx)
|
||||
void FeatureData::add(FeatureStats& e, int sent_idx)
|
||||
{
|
||||
if (exists(sent_idx)) { // array at position e.getIndex() already exists
|
||||
//enlarge array at position e.getIndex()
|
||||
@ -168,4 +168,3 @@ string FeatureData::ToString() const {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
class FeatureData
|
||||
{
|
||||
@ -35,9 +35,6 @@ public:
|
||||
|
||||
// Drops every stored entry by calling clear() on m_array.
void clear()
{
  m_array.clear();
}
|
||||
|
||||
FeatureArray get(const std::string& idx) {
|
||||
return m_array.at(getIndex(idx));
|
||||
}
|
||||
// Returns a mutable reference to the FeatureArray stored at position idx
// of m_array. idx is passed straight to m_array.at(); no lookup through
// getIndex() happens here.
FeatureArray& get(size_t idx)
{
  FeatureArray& entry = m_array.at(idx);
  return entry;
}
|
||||
@ -45,11 +42,11 @@ public:
|
||||
return m_array.at(idx);
|
||||
}
|
||||
|
||||
inline bool exists(const std::string& sent_idx) const {
|
||||
return exists(getIndex(sent_idx));
|
||||
inline bool exists(int sent_idx) const {
|
||||
return existsInternal(getIndex(sent_idx));
|
||||
}
|
||||
|
||||
inline bool exists(int sent_idx) const {
|
||||
inline bool existsInternal(int sent_idx) const {
|
||||
return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
|
||||
}
|
||||
|
||||
@ -62,7 +59,7 @@ public:
|
||||
}
|
||||
|
||||
void add(FeatureArray& e);
|
||||
void add(FeatureStats& e, const std::string& sent_idx);
|
||||
void add(FeatureStats& e, int sent_idx);
|
||||
|
||||
// Number of entries currently held, as reported by m_array.size().
std::size_t size() const
{
  return m_array.size();
}
|
||||
|
||||
@ -83,7 +80,7 @@ public:
|
||||
|
||||
void setIndex();
|
||||
|
||||
inline int getIndex(const std::string& idx) const {
|
||||
inline int getIndex(int idx) const {
|
||||
name2idx::const_iterator i = m_array_name_to_index.find(idx);
|
||||
if (i != m_array_name_to_index.end())
|
||||
return i->second;
|
||||
@ -91,7 +88,7 @@ public:
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline std::string getIndex(std::size_t idx) const {
|
||||
inline int getName(std::size_t idx) const {
|
||||
idx2name::const_iterator i = m_index_to_array_name.find(idx);
|
||||
if (i != m_index_to_array_name.end())
|
||||
throw std::runtime_error("there is no entry at index " + idx);
|
||||
|
@ -6,7 +6,7 @@ using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
// TODO: This is too long. Consider creating a function for
|
||||
// initialization such as Init().
|
||||
@ -88,10 +88,6 @@ void InterpolatedScorer::setScoreData(ScoreData* data)
|
||||
for (size_t i = 0; i < data->size(); i++) {
|
||||
ScoreArray scoreArray = data->get(i);
|
||||
ScoreArray newScoreArray;
|
||||
std::string istr;
|
||||
std::stringstream out;
|
||||
out << i;
|
||||
istr = out.str();
|
||||
size_t numNBest = scoreArray.size();
|
||||
//cout << " Datasize " << data->size() << " NumNBest " << numNBest << endl ;
|
||||
for (size_t j = 0; j < numNBest ; j++) {
|
||||
@ -105,7 +101,7 @@ void InterpolatedScorer::setScoreData(ScoreData* data)
|
||||
//cout << " last " << last << " NumScores " << numScoresScorer << "newScorestats " << newScoreStats << endl;
|
||||
newScoreArray.add(newScoreStats);
|
||||
}
|
||||
newScoreArray.setIndex(istr);
|
||||
newScoreArray.setIndex(i);
|
||||
newData->add(newScoreArray);
|
||||
}
|
||||
//newData->dump();
|
||||
@ -224,4 +220,3 @@ void InterpolatedScorer::setFilter(const string& filterCommand)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -14,10 +14,10 @@ using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
ScoreArray::ScoreArray()
|
||||
: m_num_scores(0), m_index("") {}
|
||||
: m_num_scores(0), m_index(0) {}
|
||||
|
||||
void ScoreArray::savetxt(ostream* os, const string& sctype)
|
||||
{
|
||||
@ -109,7 +109,7 @@ void ScoreArray::load(istream* is)
|
||||
}
|
||||
getNextPound(stringBuf, substring);
|
||||
getNextPound(stringBuf, substring);
|
||||
m_index = substring;
|
||||
m_index = atoi(substring.c_str());
|
||||
getNextPound(stringBuf, substring);
|
||||
number_of_entries = atoi(substring.c_str());
|
||||
getNextPound(stringBuf, substring);
|
||||
@ -166,4 +166,3 @@ bool ScoreArray::check_consistency() const
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
const char SCORES_TXT_BEGIN[] = "SCORES_TXT_BEGIN_0";
|
||||
const char SCORES_TXT_END[] = "SCORES_TXT_END_0";
|
||||
const char SCORES_BIN_BEGIN[] = "SCORES_BIN_BEGIN_0";
|
||||
@ -32,7 +32,7 @@ class ScoreArray
|
||||
|
||||
// index to identify the utterance.
|
||||
// It can differ from the index inside the vector.
|
||||
std::string m_index;
|
||||
int m_index;
|
||||
|
||||
public:
|
||||
ScoreArray();
|
||||
@ -40,9 +40,9 @@ public:
|
||||
|
||||
// Empties the score array by calling clear() on m_array.
void clear()
{
  m_array.clear();
}
|
||||
|
||||
std::string getIndex() const { return m_index; }
|
||||
int getIndex() const { return m_index; }
|
||||
|
||||
void setIndex(const std::string& value) { m_index = value; }
|
||||
void setIndex(int value) { m_index = value; }
|
||||
|
||||
// Returns a mutable reference to the i-th ScoreStats entry, looked up
// with m_array.at(i).
ScoreStats& get(std::size_t i)
{
  return m_array.at(i);
}
|
||||
|
||||
|
@ -18,7 +18,7 @@ using namespace std;
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
ScoreData::ScoreData(Scorer* scorer) :
|
||||
m_scorer(scorer)
|
||||
@ -95,7 +95,7 @@ void ScoreData::add(ScoreArray& e)
|
||||
}
|
||||
}
|
||||
|
||||
void ScoreData::add(const ScoreStats& e, const string& sent_idx)
|
||||
void ScoreData::add(const ScoreStats& e, int sent_idx)
|
||||
{
|
||||
if (exists(sent_idx)) { // array at position e.getIndex() already exists
|
||||
// Enlarge array at position e.getIndex()
|
||||
@ -139,4 +139,3 @@ void ScoreData::setIndex()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
namespace MosesTuning
|
||||
{
|
||||
|
||||
|
||||
|
||||
class Scorer;
|
||||
|
||||
@ -42,10 +42,6 @@ public:
|
||||
|
||||
// Drops every stored entry by calling clear() on m_array.
void clear()
{
  m_array.clear();
}
|
||||
|
||||
inline ScoreArray get(const std::string& idx) {
|
||||
return m_array.at(getIndex(idx));
|
||||
}
|
||||
|
||||
// Returns a mutable reference to the ScoreArray stored at position idx
// of m_array. idx is passed straight to m_array.at(); no lookup through
// getIndex() happens here.
inline ScoreArray& get(std::size_t idx)
{
  ScoreArray& entry = m_array.at(idx);
  return entry;
}
|
||||
@ -54,11 +50,11 @@ public:
|
||||
return m_array.at(idx);
|
||||
}
|
||||
|
||||
inline bool exists(const std::string& sent_idx) const {
|
||||
return exists(getIndex(sent_idx));
|
||||
inline bool exists(int sent_idx) const {
|
||||
return existsInternal(getIndex(sent_idx));
|
||||
}
|
||||
|
||||
inline bool exists(int sent_idx) const {
|
||||
inline bool existsInternal(int sent_idx) const {
|
||||
return (sent_idx > -1 && sent_idx < static_cast<int>(m_array.size())) ? true : false;
|
||||
}
|
||||
|
||||
@ -77,7 +73,7 @@ public:
|
||||
}
|
||||
|
||||
void add(ScoreArray& e);
|
||||
void add(const ScoreStats& e, const std::string& sent_idx);
|
||||
void add(const ScoreStats& e, int sent_idx);
|
||||
|
||||
// Accessor for m_num_scores; returns the stored value unchanged.
std::size_t NumberOfScores() const
{
  return m_num_scores;
}
|
||||
// Number of entries currently held, as reported by m_array.size().
std::size_t size() const
{
  return m_array.size();
}
|
||||
@ -93,7 +89,7 @@ public:
|
||||
|
||||
void setIndex();
|
||||
|
||||
inline int getIndex(const std::string& idx) const {
|
||||
inline int getIndex(const int idx) const {
|
||||
name2idx::const_iterator i = m_array_name_to_index.find(idx);
|
||||
if (i != m_array_name_to_index.end())
|
||||
return i->second;
|
||||
@ -101,7 +97,7 @@ public:
|
||||
return -1;
|
||||
}
|
||||
|
||||
inline std::string getIndex(std::size_t idx) const {
|
||||
inline int getName(std::size_t idx) const {
|
||||
idx2name::const_iterator i = m_index_to_array_name.find(idx);
|
||||
if (i != m_index_to_array_name.end())
|
||||
throw std::runtime_error("there is no entry at index " + idx);
|
||||
|
@ -39,8 +39,8 @@ typedef ScoreStatsType* scorestats_t;
|
||||
typedef std::vector<ScoreStats> scorearray_t;
|
||||
typedef std::vector<ScoreArray> scoredata_t;
|
||||
|
||||
typedef std::map<std::size_t, std::string> idx2name;
|
||||
typedef std::map<std::string, std::size_t> name2idx;
|
||||
typedef std::map<std::size_t, int> idx2name;
|
||||
typedef std::map<int, std::size_t> name2idx;
|
||||
|
||||
typedef enum { HAMMING_DISTANCE=0, KENDALL_DISTANCE } distanceMetric_t;
|
||||
typedef enum { REFERENCE_CHOICE_AVERAGE=0, REFERENCE_CHOICE_CLOSEST } distanceMetricReferenceChoice_t;
|
||||
|
@ -60,8 +60,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
|
||||
for (int j = 0; j < n; ++j)
|
||||
{
|
||||
int randomIndex = random() % n;
|
||||
string str_j = int2string(j);
|
||||
scoredata->add(entries[randomIndex], str_j);
|
||||
scoredata->add(entries[randomIndex], j);
|
||||
}
|
||||
g_scorer->setScoreData(scoredata);
|
||||
candidates_t candidates(n, 0);
|
||||
@ -93,8 +92,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
|
||||
ScoreData* scoredata = new ScoreData(g_scorer);
|
||||
for (int sid = 0; sid < n; ++sid)
|
||||
{
|
||||
string str_sid = int2string(sid);
|
||||
scoredata->add(entries[sid], str_sid);
|
||||
scoredata->add(entries[sid], sid);
|
||||
}
|
||||
g_scorer->setScoreData(scoredata);
|
||||
candidates_t candidates(n, 0);
|
||||
|
Loading…
Reference in New Issue
Block a user