Integrate Lexi's LR Score into tuning

This commit is contained in:
Hieu Hoang 2012-07-10 09:25:00 +01:00
parent 8180181a89
commit 7d664b745e
6 changed files with 63 additions and 6 deletions

View File

@ -11,6 +11,10 @@
#include <string.h>
#include <stdint.h>
#ifdef WIN32
#include <float.h>
#endif
namespace lm {
// 1 for '\t', '\n', and ' '. This is stricter than isspace.
@ -95,8 +99,13 @@ void ReadBackoff(util::FilePiece &in, float &backoff) {
backoff = in.ReadFloat();
if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
{
#ifdef WIN32
int float_class = _fpclass(backoff);
UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
#else
int float_class = fpclassify(backoff);
UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
#endif
}
UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
break;

View File

@ -140,7 +140,7 @@ void Data::loadNBest(const string &file)
throw runtime_error("Unable to open: " + file);
ScoreStats scoreentry;
string line, sentence_index, sentence, feature_str;
string line, sentence_index, sentence, feature_str, alignment;
while (getline(inp, line, '\n')) {
if (line.empty()) continue;
@ -151,7 +151,21 @@ void Data::loadNBest(const string &file)
getNextPound(line, sentence, "|||"); // second field
getNextPound(line, feature_str, "|||"); // third field
if (line.length() > 0) {
string temp;
getNextPound(line, temp, "|||"); //fourth field sentence score
if (line.length() > 0) {
getNextPound(line, alignment, "|||"); //fourth field only there if alignment scorer
}
}
//TODO check alignment exists if scorers need it
if (m_scorer->useAlignment()) {
sentence += "|||";
sentence += alignment;
}
m_scorer->prepareStats(sentence_index, sentence, scoreentry);
m_score_data->add(scoreentry, sentence_index);
// examine first line for name of features

View File

@ -66,6 +66,17 @@ InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
cerr <<endl;
}
bool InterpolatedScorer::useAlignment() const {
//cout << "InterpolatedScorer::useAlignment" << endl;
for (vector<Scorer*>::const_iterator itsc = m_scorers.begin(); itsc < m_scorers.end(); itsc++) {
if ((*itsc)->useAlignment()) {
//cout <<"InterpolatedScorer::useAlignment Returning true"<<endl;
return true;
}
}
return false;
};
void InterpolatedScorer::setScoreData(ScoreData* data)
{
size_t last = 0;
@ -156,11 +167,23 @@ void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
stringstream buff;
string align = text;
string sentence = "";
size_t alignmentData = text.find("|||");
//Get sentence and alignment parts
if(alignmentData != string::npos) {
getNextPound(align,sentence, "|||");
}
int i = 0;
for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin(); itsc != m_scorers.end(); ++itsc) {
ScoreStats tempEntry;
(*itsc)->prepareStats(sid, text, tempEntry);
if ((*itsc)->useAlignment()) {
(*itsc)->prepareStats(sid, text, tempEntry);
}
else {
(*itsc)->prepareStats(sid, sentence, tempEntry);
}
if (i > 0) buff << " ";
buff << tempEntry;
i++;

View File

@ -46,6 +46,8 @@ public:
virtual void setFilter(const std::string& filterCommand);
bool useAlignment() const;
protected:
ScopedVector<Scorer> m_scorers;

View File

@ -103,6 +103,15 @@ class Scorer
m_score_data = data;
}
/**
 * Tells the caller whether this scorer uses the reference alignment data
 * (for permutation distance scores).
 * This implementation always answers false; the method is virtual so a
 * derived scorer can supply its own answer.
 */
virtual bool useAlignment() const {
  const bool usesAlignment = false;
  return usesAlignment;
}
/**
* Set the factors, which should be used for this metric
*/

View File

@ -46,8 +46,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
return new SemposScorer(config);
} else if (type == "MERGE") {
return new MergeScorer(config);
} else if (type == "LRSCORE") {
return new PermutationScorer(config);
} else if ((type == "HAMMING") || (type == "KENDALL")) {
return (PermutationScorer*) new PermutationScorer(type, config);
} else {
if (type.find(',') != string::npos) {
return new InterpolatedScorer(type, config);