This commit is contained in:
Hieu Hoang 2013-05-29 18:16:15 +01:00
parent 59bd7deb4b
commit 6249432407
501 changed files with 20914 additions and 20027 deletions

View File

@ -70,12 +70,10 @@ const PhraseNode* OnDiskQuery::Query(const SourcePhrase& sourcePhrase)
const PhraseNode *node = &m_wrapper.GetRootSourceNode();
assert(node);
for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos)
{
for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos) {
const Word &word = sourcePhrase.GetWord(pos);
node = node->GetChild(word, m_wrapper);
if (node == NULL)
{
if (node == NULL) {
break;
}
}

View File

@ -29,8 +29,7 @@ public:
const PhraseNode *Query(const SourcePhrase& sourcePhrase);
inline const PhraseNode *Query(const std::vector<std::string>& tokens)
{
// Convenience overload: passes the token strings through Tokenize() and
// forwards the result to another Query overload.
inline const PhraseNode *Query(const std::vector<std::string>& tokens) {
return Query(Tokenize(tokens));
}

View File

@ -212,8 +212,8 @@ Word *OnDiskWrapper::ConvertFromMoses(Moses::FactorDirection /* direction */
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
size_t factorType = factorsVec[ind];
const Moses::Factor *factor = origWord.GetFactor(factorType);
if (factor == NULL)
{ // can have less factors than factorType.size()
if (factor == NULL) {
// can have fewer factors than factorsVec.size()
break;
}
CHECK(factor);

View File

@ -253,8 +253,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
if (GetWord(targetPos).IsNonTerminal()) {
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
}
else {
} else {
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
}

View File

@ -99,7 +99,8 @@ size_t Word::ReadFromFile(std::fstream &file)
void Word::ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const {
Moses::Word &overwrite) const
{
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
overwrite = Moses::Word(m_isNonTerminal);

View File

@ -77,8 +77,7 @@ public:
Moses::Word &overwrite) const;
void DebugPrint(std::ostream &out, const Vocab &vocab) const;
inline const std::string &GetString(const Vocab &vocab) const
{
// Looks up this word's m_vocabId in the given vocabulary and returns the
// string reference that vocab.GetString() yields.
inline const std::string &GetString(const Vocab &vocab) const {
return vocab.GetString(m_vocabId);
}

View File

@ -33,8 +33,7 @@ int main(int argc, char **argv)
if(i + 1 == argc)
usage();
ttable = argv[++i];
}
else
} else
usage();
}
@ -56,22 +55,19 @@ int main(int argc, char **argv)
cerr << "line: " << line << endl;
const PhraseNode* node = onDiskQuery.Query(tokens);
if (node)
{ // source phrase points to a bunch of rules
if (node) {
// source phrase points to a bunch of rules
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
string str = coll->GetDebugStr();
cout << "Found " << coll->GetSize() << endl;
for (size_t ind = 0; ind < coll->GetSize(); ++ind)
{
for (size_t ind = 0; ind < coll->GetSize(); ++ind) {
const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind);
cerr << " ";
targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab());
cerr << endl;
}
}
else
{
} else {
cout << "Not found" << endl;
}

View File

@ -5,7 +5,8 @@
#include <stdlib.h>
#include <cstring>
namespace {
namespace
{
const int LINE_MAX_LENGTH = 10000;

View File

@ -281,7 +281,8 @@ void Mismatch::PrintClippedHTML( ostream* out, int width )
*out << "</td></tr>";
}
void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label ) {
void Mismatch::LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label )
{
for(INDEX ap=0; ap<m_num_alignment_points; ap++) {
if (m_alignment->GetSourceWord( m_sentence_id, ap ) == source_id) {
source_annotation[ source_id ] = label;

View File

@ -34,7 +34,9 @@ public:
Mismatch( SuffixArray *sa, TargetCorpus *tc, Alignment *a, INDEX sentence_id, INDEX position, int source_length, int target_length, int source_start, int source_end );
~Mismatch();
bool Unaligned() const { return m_unaligned; }
// Returns the value of the m_unaligned flag.
bool Unaligned() const {
return m_unaligned;
}
void PrintClippedHTML(std::ostream* out, int width );
void LabelSourceMatches(int *source_annotation, int *target_annotation, int source_id, int label );
};

View File

@ -261,8 +261,7 @@ void PhrasePair::PrintClippedHTML( ostream* out, int width ) const
if (target_post.size() < target_post_width) {
target_post_width = target_post.size();
}
else {
} else {
while(target_post_width>0 &&
target_post.substr(target_post_width-1,1) != " ") {
target_post_width--;

View File

@ -75,8 +75,7 @@ int PhrasePairCollection::GetCollection( const vector< string >& sourceString )
m_size++;
}
}
}
else {
} else {
//cerr << "mismatch " << (i-first_match)
// << " in sentence " << sentence_id
// << ", starting at word " << source_start
@ -111,8 +110,7 @@ void PhrasePairCollection::Print(bool pretty) const
for(int j=0; j<ppWithSameTarget->size() && j<m_max_example; j++, p++ ) {
if (pretty) {
(*p)->PrintPretty( &cout, 100 );
}
else {
} else {
(*p)->Print( &cout );
}
if (ppWithSameTarget->size() > m_max_example) {
@ -138,8 +136,7 @@ void PhrasePairCollection::PrintHTML() const
<< (m_collection.end() - ppWithSameTarget==1?"":"s") << " ("
<< (m_collection.end() - ppWithSameTarget)
<< "/" << m_size << ")</p>";
}
else {
} else {
cout << "<p class=\"pp_target_header\">";
(*(ppWithSameTarget->begin()))->PrintTarget( &cout );
cout << " (" << count << "/" << m_size << ")" << endl;

View File

@ -5,7 +5,8 @@
#include <stdlib.h>
#include <cstring>
namespace {
namespace
{
const int LINE_MAX_LENGTH = 10000;

View File

@ -5,7 +5,8 @@
#include <stdlib.h>
#include <cstring>
namespace {
namespace
{
const int LINE_MAX_LENGTH = 10000;

View File

@ -2,7 +2,8 @@
#include "Vocabulary.h"
#include <fstream>
namespace {
namespace
{
const int MAX_LENGTH = 10000;

View File

@ -34,11 +34,13 @@ static const std::string base64_chars =
"0123456789+/";
static inline bool is_base64(unsigned char c) {
/**
 * Returns true when c is one of the 64 characters of the standard base64
 * alphabet: 'A'-'Z', 'a'-'z', '0'-'9', '+' or '/'.
 *
 * The ranges are tested explicitly instead of calling isalnum(): isalnum()
 * depends on the current C locale and may report bytes above 0x7F as
 * alphanumeric, which would let characters outside the alphabet through.
 * The range tests assume an ASCII-compatible execution character set.
 */
static inline bool is_base64(unsigned char c)
{
  return (c >= 'A' && c <= 'Z') ||
         (c >= 'a' && c <= 'z') ||
         (c >= '0' && c <= '9') ||
         (c == '+') || (c == '/');
}
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len)
{
std::string ret;
int i = 0;
int j = 0;
@ -59,8 +61,7 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_
}
}
if (i)
{
if (i) {
for(j = i; j < 3; j++)
char_array_3[j] = '\0';
@ -81,7 +82,8 @@ std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_
}
std::string base64_decode(std::string const& encoded_string) {
std::string base64_decode(std::string const& encoded_string)
{
int in_len = encoded_string.size();
int i = 0;
int j = 0;
@ -90,7 +92,8 @@ std::string base64_decode(std::string const& encoded_string) {
std::string ret;
while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
char_array_4[i++] = encoded_string[in_]; in_++;
char_array_4[i++] = encoded_string[in_];
in_++;
if (i ==4) {
for (i = 0; i <4; i++)
char_array_4[i] = base64_chars.find(char_array_4[i]);

View File

@ -150,22 +150,19 @@ int main(int argc, char* argv[])
cout << "TOTAL: " << total << endl;
if (htmlFlag) {
ppCollection.PrintHTML();
}
else {
} else {
ppCollection.Print(prettyFlag);
}
cout << "-|||- BICONCOR END -|||-" << endl << flush;
}
}
else if (queryFlag) {
} else if (queryFlag) {
cerr << "query is " << query << endl;
vector< string > queryString = alignment.Tokenize( query.c_str() );
PhrasePairCollection ppCollection( &suffixArray, &targetCorpus, &alignment, max_translation, max_example );
ppCollection.GetCollection( queryString );
if (htmlFlag) {
ppCollection.PrintHTML();
}
else {
} else {
ppCollection.Print(prettyFlag);
}
}

View File

@ -42,7 +42,8 @@ namespace Moses
, const std::vector<float> &weightT
, size_t tableLimit
, const LMList &languageModels
, float weightWP) {
, float weightWP)
{
m_languageModels = &languageModels;
m_weightT = weightT;
@ -104,7 +105,8 @@ namespace Moses
return true;
}
void PhraseDictionaryInterpolated::InitializeForInput(InputType const& source) {
void PhraseDictionaryInterpolated::InitializeForInput(InputType const& source)
{
for (size_t i = 0; i < m_dictionaries.size(); ++i) {
m_dictionaries[i]->InitializeForInput(source);
}
@ -115,7 +117,8 @@ namespace Moses
const TargetPhraseCollection*
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const {
PhraseDictionaryInterpolated::GetTargetPhraseCollection(const Phrase& src) const
{
delete m_targetPhrases;
m_targetPhrases = new TargetPhraseCollection();

View File

@ -39,7 +39,9 @@ class PhraseDictionaryInterpolated : public PhraseDictionary
PhraseDictionaryInterpolated
(size_t numScoreComponent,size_t numInputScores,const PhraseDictionaryFeature* feature);
virtual ~PhraseDictionaryInterpolated() {delete m_targetPhrases;}
// Destroys the object that m_targetPhrases points to.
virtual ~PhraseDictionaryInterpolated() {
delete m_targetPhrases;
}
// initialize ...
bool Load(const std::vector<FactorType> &input

View File

@ -31,7 +31,8 @@ BOOST_AUTO_TEST_SUITE(phrase_length_feature)
//TODO: Factor out setup code so that it can be reused
static Word MakeWord(string text) {
static Word MakeWord(string text)
{
FactorCollection &factorCollection = FactorCollection::Instance();
const Factor* f = factorCollection.AddFactor(Input,0,text);
Word w;
@ -40,7 +41,8 @@ static Word MakeWord(string text) {
}
BOOST_AUTO_TEST_CASE(evaluate) {
BOOST_AUTO_TEST_CASE(evaluate)
{
Word w1 = MakeWord("w1");
Word w2 = MakeWord("y2");
Word w3 = MakeWord("x3");

View File

@ -39,7 +39,8 @@ namespace MosesTest
BOOST_AUTO_TEST_SUITE(target_bigram)
static Word MakeWord(string text) {
static Word MakeWord(string text)
{
FactorCollection &factorCollection = FactorCollection::Instance();
const Factor* f = factorCollection.AddFactor(Input,0,text);
Word w;
@ -47,26 +48,24 @@ static Word MakeWord(string text) {
return w;
}
class VocabFileFixture {
class VocabFileFixture
{
public:
template<class I>
VocabFileFixture(I begin, I end)
{
VocabFileFixture(I begin, I end) {
char name[] = "TargetBigramXXXXXX";
int fd = mkstemp(name);
BOOST_CHECK(fd != -1);
BOOST_CHECK(!close(fd));
filename = name;
ofstream out(name);
for (I i = begin; i != end; ++i)
{
for (I i = begin; i != end; ++i) {
out << *i << endl;
}
out.close();
}
~VocabFileFixture()
{
// Removes the file named by `filename`; BOOST_CHECK flags a test failure if
// remove() returns non-zero.
~VocabFileFixture() {
BOOST_CHECK(!remove(filename.c_str()));
}

View File

@ -18,7 +18,8 @@
using namespace std;
namespace {
namespace
{
// configure regularisation
const char KEY_REFLEN[] = "reflen";
@ -34,7 +35,8 @@ namespace MosesTuning
BleuScorer::BleuScorer(const string& config)
: StatisticsBasedScorer("BLEU", config),
m_ref_length_type(CLOSEST) {
m_ref_length_type(CLOSEST)
{
const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
if (reflen == REFLEN_AVERAGE) {
m_ref_length_type = AVERAGE;
@ -101,7 +103,8 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
}
}
bool BleuScorer::OpenReference(const char* filename, size_t file_id) {
bool BleuScorer::OpenReference(const char* filename, size_t file_id)
{
ifstream ifs(filename);
if (!ifs) {
cerr << "Cannot open " << filename << endl;
@ -110,7 +113,8 @@ bool BleuScorer::OpenReference(const char* filename, size_t file_id) {
return OpenReferenceStream(&ifs, file_id);
}
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id) {
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
{
if (is == NULL) return false;
string line;
@ -203,7 +207,8 @@ statscore_t BleuScorer::calculateScore(const vector<int>& comps) const
return exp(logbleu);
}
int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length) {
int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length)
{
switch (m_ref_length_type) {
case AVERAGE:
return m_references[sentence_id]->CalcAverage();
@ -221,7 +226,8 @@ int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length) {
}
void BleuScorer::DumpCounts(ostream* os,
const NgramCounts& counts) const {
const NgramCounts& counts) const
{
for (NgramCounts::const_iterator it = counts.begin();
it != counts.end(); ++it) {
*os << "(";
@ -238,7 +244,8 @@ void BleuScorer::DumpCounts(ostream* os,
}
float smoothedSentenceBleu
(const std::vector<float>& stats, float smoothing, bool smoothBP) {
(const std::vector<float>& stats, float smoothing, bool smoothBP)
{
CHECK(stats.size() == kBleuNgramOrder * 2 + 1);
@ -282,7 +289,8 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec
return exp(logbleu) * stats[kBleuNgramOrder*2];
}
float unsmoothedBleu(const std::vector<float>& stats) {
float unsmoothedBleu(const std::vector<float>& stats)
{
CHECK(stats.size() == kBleuNgramOrder * 2 + 1);
float logbleu = 0.0;
@ -298,7 +306,8 @@ float unsmoothedBleu(const std::vector<float>& stats) {
return exp(logbleu);
}
vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string& featureFile) {
vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string& featureFile)
{
vector<string> scoreFiles;
vector<string> featureFiles;
scoreFiles.push_back(scoreFile);

View File

@ -38,14 +38,22 @@ public:
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual statscore_t calculateScore(const std::vector<int>& comps) const;
virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
// Size of the statistics vector for this scorer: 2 * kBleuNgramOrder + 1.
virtual std::size_t NumberOfScores() const {
return 2 * kBleuNgramOrder + 1;
}
int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
ReferenceLengthType GetReferenceLengthType() const { return m_ref_length_type; }
void SetReferenceLengthType(ReferenceLengthType type) { m_ref_length_type = type; }
// Returns the currently selected reference length type (m_ref_length_type).
ReferenceLengthType GetReferenceLengthType() const {
return m_ref_length_type;
}
// Overwrites m_ref_length_type with the given type.
void SetReferenceLengthType(ReferenceLengthType type) {
m_ref_length_type = type;
}
const std::vector<Reference*>& GetReferences() const { return m_references.get(); }
// Read-only access to the Reference pointers, as returned by
// m_references.get().
const std::vector<Reference*>& GetReferences() const {
return m_references.get();
}
/**
* Count the ngrams of each type, up to the given length in the input line.

View File

@ -10,16 +10,19 @@
using namespace MosesTuning;
namespace {
namespace
{
NgramCounts* g_counts = NULL;
NgramCounts* GetNgramCounts() {
// Returns the file-local g_counts pointer; asserts that it is non-null, so
// SetNgramCounts() must have been given a valid pointer beforehand.
NgramCounts* GetNgramCounts()
{
assert(g_counts);
return g_counts;
}
void SetNgramCounts(NgramCounts* counts) {
// Stores `counts` in the file-local g_counts pointer that GetNgramCounts()
// returns. Only the pointer is stored; no copy of the counts is made.
void SetNgramCounts(NgramCounts* counts)
{
g_counts = counts;
}
@ -58,33 +61,38 @@ struct Fourgram {
NgramCounts::Key instance;
};
bool CheckUnigram(const std::string& str) {
bool CheckUnigram(const std::string& str)
{
Unigram unigram(str);
NgramCounts::Value v;
return GetNgramCounts()->Lookup(unigram.instance, &v);
}
bool CheckBigram(const std::string& a, const std::string& b) {
bool CheckBigram(const std::string& a, const std::string& b)
{
Bigram bigram(a, b);
NgramCounts::Value v;
return GetNgramCounts()->Lookup(bigram.instance, &v);
}
bool CheckTrigram(const std::string& a, const std::string& b,
const std::string& c) {
const std::string& c)
{
Trigram trigram(a, b, c);
NgramCounts::Value v;
return GetNgramCounts()->Lookup(trigram.instance, &v);
}
bool CheckFourgram(const std::string& a, const std::string& b,
const std::string& c, const std::string& d) {
const std::string& c, const std::string& d)
{
Fourgram fourgram(a, b, c, d);
NgramCounts::Value v;
return GetNgramCounts()->Lookup(fourgram.instance, &v);
}
void SetUpReferences(BleuScorer& scorer) {
void SetUpReferences(BleuScorer& scorer)
{
// The following examples are taken from Koehn, "Statistical Machine Translation",
// Cambridge University Press, 2010.
{
@ -115,7 +123,8 @@ void SetUpReferences(BleuScorer& scorer) {
} // namespace
BOOST_AUTO_TEST_CASE(bleu_reference_type) {
BOOST_AUTO_TEST_CASE(bleu_reference_type)
{
BleuScorer scorer;
// BleuScorer will use "closest" by default.
BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::CLOSEST);
@ -127,7 +136,8 @@ BOOST_AUTO_TEST_CASE(bleu_reference_type) {
BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::SHORTEST);
}
BOOST_AUTO_TEST_CASE(bleu_reference_type_with_config) {
BOOST_AUTO_TEST_CASE(bleu_reference_type_with_config)
{
{
BleuScorer scorer("reflen:average");
BOOST_CHECK_EQUAL(scorer.GetReferenceLengthType(), BleuScorer::AVERAGE);
@ -139,7 +149,8 @@ BOOST_AUTO_TEST_CASE(bleu_reference_type_with_config) {
}
}
BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
BOOST_AUTO_TEST_CASE(bleu_count_ngrams)
{
BleuScorer scorer;
std::string line = "I saw a girl with a telescope .";
@ -198,7 +209,8 @@ BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
BOOST_CHECK(CheckFourgram("with", "a", "telescope", "."));
}
BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
BOOST_AUTO_TEST_CASE(bleu_clipped_counts)
{
BleuScorer scorer;
SetUpReferences(scorer);
std::string line("israeli officials responsibility of airport safety");
@ -220,7 +232,8 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
BOOST_CHECK_EQUAL(entry.get(7), 3); // fourgram
}
BOOST_AUTO_TEST_CASE(calculate_actual_score) {
BOOST_AUTO_TEST_CASE(calculate_actual_score)
{
BOOST_REQUIRE(4 == kBleuNgramOrder);
std::vector<int> stats(2 * kBleuNgramOrder + 1);
BleuScorer scorer;
@ -247,7 +260,8 @@ BOOST_AUTO_TEST_CASE(calculate_actual_score) {
BOOST_CHECK_CLOSE(0.5115f, scorer.calculateScore(stats), 0.01);
}
BOOST_AUTO_TEST_CASE(sentence_level_bleu) {
BOOST_AUTO_TEST_CASE(sentence_level_bleu)
{
BOOST_REQUIRE(4 == kBleuNgramOrder);
std::vector<float> stats(2 * kBleuNgramOrder + 1);

View File

@ -6,9 +6,11 @@
using namespace std;
namespace {
namespace
{
inline int CalcDistance(int word1, int word2) {
// Distance between two word ids: 0 when they are identical, 1 otherwise.
inline int CalcDistance(int word1, int word2)
{
  if (word1 == word2) {
    return 0;
  }
  return 1;
}
@ -82,7 +84,8 @@ float CderScorer::calculateScore(const vector<int>& comps) const
}
void CderScorer::computeCD(const sent_t& cand, const sent_t& ref,
vector<int>& stats) const {
vector<int>& stats) const
{
int I = cand.size() + 1; // Number of inter-words positions in candidate sentence
int L = ref.size() + 1; // Number of inter-words positions in reference sentence
@ -95,11 +98,9 @@ void CderScorer::computeCD(const sent_t& cand, const sent_t& ref,
for (int i = 1; i < I; ++i) (*row)[i] = 1;
// Calculating costs for next row using costs from the previous row.
while (++l < L)
{
while (++l < L) {
vector<int>* nextRow = new vector<int>(I);
for (int i = 0; i < I; ++i)
{
for (int i = 0; i < I; ++i) {
vector<int> possibleCosts;
if (i > 0) {
possibleCosts.push_back((*nextRow)[i-1] + 1); // Deletion

View File

@ -13,7 +13,8 @@ namespace MosesTuning
/**
* CderScorer class can compute both CDER and WER metric.
*/
class CderScorer: public StatisticsBasedScorer {
class CderScorer: public StatisticsBasedScorer
{
public:
explicit CderScorer(const std::string& config, bool allowed_long_jumps = true);
~CderScorer();
@ -24,7 +25,9 @@ class CderScorer: public StatisticsBasedScorer {
virtual void prepareStatsVector(std::size_t sid, const std::string& text, std::vector<int>& stats);
virtual std::size_t NumberOfScores() const { return 2; }
// This scorer reports a fixed statistics count of 2.
virtual std::size_t NumberOfScores() const {
return 2;
}
virtual float calculateScore(const std::vector<int>& comps) const;

View File

@ -48,7 +48,8 @@ Data::Data(Scorer* scorer, const string& sparse_weights_file)
//ADDED BY TS
// TODO: This is too long; consider creating additional functions to
// reduce the lines of this function.
void Data::removeDuplicates() {
void Data::removeDuplicates()
{
size_t nSentences = m_feature_data->size();
assert(m_score_data->size() == nSentences);
@ -128,7 +129,8 @@ void Data::removeDuplicates() {
}
//END_ADDED
void Data::load(const std::string &featfile, const std::string &scorefile) {
// Loads the feature data from `featfile` (passing m_sparse_weights along)
// and then the score data from `scorefile`.
void Data::load(const std::string &featfile, const std::string &scorefile)
{
m_feature_data->load(featfile, m_sparse_weights);
m_score_data->load(scorefile);
}
@ -192,7 +194,8 @@ void Data::loadNBest(const string &file)
}
}
void Data::save(const std::string &featfile, const std::string &scorefile, bool bin) {
void Data::save(const std::string &featfile, const std::string &scorefile, bool bin)
{
if (bin)
cerr << "Binary write mode is selected" << endl;
else
@ -202,7 +205,8 @@ void Data::save(const std::string &featfile, const std::string &scorefile, bool
m_score_data->save(scorefile, bin);
}
void Data::InitFeatureMap(const string& str) {
void Data::InitFeatureMap(const string& str)
{
string buf = str;
string substr;
string features = "";
@ -231,7 +235,8 @@ void Data::InitFeatureMap(const string& str) {
}
void Data::AddFeatures(const string& str,
int sentence_index) {
int sentence_index)
{
string buf = str;
string substr;
FeatureStats feature_entry;

View File

@ -44,18 +44,28 @@ public:
m_feature_data->clear();
}
ScoreDataHandle getScoreData() { return m_score_data; }
// Returns the score data handle (m_score_data) by value.
ScoreDataHandle getScoreData() {
return m_score_data;
}
FeatureDataHandle getFeatureData() { return m_feature_data; }
// Returns the feature data handle (m_feature_data) by value.
FeatureDataHandle getFeatureData() {
return m_feature_data;
}
Scorer* getScorer() { return m_scorer; }
// Returns the raw Scorer pointer stored in m_scorer.
Scorer* getScorer() {
return m_scorer;
}
// Forwards to m_feature_data->NumberOfFeatures().
std::size_t NumberOfFeatures() const {
return m_feature_data->NumberOfFeatures();
}
std::string Features() const { return m_feature_data->Features(); }
void Features(const std::string &f) { m_feature_data->Features(f); }
// Getter: returns the feature string reported by m_feature_data->Features().
std::string Features() const {
return m_feature_data->Features();
}
// Setter: forwards the feature string `f` to m_feature_data->Features(f).
void Features(const std::string &f) {
m_feature_data->Features(f);
}
void loadNBest(const std::string &file);

View File

@ -10,7 +10,8 @@
using namespace MosesTuning;
//very basic test of sharding
BOOST_AUTO_TEST_CASE(shard_basic) {
BOOST_AUTO_TEST_CASE(shard_basic)
{
boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
Data data(scorer.get());
FeatureArray fa1, fa2, fa3, fa4;
@ -39,7 +40,8 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),(std::size_t)2);
}
BOOST_AUTO_TEST_CASE(init_feature_map_test) {
BOOST_AUTO_TEST_CASE(init_feature_map_test)
{
boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
Data data(scorer.get());
@ -49,7 +51,8 @@ BOOST_AUTO_TEST_CASE(init_feature_map_test) {
BOOST_CHECK_EQUAL(expected, data.Features());
}
BOOST_AUTO_TEST_CASE(add_features_test) {
BOOST_AUTO_TEST_CASE(add_features_test)
{
boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
Data data(scorer.get());

View File

@ -24,16 +24,16 @@ protected:
{ }
_fdstream(int file_descriptor, std::ios_base::openmode openmode) :
_file_descriptor(file_descriptor), _openmode(openmode)
{
_file_descriptor(file_descriptor), _openmode(openmode) {
_filebuf = NULL;
open(file_descriptor, openmode);
}
std::ios_base::openmode openmode() const { return _openmode; }
// Returns the stored open mode (_openmode).
std::ios_base::openmode openmode() const {
return _openmode;
}
void open(int file_descriptor, std::ios_base::openmode openmode)
{
void open(int file_descriptor, std::ios_base::openmode openmode) {
if (!_filebuf)
// We create a C++ stream from a file descriptor
// stdio_filebuf is not synced with stdio.
@ -44,8 +44,7 @@ protected:
openmode);
}
virtual ~_fdstream()
{
virtual ~_fdstream() {
close(_file_descriptor);
delete _filebuf;
_filebuf = NULL;
@ -64,42 +63,35 @@ public:
{ }
ifdstream(int file_descriptor) :
_fdstream(file_descriptor, std::ios_base::in)
{
_fdstream(file_descriptor, std::ios_base::in) {
_stream = new std::istream(_filebuf);
}
void open(int file_descriptor)
{
if (!_stream)
{
// Opens the input stream on `file_descriptor`. Does nothing when _stream
// has already been created; otherwise the base class is opened in input
// mode and a std::istream is created on top of _filebuf.
void open(int file_descriptor) {
  if (_stream) {
    return;
  }
  _fdstream::open(file_descriptor, std::ios_base::in);
  _stream = new std::istream(_filebuf);
}
ifdstream& operator>> (std::string& str)
{
// Extracts into `str` using the wrapped std::istream and returns *this so
// extractions can be chained.
// NOTE(review): _stream is dereferenced without a null check — presumably
// callers open the stream first; confirm.
ifdstream& operator>> (std::string& str) {
(*_stream) >> str;
return *this;
}
std::size_t getline(std::string& str)
{
// Reads one line into `str` by way of a local BUFFER_SIZE-char buffer and
// the (char*, streamsize) overload; returns that overload's result.
// NOTE(review): `str = tmp` treats tmp as a C string, so this relies on the
// char* overload NUL-terminating the buffer — confirm. Input is also bounded
// by BUFFER_SIZE.
std::size_t getline(std::string& str) {
char tmp[BUFFER_SIZE];
std::size_t ret = getline(tmp, BUFFER_SIZE);
str = tmp;
return ret;
}
std::size_t getline(char* s, std::streamsize n)
{
// Delegates to the three-argument overload with '\n' as the delimiter.
std::size_t getline(char* s, std::streamsize n) {
return (getline(s, n, '\n'));
}
std::size_t getline(char* s, std::streamsize n, char delim)
{
std::size_t getline(char* s, std::streamsize n, char delim) {
int i = 0;
do {
s[i] = _stream->get();
@ -111,8 +103,7 @@ public:
return i-1;
}
~ifdstream()
{
// Deletes the std::istream held in _stream. The base-class destructor is
// deliberately not invoked by hand (see the commented-out call below).
~ifdstream() {
//this->~_fdstream();
delete _stream;
}
@ -129,23 +120,19 @@ public:
{ }
ofdstream(int file_descriptor) :
_fdstream(file_descriptor, std::ios_base::out)
{
_fdstream(file_descriptor, std::ios_base::out) {
_stream = new std::ostream(_filebuf);
}
void open(int file_descriptor)
{
if (!_stream)
{
// Opens the output stream on `file_descriptor`. Does nothing when _stream
// has already been created; otherwise the base class is opened in output
// mode and a std::ostream is created on top of _filebuf.
void open(int file_descriptor) {
  if (_stream) {
    return;
  }
  _fdstream::open(file_descriptor, std::ios_base::out);
  _stream = new std::ostream(_filebuf);
}
ofdstream& operator<< (const std::string& str)
{
ofdstream& operator<< (const std::string& str) {
if (_stream->good())
(*_stream) << str;
@ -153,8 +140,7 @@ public:
return *this;
}
~ofdstream()
{
// Deletes the std::ostream held in _stream. The base-class destructor is
// deliberately not invoked by hand (see the commented-out call below).
~ofdstream() {
//this->~_fdstream();
delete _stream;
}

View File

@ -36,16 +36,28 @@ public:
FeatureArray();
~FeatureArray();
void clear() { m_array.clear(); }
// Removes all entries from m_array.
void clear() {
m_array.clear();
}
int getIndex() const { return m_index; }
void setIndex(const int value) { m_index = value; }
// Returns the index stored in m_index.
int getIndex() const {
return m_index;
}
// Stores `value` in m_index.
void setIndex(const int value) {
m_index = value;
}
FeatureStats& get(std::size_t i) { return m_array.at(i); }
const FeatureStats& get(std::size_t i) const { return m_array.at(i); }
// Mutable access to the i-th entry, obtained with m_array.at(i)
// (presumably bounds-checked, if m_array is a standard container).
FeatureStats& get(std::size_t i) {
return m_array.at(i);
}
// Read-only access to the i-th entry, obtained with m_array.at(i)
// (presumably bounds-checked, if m_array is a standard container).
const FeatureStats& get(std::size_t i) const {
return m_array.at(i);
}
void add(FeatureStats& e) { m_array.push_back(e); }
// Appends `e` to the end of m_array via push_back.
void add(FeatureStats& e) {
m_array.push_back(e);
}
//ADDED BY TS
void swap(std::size_t i, std::size_t j) {
@ -59,13 +71,23 @@ public:
void merge(FeatureArray& e);
std::size_t size() const { return m_array.size(); }
// Number of entries currently held in m_array.
std::size_t size() const {
return m_array.size();
}
std::size_t NumberOfFeatures() const { return m_num_features; }
void NumberOfFeatures(std::size_t v) { m_num_features = v; }
// Getter: returns the stored feature count (m_num_features).
std::size_t NumberOfFeatures() const {
return m_num_features;
}
// Setter: overwrites the stored feature count (m_num_features) with `v`.
void NumberOfFeatures(std::size_t v) {
m_num_features = v;
}
std::string Features() const { return m_features; }
void Features(const std::string& f) { m_features = f; }
// Getter: returns a copy of the stored feature string (m_features).
std::string Features() const {
return m_features;
}
// Setter: replaces the stored feature string (m_features) with `f`.
void Features(const std::string& f) {
m_features = f;
}
void savetxt(std::ostream* os);
void savebin(std::ostream* os);

View File

@ -38,7 +38,8 @@ void FeatureData::save(const string &file, bool bin)
ofs.close();
}
void FeatureData::save(bool bin) {
// Writes the feature data to standard output by forwarding to the
// stream-based save() overload with &cout; `bin` is passed through unchanged.
void FeatureData::save(bool bin)
{
save(&cout, bin);
}
@ -145,7 +146,8 @@ void FeatureData::setFeatureMap(const string& feat)
}
}
string FeatureData::ToString() const {
string FeatureData::ToString() const
{
string res;
{

View File

@ -33,7 +33,9 @@ public:
FeatureData();
~FeatureData() {}
void clear() { m_array.clear(); }
// Removes all entries from m_array.
void clear() {
m_array.clear();
}
FeatureArray& get(size_t idx) {
return m_array.at(idx);
@ -61,13 +63,23 @@ public:
void add(FeatureArray& e);
void add(FeatureStats& e, int sent_idx);
std::size_t size() const { return m_array.size(); }
// Number of entries currently held in m_array.
std::size_t size() const {
return m_array.size();
}
std::size_t NumberOfFeatures() const { return m_num_features; }
void NumberOfFeatures(std::size_t v) { m_num_features = v; }
// Getter: returns the stored feature count (m_num_features).
std::size_t NumberOfFeatures() const {
return m_num_features;
}
// Setter: overwrites the stored feature count (m_num_features) with `v`.
void NumberOfFeatures(std::size_t v) {
m_num_features = v;
}
std::string Features() const { return m_features; }
void Features(const std::string& f) { m_features = f; }
// Getter: returns a copy of the stored feature string (m_features).
std::string Features() const {
return m_features;
}
// Setter: replaces the stored feature string (m_features) with `f`.
void Features(const std::string& f) {
m_features = f;
}
void save(const std::string &file, bool bin=false);
void save(std::ostream* os, bool bin=false);

View File

@ -34,7 +34,8 @@ namespace MosesTuning
{
int ParseInt(const StringPiece& str ) {
int ParseInt(const StringPiece& str )
{
char* errIndex;
//could wrap?
int value = static_cast<int>(strtol(str.data(), &errIndex,10));
@ -44,7 +45,8 @@ int ParseInt(const StringPiece& str ) {
return value;
}
float ParseFloat(const StringPiece& str) {
float ParseFloat(const StringPiece& str)
{
char* errIndex;
float value = static_cast<float>(strtod(str.data(), &errIndex));
if (errIndex == str.data()) {
@ -53,11 +55,13 @@ float ParseFloat(const StringPiece& str) {
return value;
}
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2) {
/**
 * Equality for FeatureDataItem: two items are equal when their dense members
 * compare equal and their sparse members compare equal.
 *
 * Both members of item1 must be compared against item2. Comparing item1 with
 * itself ignores item2 entirely and makes distinct items look equal.
 */
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2)
{
  return item1.dense == item2.dense && item1.sparse == item2.sparse;
}
size_t hash_value(FeatureDataItem const& item) {
size_t hash_value(FeatureDataItem const& item)
{
size_t seed = 0;
boost::hash_combine(seed,item.dense);
boost::hash_combine(seed,item.sparse);
@ -67,14 +71,16 @@ size_t hash_value(FeatureDataItem const& item) {
FeatureDataIterator::FeatureDataIterator() {}
FeatureDataIterator::FeatureDataIterator(const string& filename) {
// Creates a FilePiece for `filename`, stores it in m_in, and immediately
// calls readNext() so the first record is available.
FeatureDataIterator::FeatureDataIterator(const string& filename)
{
m_in.reset(new FilePiece(filename.c_str()));
readNext();
}
FeatureDataIterator::~FeatureDataIterator() {}
void FeatureDataIterator::readNext() {
void FeatureDataIterator::readNext()
{
m_next.clear();
try {
StringPiece marker = m_in->ReadDelimited();
@ -117,11 +123,13 @@ void FeatureDataIterator::readNext() {
}
}
void FeatureDataIterator::increment() {
// Advances the iterator by reading the next record via readNext().
void FeatureDataIterator::increment()
{
readNext();
}
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const
{
if (!m_in && !rhs.m_in) {
return true;
} else if (!m_in) {
@ -134,7 +142,8 @@ bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
}
}
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const {
// Returns a const reference to m_next, the items held for the current
// position. readNext() clears m_next before reading, so the reference's
// contents change when the iterator advances.
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const
{
return m_next;
}

View File

@ -37,7 +37,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"
namespace util { class FilePiece; }
// Forward declaration of util::FilePiece; only the name is introduced here.
namespace util
{
class FilePiece;
}
namespace MosesTuning
{

View File

@ -7,10 +7,12 @@
using namespace MosesTuning;
namespace {
namespace
{
void CheckFeatureMap(const FeatureData* feature_data,
const char* str, int num_feature, int* cnt) {
const char* str, int num_feature, int* cnt)
{
for (int i = 0; i < num_feature; ++i) {
std::stringstream ss;
ss << str << "_" << i;
@ -23,7 +25,8 @@ void CheckFeatureMap(const FeatureData* feature_data,
} // namespace
BOOST_AUTO_TEST_CASE(set_feature_map) {
BOOST_AUTO_TEST_CASE(set_feature_map)
{
std::string str("d_0 d_1 d_2 d_3 d_4 d_5 d_6 lm_0 lm_1 tm_0 tm_1 tm_2 tm_3 tm_4 w_0 ");
FeatureData feature_data;

View File

@ -18,7 +18,8 @@
using namespace std;
namespace {
namespace
{
const int kAvailableSize = 8;
} // namespace
@ -29,20 +30,23 @@ namespace MosesTuning
SparseVector::name2id_t SparseVector::m_name_to_id;
SparseVector::id2name_t SparseVector::m_id_to_name;
FeatureStatsType SparseVector::get(const string& name) const {
// Looks a feature up by name. Returns 0 when the name has no id in
// m_name_to_id; otherwise returns the result of get(id) for the mapped id.
FeatureStatsType SparseVector::get(const string& name) const
{
  name2id_t::const_iterator entry = m_name_to_id.find(name);
  if (entry != m_name_to_id.end()) {
    return get(entry->second);
  }
  return 0;
}
FeatureStatsType SparseVector::get(size_t id) const {
// Looks a feature up by id. Returns the value stored in m_fvector for `id`,
// or 0 when the id has no entry.
FeatureStatsType SparseVector::get(size_t id) const
{
  fvector_t::const_iterator entry = m_fvector.find(id);
  if (entry != m_fvector.end()) {
    return entry->second;
  }
  return 0;
}
void SparseVector::set(const string& name, FeatureStatsType value) {
void SparseVector::set(const string& name, FeatureStatsType value)
{
name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
size_t id = 0;
if (name2id_iter == m_name_to_id.end()) {
@ -55,7 +59,8 @@ void SparseVector::set(const string& name, FeatureStatsType value) {
m_fvector[id] = value;
}
void SparseVector::write(ostream& out, const string& sep) const {
void SparseVector::write(ostream& out, const string& sep) const
{
for (fvector_t::const_iterator i = m_fvector.begin(); i != m_fvector.end(); ++i) {
if (abs(i->second) < 0.00001) continue;
string name = m_id_to_name[i->first];
@ -63,11 +68,13 @@ void SparseVector::write(ostream& out, const string& sep) const {
}
}
void SparseVector::clear() {
void SparseVector::clear()
{
m_fvector.clear();
}
void SparseVector::load(const string& file) {
void SparseVector::load(const string& file)
{
ifstream in(file.c_str());
if (!in) {
throw runtime_error("Failed to open sparse weights file: " + file);
@ -84,7 +91,8 @@ void SparseVector::load(const string& file) {
}
}
SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
SparseVector& SparseVector::operator-=(const SparseVector& rhs)
{
for (fvector_t::const_iterator i = rhs.m_fvector.begin();
i != rhs.m_fvector.end(); ++i) {
@ -93,7 +101,8 @@ SparseVector& SparseVector::operator-=(const SparseVector& rhs) {
return *this;
}
FeatureStatsType SparseVector::inner_product(const SparseVector& rhs) const {
FeatureStatsType SparseVector::inner_product(const SparseVector& rhs) const
{
FeatureStatsType product = 0.0;
for (fvector_t::const_iterator i = m_fvector.begin();
i != m_fvector.end(); ++i) {
@ -102,13 +111,15 @@ FeatureStatsType SparseVector::inner_product(const SparseVector& rhs) const {
return product;
}
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs) {
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs)
{
SparseVector res(lhs);
res -= rhs;
return res;
}
FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs) {
FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs)
{
if (lhs.size() >= rhs.size()) {
return rhs.inner_product(lhs);
} else {
@ -116,7 +127,8 @@ FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs)
}
}
std::vector<std::size_t> SparseVector::feats() const {
std::vector<std::size_t> SparseVector::feats() const
{
std::vector<std::size_t> toRet;
for(fvector_t::const_iterator iter = m_fvector.begin();
iter!=m_fvector.end();
@ -126,7 +138,8 @@ std::vector<std::size_t> SparseVector::feats() const {
return toRet;
}
std::size_t SparseVector::encode(const std::string& name) {
std::size_t SparseVector::encode(const std::string& name)
{
name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
size_t id = 0;
if (name2id_iter == m_name_to_id.end()) {
@ -139,15 +152,18 @@ std::size_t SparseVector::encode(const std::string& name) {
return id;
}
std::string SparseVector::decode(std::size_t id) {
std::string SparseVector::decode(std::size_t id)
{
return m_id_to_name[id];
}
bool operator==(SparseVector const& item1, SparseVector const& item2) {
bool operator==(SparseVector const& item1, SparseVector const& item2)
{
return item1.m_fvector==item2.m_fvector;
}
std::size_t hash_value(SparseVector const& item) {
std::size_t hash_value(SparseVector const& item)
{
boost::hash<SparseVector::fvector_t> hasher;
return hasher(item.m_fvector);
}
@ -276,7 +292,8 @@ void FeatureStats::savetxt(ostream* os)
*os << *this;
}
void FeatureStats::savetxt() {
void FeatureStats::savetxt()
{
savetxt(&cout);
}
@ -298,7 +315,8 @@ ostream& operator<<(ostream& o, const FeatureStats& e)
return o;
}
bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
bool operator==(const FeatureStats& f1, const FeatureStats& f2)
{
size_t size = f1.size();
if (size != f2.size())

View File

@ -21,7 +21,8 @@ namespace MosesTuning
// Minimal sparse vector
class SparseVector {
class SparseVector
{
public:
typedef std::map<std::size_t,FeatureStatsType> fvector_t;
typedef std::map<std::string, std::size_t> name2id_t;
@ -32,7 +33,9 @@ public:
void set(const std::string& name, FeatureStatsType value);
void clear();
void load(const std::string& file);
std::size_t size() const { return m_fvector.size(); }
std::size_t size() const {
return m_fvector.size();
}
void write(std::ostream& out, const std::string& sep = " ") const;
@ -78,7 +81,9 @@ public:
void Copy(const FeatureStats &stats);
bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
bool isfull() const {
return (m_entries < m_available_size) ? 0 : 1;
}
void expand();
void add(FeatureStatsType v);
void addSparse(const std::string& name, FeatureStatsType v);
@ -93,23 +98,37 @@ public:
clear();
}
FeatureStatsType get(std::size_t i) { return m_array[i]; }
FeatureStatsType get(std::size_t i)const { return m_array[i]; }
featstats_t getArray() const { return m_array; }
FeatureStatsType get(std::size_t i) {
return m_array[i];
}
FeatureStatsType get(std::size_t i)const {
return m_array[i];
}
featstats_t getArray() const {
return m_array;
}
const SparseVector& getSparse() const { return m_map; }
const SparseVector& getSparse() const {
return m_map;
}
void set(std::string &theString, const SparseVector& sparseWeights);
inline std::size_t bytes() const { return GetArraySizeWithBytes(); }
inline std::size_t bytes() const {
return GetArraySizeWithBytes();
}
std::size_t GetArraySizeWithBytes() const {
return m_entries * sizeof(FeatureStatsType);
}
std::size_t size() const { return m_entries; }
std::size_t size() const {
return m_entries;
}
std::size_t available() const { return m_available_size; }
std::size_t available() const {
return m_available_size;
}
void savetxt(const std::string &file);
void savetxt(std::ostream* os);

View File

@ -5,8 +5,10 @@
using namespace std;
namespace {
bool IsGzipFile(const std::string &filename) {
namespace
{
bool IsGzipFile(const std::string &filename)
{
return filename.size() > 3 &&
filename.substr(filename.size() - 3, 3) == ".gz";
}

View File

@ -16,7 +16,9 @@ public:
explicit inputfilestream(const std::string &filePath);
virtual ~inputfilestream();
bool good() const { return m_is_good; }
bool good() const {
return m_is_good;
}
void close();
};
@ -30,7 +32,9 @@ public:
explicit outputfilestream(const std::string &filePath);
virtual ~outputfilestream();
bool good() const { return m_is_good; }
bool good() const {
return m_is_good;
}
void close();
};

View File

@ -5,7 +5,8 @@
#include <cstdio>
#include <iostream>
GzFileBuf::GzFileBuf(const char* filename) {
GzFileBuf::GzFileBuf(const char* filename)
{
m_gz_file = gzopen(filename, "rb");
if (m_gz_file == NULL) {
std::cerr << "ERROR: Failed to open " << filename << std::endl;
@ -16,16 +17,19 @@ GzFileBuf::GzFileBuf(const char* filename) {
m_buf + sizeof(int)); // end position
}
GzFileBuf::~GzFileBuf() {
GzFileBuf::~GzFileBuf()
{
gzclose(m_gz_file);
}
int GzFileBuf::overflow(int_type c) {
int GzFileBuf::overflow(int_type c)
{
throw;
}
// read one character
int GzFileBuf::underflow() {
int GzFileBuf::underflow()
{
// is read position before end of m_buf?
if (gptr() < egptr()) {
return traits_type::to_int_type(*gptr());
@ -65,16 +69,19 @@ int GzFileBuf::underflow() {
std::streampos GzFileBuf::seekpos(
std::streampos sp,
std::ios_base::openmode which) {
std::ios_base::openmode which)
{
throw;
}
std::streamsize GzFileBuf::xsgetn(char* s,
std::streamsize num) {
std::streamsize num)
{
return static_cast<std::streamsize>(gzread(m_gz_file,s,num));
}
std::streamsize GzFileBuf::xsputn(const char* s,
std::streamsize num) {
std::streamsize num)
{
throw;
}

View File

@ -34,7 +34,8 @@ StreamingHypPackEnumerator::StreamingHypPackEnumerator
m_iNumDense = -1;
}
size_t StreamingHypPackEnumerator::num_dense() const {
size_t StreamingHypPackEnumerator::num_dense() const
{
if(m_iNumDense<0) {
cerr << "Error: Requested num_dense() for an unprimed StreamingHypPackEnumerator" << endl;
exit(1);
@ -42,7 +43,8 @@ size_t StreamingHypPackEnumerator::num_dense() const {
return (size_t) m_iNumDense;
}
void StreamingHypPackEnumerator::prime(){
void StreamingHypPackEnumerator::prime()
{
m_current_indexes.clear();
m_current_featureVectors.clear();
boost::unordered_set<FeatureDataItem> seen;
@ -84,7 +86,8 @@ void StreamingHypPackEnumerator::prime(){
}
}
void StreamingHypPackEnumerator::reset(){
void StreamingHypPackEnumerator::reset()
{
m_featureDataIters.clear();
m_scoreDataIters.clear();
for (size_t i = 0; i < m_num_lists; ++i) {
@ -95,11 +98,13 @@ void StreamingHypPackEnumerator::reset(){
prime();
}
bool StreamingHypPackEnumerator::finished(){
bool StreamingHypPackEnumerator::finished()
{
return m_featureDataIters[0]==FeatureDataIterator::end();
}
void StreamingHypPackEnumerator::next(){
void StreamingHypPackEnumerator::next()
{
if(!m_primed) {
cerr << "Enumerating an unprimed HypPackEnumerator" << endl;
exit(1);
@ -113,7 +118,8 @@ void StreamingHypPackEnumerator::next(){
if(!finished()) prime();
}
size_t StreamingHypPackEnumerator::cur_size(){
size_t StreamingHypPackEnumerator::cur_size()
{
if(!m_primed) {
cerr << "Querying size from an unprimed HypPackEnumerator" << endl;
exit(1);
@ -121,7 +127,8 @@ size_t StreamingHypPackEnumerator::cur_size(){
return m_current_indexes.size();
}
const MiraFeatureVector& StreamingHypPackEnumerator::featuresAt(size_t index){
const MiraFeatureVector& StreamingHypPackEnumerator::featuresAt(size_t index)
{
if(!m_primed) {
cerr << "Querying features from an unprimed HypPackEnumerator" << endl;
exit(1);
@ -129,7 +136,8 @@ const MiraFeatureVector& StreamingHypPackEnumerator::featuresAt(size_t index){
return m_current_featureVectors[index];
}
const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index)
{
if(!m_primed) {
cerr << "Querying scores from an unprimed HypPackEnumerator" << endl;
exit(1);
@ -138,7 +146,8 @@ const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
return m_scoreDataIters[pij.first]->operator[](pij.second);
}
size_t StreamingHypPackEnumerator::cur_id() {
size_t StreamingHypPackEnumerator::cur_id()
{
return m_sentenceId;
}
@ -165,33 +174,41 @@ RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> cons
m_num_dense = train.num_dense();
}
size_t RandomAccessHypPackEnumerator::num_dense() const {
size_t RandomAccessHypPackEnumerator::num_dense() const
{
return m_num_dense;
}
void RandomAccessHypPackEnumerator::reset() {
void RandomAccessHypPackEnumerator::reset()
{
m_cur_index = 0;
if(!m_no_shuffle) random_shuffle(m_indexes.begin(),m_indexes.end());
}
bool RandomAccessHypPackEnumerator::finished() {
bool RandomAccessHypPackEnumerator::finished()
{
return m_cur_index >= m_indexes.size();
}
void RandomAccessHypPackEnumerator::next() {
void RandomAccessHypPackEnumerator::next()
{
m_cur_index++;
}
size_t RandomAccessHypPackEnumerator::cur_size() {
size_t RandomAccessHypPackEnumerator::cur_size()
{
assert(m_features[m_indexes[m_cur_index]].size()==m_scores[m_indexes[m_cur_index]].size());
return m_features[m_indexes[m_cur_index]].size();
}
const MiraFeatureVector& RandomAccessHypPackEnumerator::featuresAt(size_t i) {
const MiraFeatureVector& RandomAccessHypPackEnumerator::featuresAt(size_t i)
{
return m_features[m_indexes[m_cur_index]][i];
}
const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i) {
const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i)
{
return m_scores[m_indexes[m_cur_index]][i];
}
size_t RandomAccessHypPackEnumerator::cur_id() {
size_t RandomAccessHypPackEnumerator::cur_id()
{
return m_indexes[m_cur_index];
}
// --Emacs trickery--

View File

@ -24,7 +24,8 @@ namespace MosesTuning
// Start with these abstract classes
class HypPackEnumerator {
class HypPackEnumerator
{
public:
virtual ~HypPackEnumerator() {}
@ -41,7 +42,8 @@ public:
// Instantiation that streams from disk
// Low-memory, low-speed, sequential access
class StreamingHypPackEnumerator : public HypPackEnumerator {
class StreamingHypPackEnumerator : public HypPackEnumerator
{
public:
StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
std::vector<std::string> const& scoreFiles);
@ -75,7 +77,8 @@ private:
// Instantiation that reads into memory
// High-memory, high-speed, random access
// (Actually randomizes with each call to reset)
class RandomAccessHypPackEnumerator : public HypPackEnumerator {
class RandomAccessHypPackEnumerator : public HypPackEnumerator
{
public:
RandomAccessHypPackEnumerator(std::vector<std::string> const& featureFiles,
std::vector<std::string> const& scoreFiles,

View File

@ -66,7 +66,8 @@ InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
cerr <<endl;
}
bool InterpolatedScorer::useAlignment() const {
bool InterpolatedScorer::useAlignment() const
{
//cout << "InterpolatedScorer::useAlignment" << endl;
for (vector<Scorer*>::const_iterator itsc = m_scorers.begin(); itsc < m_scorers.end(); itsc++) {
if ((*itsc)->useAlignment()) {
@ -176,8 +177,7 @@ void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats
ScoreStats tempEntry;
if ((*itsc)->useAlignment()) {
(*itsc)->prepareStats(sid, text, tempEntry);
}
else {
} else {
(*itsc)->prepareStats(sid, sentence, tempEntry);
}
if (i > 0) buff << " ";

View File

@ -17,8 +17,7 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
size_t lastFeat = 0;
m_sparseFeats.reserve(sparseFeats.size());
m_sparseVals.reserve(sparseFeats.size());
for(size_t i=0;i<sparseFeats.size();i++)
{
for(size_t i=0; i<sparseFeats.size(); i++) {
size_t feat = m_dense.size() + sparseFeats[i];
m_sparseFeats.push_back(feat);
m_sparseVals.push_back(vec.sparse.get(sparseFeats[i]));
@ -26,8 +25,7 @@ MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
// Check ordered property
if(bFirst) {
bFirst = false;
}
else {
} else {
if(lastFeat>=feat) {
cerr << "Error: Feature indeces must be strictly ascending coming out of SparseVector" << endl;
exit(1);
@ -61,25 +59,29 @@ MiraFeatureVector::MiraFeatureVector(const vector<ValType>& dense,
}
}
ValType MiraFeatureVector::val(size_t index) const {
ValType MiraFeatureVector::val(size_t index) const
{
if(index < m_dense.size())
return m_dense[index];
else
return m_sparseVals[index-m_dense.size()];
}
size_t MiraFeatureVector::feat(size_t index) const {
size_t MiraFeatureVector::feat(size_t index) const
{
if(index < m_dense.size())
return index;
else
return m_sparseFeats[index-m_dense.size()];
}
size_t MiraFeatureVector::size() const {
size_t MiraFeatureVector::size() const
{
return m_dense.size() + m_sparseVals.size();
}
ValType MiraFeatureVector::sqrNorm() const {
ValType MiraFeatureVector::sqrNorm() const
{
ValType toRet = 0.0;
for(size_t i=0; i<m_dense.size(); i++)
toRet += m_dense[i]*m_dense[i];

View File

@ -23,7 +23,8 @@ namespace MosesTuning
typedef FeatureStatsType ValType;
class MiraFeatureVector {
class MiraFeatureVector
{
public:
MiraFeatureVector(const FeatureDataItem& vec);
MiraFeatureVector(const MiraFeatureVector& other);

View File

@ -36,7 +36,8 @@ MiraWeightVector::MiraWeightVector(const vector<ValType>& init)
* \param fv Feature vector to be added to the weights
* \param tau FV will be scaled by this value before update
*/
void MiraWeightVector::update(const MiraFeatureVector& fv, float tau) {
void MiraWeightVector::update(const MiraFeatureVector& fv, float tau)
{
m_numUpdates++;
for(size_t i=0; i<fv.size(); i++) {
update(fv.feat(i), fv.val(i)*tau);
@ -46,7 +47,8 @@ void MiraWeightVector::update(const MiraFeatureVector& fv, float tau) {
/**
* Perform an empty update (affects averaging)
*/
void MiraWeightVector::tick() {
void MiraWeightVector::tick()
{
m_numUpdates++;
}
@ -54,7 +56,8 @@ void MiraWeightVector::tick() {
* Score a feature vector according to the model
* \param fv Feature vector to be scored
*/
ValType MiraWeightVector::score(const MiraFeatureVector& fv) const {
ValType MiraWeightVector::score(const MiraFeatureVector& fv) const
{
ValType toRet = 0.0;
for(size_t i=0; i<fv.size(); i++) {
toRet += weight(fv.feat(i)) * fv.val(i);
@ -65,7 +68,8 @@ ValType MiraWeightVector::score(const MiraFeatureVector& fv) const {
/**
* Return an averaged view of this weight vector
*/
AvgWeightVector MiraWeightVector::avg() {
AvgWeightVector MiraWeightVector::avg()
{
this->fixTotals();
return AvgWeightVector(*this);
}
@ -73,7 +77,8 @@ AvgWeightVector MiraWeightVector::avg() {
/**
* Updates a weight and lazily updates its total
*/
void MiraWeightVector::update(size_t index, ValType delta) {
void MiraWeightVector::update(size_t index, ValType delta)
{
// Handle previously unseen weights
while(index>=m_weights.size()) {
@ -91,23 +96,25 @@ void MiraWeightVector::update(size_t index, ValType delta) {
/**
* Make sure everyone's total is up-to-date
*/
void MiraWeightVector::fixTotals() {
void MiraWeightVector::fixTotals()
{
for(size_t i=0; i<m_weights.size(); i++) update(i,0);
}
/**
* Helper to handle out of range weights
*/
ValType MiraWeightVector::weight(size_t index) const {
ValType MiraWeightVector::weight(size_t index) const
{
if(index < m_weights.size()) {
return m_weights[index];
}
else {
} else {
return 0;
}
}
ValType MiraWeightVector::sqrNorm() const {
ValType MiraWeightVector::sqrNorm() const
{
ValType toRet = 0;
for(size_t i=0; i<m_weights.size(); i++) {
toRet += weight(i) * weight(i);
@ -136,14 +143,14 @@ ValType AvgWeightVector::weight(size_t index) const
else {
if(index < m_wv.m_totals.size()) {
return m_wv.m_totals[index] / m_wv.m_numUpdates;
}
else {
} else {
return 0;
}
}
}
ValType AvgWeightVector::score(const MiraFeatureVector& fv) const {
ValType AvgWeightVector::score(const MiraFeatureVector& fv) const
{
ValType toRet = 0.0;
for(size_t i=0; i<fv.size(); i++) {
toRet += weight(fv.feat(i)) * fv.val(i);
@ -151,7 +158,8 @@ ValType AvgWeightVector::score(const MiraFeatureVector& fv) const {
return toRet;
}
size_t AvgWeightVector::size() const {
size_t AvgWeightVector::size() const
{
return m_wv.m_weights.size();
}

View File

@ -21,7 +21,8 @@ namespace MosesTuning
class AvgWeightVector;
class MiraWeightVector {
class MiraWeightVector
{
public:
/**
* Constructor, initializes to the zero vector
@ -91,7 +92,8 @@ private:
/**
* Averaged view of a weight vector
*/
class AvgWeightVector {
class AvgWeightVector
{
public:
AvgWeightVector(const MiraWeightVector& wv);
ValType score(const MiraFeatureVector& fv) const;

View File

@ -13,7 +13,8 @@ namespace MosesTuning
* typical accessors and mutaors, but we intentionally does not allow
* erasing elements.
*/
class NgramCounts {
class NgramCounts
{
public:
// Used to construct the ngram map
struct NgramComparator {
@ -45,7 +46,9 @@ class NgramCounts {
/**
* If the specified "ngram" is found, we add counts.
* If not, we insert the default count in the container. */
inline void Add(const Key& ngram) { m_counts[ngram]++; }
inline void Add(const Key& ngram) {
m_counts[ngram]++;
}
/**
* Return true iff the specified "ngram" is found in the container.
@ -60,32 +63,56 @@ class NgramCounts {
/**
* Clear all elments in the container.
*/
void clear() { m_counts.clear(); }
void clear() {
m_counts.clear();
}
/**
* Return true iff the container is empty.
*/
bool empty() const { return m_counts.empty(); }
bool empty() const {
return m_counts.empty();
}
/**
* Return the the number of elements in the container.
*/
std::size_t size() const { return m_counts.size(); }
std::size_t size() const {
return m_counts.size();
}
std::size_t max_size() const { return m_counts.max_size(); }
std::size_t max_size() const {
return m_counts.max_size();
}
// Note: This is mainly used by unit tests.
int get_default_count() const { return kDefaultCount; }
int get_default_count() const {
return kDefaultCount;
}
iterator find(const Key& ngram) { return m_counts.find(ngram); }
const_iterator find(const Key& ngram) const { return m_counts.find(ngram); }
iterator find(const Key& ngram) {
return m_counts.find(ngram);
}
const_iterator find(const Key& ngram) const {
return m_counts.find(ngram);
}
Value& operator[](const Key& ngram) { return m_counts[ngram]; }
Value& operator[](const Key& ngram) {
return m_counts[ngram];
}
iterator begin() { return m_counts.begin(); }
const_iterator begin() const { return m_counts.begin(); }
iterator end() { return m_counts.end(); }
const_iterator end() const { return m_counts.end(); }
iterator begin() {
return m_counts.begin();
}
const_iterator begin() const {
return m_counts.begin();
}
iterator end() {
return m_counts.end();
}
const_iterator end() const {
return m_counts.end();
}
private:
const int kDefaultCount;

View File

@ -5,7 +5,8 @@
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(ngram_basic) {
BOOST_AUTO_TEST_CASE(ngram_basic)
{
NgramCounts counts;
NgramCounts::Key key;
key.push_back(1);
@ -25,7 +26,8 @@ BOOST_AUTO_TEST_CASE(ngram_basic) {
BOOST_CHECK_EQUAL(it->second, 1);
}
BOOST_AUTO_TEST_CASE(ngram_Add) {
BOOST_AUTO_TEST_CASE(ngram_Add)
{
NgramCounts counts;
NgramCounts::Key key;
key.push_back(1);
@ -49,7 +51,8 @@ BOOST_AUTO_TEST_CASE(ngram_Add) {
BOOST_CHECK_EQUAL(counts[key3], counts.get_default_count());
}
BOOST_AUTO_TEST_CASE(ngram_lookup) {
BOOST_AUTO_TEST_CASE(ngram_lookup)
{
NgramCounts counts;
NgramCounts::Key key;
key.push_back(1);

View File

@ -17,7 +17,8 @@ using namespace std;
static const float MIN_FLOAT = -1.0 * numeric_limits<float>::max();
static const float MAX_FLOAT = numeric_limits<float>::max();
namespace {
namespace
{
/**
* Compute the intersection of 2 lines.
@ -405,8 +406,7 @@ statscore_t SimpleOptimizer::TrueRun(Point& P) const
for (unsigned int i = 0; i < Point::getdim(); i++)
direction[i]=0.0;
direction[d]=1.0;
}
else { // random direction update
} else { // random direction update
direction.Randomize();
}
statscore_t curscore = LineOptimize(P, direction, linebest);//find the minimum on the line
@ -443,8 +443,7 @@ statscore_t RandomDirectionOptimizer::TrueRun(Point& P) const
// do specified number of random direction optimizations
unsigned int nrun = 0;
unsigned int nrun_no_change = 0;
for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++)
{
for (; nrun_no_change < m_num_random_directions; nrun++, nrun_no_change++) {
// choose a random direction in which to optimize
Point direction;
direction.Randomize();

View File

@ -31,8 +31,12 @@ protected:
public:
Optimizer(unsigned Pd, const std::vector<unsigned>& i2O, const std::vector<bool>& positive, const std::vector<parameter_t>& start, unsigned int nrandom);
void SetScorer(Scorer *scorer) { m_scorer = scorer; }
void SetFeatureData(FeatureDataHandle feature_data) { m_feature_data = feature_data; }
void SetScorer(Scorer *scorer) {
m_scorer = scorer;
}
void SetFeatureData(FeatureDataHandle feature_data) {
m_feature_data = feature_data;
}
virtual ~Optimizer();
unsigned size() const {

View File

@ -7,21 +7,24 @@
using namespace MosesTuning;
namespace {
namespace
{
inline bool CheckBuildOptimizer(unsigned dim,
const std::vector<unsigned>& to_optimize,
const std::vector<bool>& positive,
const std::vector<parameter_t>& start,
const std::string& type,
unsigned int num_random) {
unsigned int num_random)
{
boost::scoped_ptr<Optimizer> optimizer(OptimizerFactory::BuildOptimizer(dim, to_optimize, positive, start, type, num_random));
return optimizer.get() != NULL;
}
} // namespace
BOOST_AUTO_TEST_CASE(optimizer_type) {
BOOST_AUTO_TEST_CASE(optimizer_type)
{
BOOST_CHECK_EQUAL(OptimizerFactory::GetOptimizerType("powell"),
OptimizerFactory::POWELL);
BOOST_CHECK_EQUAL(OptimizerFactory::GetOptimizerType("random"),
@ -30,7 +33,8 @@ BOOST_AUTO_TEST_CASE(optimizer_type) {
OptimizerFactory::RANDOM_DIRECTION);
}
BOOST_AUTO_TEST_CASE(optimizer_build) {
BOOST_AUTO_TEST_CASE(optimizer_build)
{
const unsigned dim = 3;
std::vector<unsigned> to_optimize;
to_optimize.push_back(1);

View File

@ -27,7 +27,9 @@ public:
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual std::size_t NumberOfScores() const { return 3; }
virtual std::size_t NumberOfScores() const {
return 3;
}
virtual float calculateScore(const std::vector<int>& comps) const;
private:

View File

@ -53,11 +53,19 @@ private:
statscore_t m_score;
public:
static unsigned int getdim() { return m_dim; }
static void setdim(std::size_t d) { m_dim = d; }
static unsigned int getdim() {
return m_dim;
}
static void setdim(std::size_t d) {
m_dim = d;
}
static unsigned int getpdim() { return m_pdim; }
static void setpdim(std::size_t pd) { m_pdim = pd; }
static unsigned int getpdim() {
return m_pdim;
}
static void setpdim(std::size_t pd) {
m_pdim = pd;
}
static void set_optindices(const std::vector<unsigned int>& indices) {
m_opt_indices = indices;
@ -90,7 +98,9 @@ public:
*/
friend std::ostream& operator<<(std::ostream& o,const Point& P);
void Normalize() { NormalizeL2(); }
void Normalize() {
NormalizeL2();
}
void NormalizeL2();
void NormalizeL1();
@ -100,8 +110,12 @@ public:
*/
void GetAllWeights(std::vector<parameter_t>& w) const;
statscore_t GetScore() const { return m_score; }
void SetScore(statscore_t score) { m_score = score; }
statscore_t GetScore() const {
return m_score;
}
void SetScore(statscore_t score) {
m_score = score;
}
};
}

View File

@ -9,7 +9,8 @@
using namespace std;
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(point_operators) {
BOOST_AUTO_TEST_CASE(point_operators)
{
const unsigned int dim = 5;
vector<float> init(dim);
init[0] = 1.0f;

View File

@ -37,8 +37,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
action.sa_handler = exec_failed;
sigemptyset(&action.sa_mask);
action.sa_flags = 0;
if (sigaction(SIGUSR1, &action, NULL) < 0)
{
if (sigaction(SIGUSR1, &action, NULL) < 0) {
perror("SIGUSR1 install error");
exit(EXIT_FAILURE);
}
@ -53,15 +52,13 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
// the same pipe and they can communicate.
pipe_status = pipe(pipefds_input);
if (pipe_status == -1)
{
if (pipe_status == -1) {
perror("Error creating the pipe");
exit(EXIT_FAILURE);
}
pipe_status = pipe(pipefds_output);
if (pipe_status == -1)
{
if (pipe_status == -1) {
perror("Error creating the pipe");
exit(EXIT_FAILURE);
}
@ -79,8 +76,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
// Create child process; both processes continue from here
pid = fork();
if (pid == pid_t(0))
{
if (pid == pid_t(0)) {
// Child process
// When the child process finishes sends a SIGCHLD signal
@ -106,9 +102,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
// Tell the parent the exec failed
kill(getppid(), SIGUSR1);
exit(EXIT_FAILURE);
}
else if (pid > pid_t(0))
{
} else if (pid > pid_t(0)) {
// Parent
// Close in the parent the unused ends of the pipes
@ -118,9 +112,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
m_toFilter = new ofdstream(CHILD_STDIN_WRITE);
m_fromFilter = new ifdstream(CHILD_STDOUT_READ);
}
else
{
} else {
perror("Error: fork failed");
exit(EXIT_FAILURE);
}

View File

@ -15,26 +15,45 @@ namespace MosesTuning
* Reference class represents reference translations for an output
* translation used in calculating BLEU score.
*/
class Reference {
class Reference
{
public:
// for m_length
typedef std::vector<std::size_t>::iterator iterator;
typedef std::vector<std::size_t>::const_iterator const_iterator;
Reference() : m_counts(new NgramCounts) { }
~Reference() { delete m_counts; }
~Reference() {
delete m_counts;
}
NgramCounts* get_counts() { return m_counts; }
const NgramCounts* get_counts() const { return m_counts; }
NgramCounts* get_counts() {
return m_counts;
}
const NgramCounts* get_counts() const {
return m_counts;
}
iterator begin() { return m_length.begin(); }
const_iterator begin() const { return m_length.begin(); }
iterator end() { return m_length.end(); }
const_iterator end() const { return m_length.end(); }
iterator begin() {
return m_length.begin();
}
const_iterator begin() const {
return m_length.begin();
}
iterator end() {
return m_length.end();
}
const_iterator end() const {
return m_length.end();
}
void push_back(std::size_t len) { m_length.push_back(len); }
void push_back(std::size_t len) {
m_length.push_back(len);
}
std::size_t num_references() const { return m_length.size(); }
std::size_t num_references() const {
return m_length.size();
}
int CalcAverage() const;
int CalcClosest(std::size_t length) const;
@ -49,7 +68,8 @@ class Reference {
// TODO(tetsuok): fix this function and related stuff.
// "average" reference length should not be calculated at sentence-level unlike "closest".
inline int Reference::CalcAverage() const {
inline int Reference::CalcAverage() const
{
int total = 0;
for (std::size_t i = 0; i < m_length.size(); ++i) {
total += m_length[i];
@ -58,7 +78,8 @@ inline int Reference::CalcAverage() const {
static_cast<float>(total) / m_length.size());
}
inline int Reference::CalcClosest(std::size_t length) const {
inline int Reference::CalcClosest(std::size_t length) const
{
int min_diff = INT_MAX;
int closest_ref_id = 0; // an index of the closest reference translation
for (std::size_t i = 0; i < m_length.size(); ++i) {
@ -79,7 +100,8 @@ inline int Reference::CalcClosest(std::size_t length) const {
return static_cast<int>(m_length[closest_ref_id]);
}
inline int Reference::CalcShortest() const {
inline int Reference::CalcShortest() const
{
return *std::min_element(m_length.begin(), m_length.end());
}

View File

@ -5,12 +5,14 @@
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(refernece_count) {
BOOST_AUTO_TEST_CASE(refernece_count)
{
Reference ref;
BOOST_CHECK(ref.get_counts() != NULL);
}
BOOST_AUTO_TEST_CASE(refernece_length_iterator) {
BOOST_AUTO_TEST_CASE(refernece_length_iterator)
{
Reference ref;
ref.push_back(4);
ref.push_back(2);
@ -24,7 +26,8 @@ BOOST_AUTO_TEST_CASE(refernece_length_iterator) {
BOOST_CHECK(it == ref.end());
}
BOOST_AUTO_TEST_CASE(refernece_length_average) {
BOOST_AUTO_TEST_CASE(refernece_length_average)
{
{
Reference ref;
ref.push_back(4);
@ -49,7 +52,8 @@ BOOST_AUTO_TEST_CASE(refernece_length_average) {
}
}
BOOST_AUTO_TEST_CASE(refernece_length_closest) {
BOOST_AUTO_TEST_CASE(refernece_length_closest)
{
{
Reference ref;
ref.push_back(4);
@ -92,7 +96,8 @@ BOOST_AUTO_TEST_CASE(refernece_length_closest) {
}
}
BOOST_AUTO_TEST_CASE(refernece_length_shortest) {
BOOST_AUTO_TEST_CASE(refernece_length_shortest)
{
{
Reference ref;
ref.push_back(4);

View File

@ -7,17 +7,24 @@ namespace MosesTuning
{
template <class T>
class ScopedVector {
class ScopedVector
{
public:
typedef typename std::vector<T*>::iterator iterator;
typedef typename std::vector<T*>::const_iterator const_iterator;
ScopedVector() {}
virtual ~ScopedVector() { reset(); }
virtual ~ScopedVector() {
reset();
}
bool empty() const { return m_vec.empty(); }
bool empty() const {
return m_vec.empty();
}
void push_back(T *e) { m_vec.push_back(e); }
void push_back(T *e) {
m_vec.push_back(e);
}
void reset() {
for (iterator it = m_vec.begin(); it != m_vec.end(); ++it) {
@ -26,25 +33,51 @@ class ScopedVector {
m_vec.clear();
}
void reserve(std::size_t capacity) { m_vec.reserve(capacity); }
void resize(std::size_t size) { m_vec.resize(size); }
void reserve(std::size_t capacity) {
m_vec.reserve(capacity);
}
void resize(std::size_t size) {
m_vec.resize(size);
}
std::size_t size() const {return m_vec.size(); }
std::size_t size() const {
return m_vec.size();
}
iterator begin() { return m_vec.begin(); }
const_iterator begin() const { return m_vec.begin(); }
iterator begin() {
return m_vec.begin();
}
const_iterator begin() const {
return m_vec.begin();
}
iterator end() { return m_vec.end(); }
const_iterator end() const { return m_vec.end(); }
iterator end() {
return m_vec.end();
}
const_iterator end() const {
return m_vec.end();
}
std::vector<T*>& get() { return m_vec; }
const std::vector<T*>& get() const { return m_vec; }
std::vector<T*>& get() {
return m_vec;
}
const std::vector<T*>& get() const {
return m_vec;
}
std::vector<T*>* operator->() { return &m_vec; }
const std::vector<T*>* operator->() const { return &m_vec; }
std::vector<T*>* operator->() {
return &m_vec;
}
const std::vector<T*>* operator->() const {
return &m_vec;
}
T*& operator[](std::size_t i) { return m_vec[i]; }
const T* operator[](std::size_t i) const { return m_vec[i]; }
T*& operator[](std::size_t i) {
return m_vec[i];
}
const T* operator[](std::size_t i) const {
return m_vec[i];
}
private:
std::vector<T*> m_vec;

View File

@ -63,7 +63,8 @@ void ScoreArray::save(const string &file, const string& score_type, bool bin)
ofs.close();
}
void ScoreArray::save(const string& score_type, bool bin) {
void ScoreArray::save(const string& score_type, bool bin)
{
save(&cout, score_type, bin);
}

View File

@ -38,17 +38,29 @@ public:
ScoreArray();
~ScoreArray() {}
void clear() { m_array.clear(); }
void clear() {
m_array.clear();
}
int getIndex() const { return m_index; }
int getIndex() const {
return m_index;
}
void setIndex(int value) { m_index = value; }
void setIndex(int value) {
m_index = value;
}
ScoreStats& get(std::size_t i) { return m_array.at(i); }
ScoreStats& get(std::size_t i) {
return m_array.at(i);
}
const ScoreStats& get(std::size_t i) const { return m_array.at(i); }
const ScoreStats& get(std::size_t i) const {
return m_array.at(i);
}
void add(const ScoreStats& e) { m_array.push_back(e); }
void add(const ScoreStats& e) {
m_array.push_back(e);
}
//ADDED BY TS
void swap(std::size_t i, std::size_t j) {
@ -62,15 +74,25 @@ public:
void merge(ScoreArray& e);
std::string name() const { return m_score_type; }
std::string name() const {
return m_score_type;
}
// Setter overload of name(): stores a copy of score_type in m_score_type.
// The argument is only read, so it is taken by const reference; this keeps
// every existing call site valid and additionally accepts const strings and
// temporaries, matching ScoreData::name(const std::string&).
void name(const std::string &score_type) {
  m_score_type = score_type;
}
std::size_t size() const { return m_array.size(); }
std::size_t size() const {
return m_array.size();
}
std::size_t NumberOfScores() const { return m_num_scores; }
std::size_t NumberOfScores() const {
return m_num_scores;
}
void NumberOfScores(std::size_t v) { m_num_scores = v; }
void NumberOfScores(std::size_t v) {
m_num_scores = v;
}
void savetxt(std::ostream* os, const std::string& score_type);
void savebin(std::ostream* os, const std::string& score_type);

View File

@ -50,7 +50,8 @@ void ScoreData::save(const string &file, bool bin)
ofs.close();
}
void ScoreData::save(bool bin) {
void ScoreData::save(bool bin)
{
save(&cout, bin);
}

View File

@ -40,7 +40,9 @@ public:
ScoreData(Scorer* scorer);
~ScoreData() {}
void clear() { m_array.clear(); }
void clear() {
m_array.clear();
}
inline ScoreArray& get(std::size_t idx) {
return m_array.at(idx);
@ -66,7 +68,9 @@ public:
return m_array.at(i).get(j);
}
std::string name() const { return m_score_type; }
std::string name() const {
return m_score_type;
}
std::string name(const std::string &score_type) {
return m_score_type = score_type;
@ -75,8 +79,12 @@ public:
void add(ScoreArray& e);
void add(const ScoreStats& e, int sent_idx);
std::size_t NumberOfScores() const { return m_num_scores; }
std::size_t size() const { return m_array.size(); }
std::size_t NumberOfScores() const {
return m_num_scores;
}
std::size_t size() const {
return m_array.size();
}
void save(const std::string &file, bool bin=false);
void save(std::ostream* os, bool bin=false);

View File

@ -33,14 +33,16 @@ namespace MosesTuning
ScoreDataIterator::ScoreDataIterator() {}
ScoreDataIterator::ScoreDataIterator(const string& filename) {
ScoreDataIterator::ScoreDataIterator(const string& filename)
{
m_in.reset(new FilePiece(filename.c_str()));
readNext();
}
ScoreDataIterator::~ScoreDataIterator() {}
void ScoreDataIterator::readNext() {
void ScoreDataIterator::readNext()
{
m_next.clear();
try {
StringPiece marker = m_in->ReadDelimited();
@ -71,12 +73,14 @@ void ScoreDataIterator::readNext() {
}
}
void ScoreDataIterator::increment() {
void ScoreDataIterator::increment()
{
readNext();
}
bool ScoreDataIterator::equal(const ScoreDataIterator& rhs) const {
bool ScoreDataIterator::equal(const ScoreDataIterator& rhs) const
{
if (!m_in && !rhs.m_in) {
return true;
} else if (!m_in) {
@ -90,7 +94,8 @@ bool ScoreDataIterator::equal(const ScoreDataIterator& rhs) const {
}
const vector<ScoreDataItem>& ScoreDataIterator::dereference() const {
const vector<ScoreDataItem>& ScoreDataIterator::dereference() const
{
return m_next;
}

View File

@ -33,7 +33,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureDataIterator.h"
namespace util { class FilePiece; }
namespace util
{
class FilePiece;
}
namespace MosesTuning
{

View File

@ -13,7 +13,8 @@
using namespace std;
namespace {
namespace
{
const int kAvailableSize = 8;
} // namespace
@ -123,7 +124,8 @@ void ScoreStats::savetxt(ostream* os)
*os << *this;
}
void ScoreStats::savetxt() {
void ScoreStats::savetxt()
{
savetxt(&cout);
}
@ -140,7 +142,8 @@ ostream& operator<<(ostream& o, const ScoreStats& e)
return o;
}
bool operator==(const ScoreStats& s1, const ScoreStats& s2) {
bool operator==(const ScoreStats& s1, const ScoreStats& s2)
{
size_t size = s1.size();
if (size != s2.size())

View File

@ -41,7 +41,9 @@ public:
void Copy(const ScoreStats &stats);
bool isfull() const { return (m_entries < m_available_size) ? 0 : 1; }
bool isfull() const {
return (m_entries < m_available_size) ? 0 : 1;
}
void expand();
void add(ScoreStatsType v);
@ -55,9 +57,15 @@ public:
clear();
}
ScoreStatsType get(std::size_t i) { return m_array[i]; }
ScoreStatsType get(std::size_t i) const { return m_array[i]; }
scorestats_t getArray() const { return m_array; }
ScoreStatsType get(std::size_t i) {
return m_array[i];
}
ScoreStatsType get(std::size_t i) const {
return m_array[i];
}
scorestats_t getArray() const {
return m_array;
}
void set(const std::string& str);
@ -69,15 +77,21 @@ public:
}
}
std::size_t bytes() const { return GetArraySizeWithBytes(); }
std::size_t bytes() const {
return GetArraySizeWithBytes();
}
std::size_t GetArraySizeWithBytes() const {
return m_entries * sizeof(ScoreStatsType);
}
std::size_t size() const { return m_entries; }
std::size_t size() const {
return m_entries;
}
std::size_t available() const { return m_available_size; }
std::size_t available() const {
return m_available_size;
}
void savetxt(const std::string &file);
void savetxt(std::ostream* os);

View File

@ -12,7 +12,8 @@ using namespace std;
namespace MosesTuning
{
namespace {
namespace
{
// For tokenizing a hypothesis translation, we may encounter unknown tokens which
// do not exist in the corresponding reference translations.
const int kUnknownToken = -1;
@ -23,16 +24,19 @@ Scorer::Scorer(const string& name, const string& config)
m_vocab(mert::VocabularyFactory::GetVocabulary()),
m_filter(NULL),
m_score_data(NULL),
m_enable_preserve_case(true) {
m_enable_preserve_case(true)
{
InitConfig(config);
}
Scorer::~Scorer() {
Scorer::~Scorer()
{
Singleton<mert::Vocabulary>::Delete();
delete m_filter;
}
void Scorer::InitConfig(const string& config) {
void Scorer::InitConfig(const string& config)
{
// cerr << "Scorer config string: " << config << endl;
size_t start = 0;
while (start < config.size()) {
@ -53,7 +57,8 @@ void Scorer::InitConfig(const string& config) {
}
}
void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded)
{
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
it; ++it) {
if (!m_enable_preserve_case) {
@ -69,7 +74,8 @@ void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) {
}
}
void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded) {
void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded)
{
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
it; ++it) {
if (!m_enable_preserve_case) {
@ -103,8 +109,7 @@ void Scorer::setFactors(const string& factors)
if (factors.empty()) return;
vector<string> factors_vec;
split(factors, '|', factors_vec);
for(vector<string>::iterator it = factors_vec.begin(); it != factors_vec.end(); ++it)
{
for(vector<string>::iterator it = factors_vec.begin(); it != factors_vec.end(); ++it) {
int factor = atoi(it->c_str());
m_factors.push_back(factor);
}
@ -130,8 +135,7 @@ string Scorer::applyFactors(const string& sentence) const
split(sentence, ' ', tokens);
stringstream sstream;
for (size_t i = 0; i < tokens.size(); ++i)
{
for (size_t i = 0; i < tokens.size(); ++i) {
if (tokens[i] == "") continue;
vector<string> factors;
@ -141,8 +145,7 @@ string Scorer::applyFactors(const string& sentence) const
if (i > 0) sstream << " ";
for (size_t j = 0; j < m_factors.size(); ++j)
{
for (size_t j = 0; j < m_factors.size(); ++j) {
int findex = m_factors[j];
if (findex < 0 || findex >= fsize) throw runtime_error("Factor index is out of range.");
@ -158,17 +161,15 @@ string Scorer::applyFactors(const string& sentence) const
*/
string Scorer::applyFilter(const string& sentence) const
{
if (m_filter)
{
if (m_filter) {
return m_filter->ProcessSentence(sentence);
}
else
{
} else {
return sentence;
}
}
float Scorer::score(const candidates_t& candidates) const {
float Scorer::score(const candidates_t& candidates) const
{
diffs_t diffs;
statscores_t scores;
score(candidates, diffs, scores);

View File

@ -10,7 +10,8 @@
#include "Types.h"
#include "ScoreData.h"
namespace mert {
namespace mert
{
class Vocabulary;
@ -117,7 +118,9 @@ class Scorer
*/
virtual void setFactors(const std::string& factors);
mert::Vocabulary* GetVocab() const { return m_vocab; }
mert::Vocabulary* GetVocab() const {
return m_vocab;
}
/**
* Set unix filter, which will be used to preprocess the sentences
@ -173,14 +176,14 @@ class Scorer
/**
* Every inherited scorer should call this function for each sentence
*/
std::string preprocessSentence(const std::string& sentence) const
{
std::string preprocessSentence(const std::string& sentence) const {
return applyFactors(applyFilter(sentence));
}
};
namespace {
namespace
{
//regularisation strategies
inline float score_min(const statscores_t& scores, size_t start, size_t end)

View File

@ -16,7 +16,8 @@ namespace MosesTuning
{
vector<string> ScorerFactory::getTypes() {
vector<string> ScorerFactory::getTypes()
{
vector<string> types;
types.push_back(string("BLEU"));
types.push_back(string("PER"));
@ -29,7 +30,8 @@ vector<string> ScorerFactory::getTypes() {
return types;
}
Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
Scorer* ScorerFactory::getScorer(const string& type, const string& config)
{
if (type == "BLEU") {
return new BleuScorer(config);
} else if (type == "PER") {
@ -48,8 +50,7 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
} else {
if (type.find(',') != string::npos) {
return new InterpolatedScorer(type, config);
}
else {
} else {
throw runtime_error("Unknown scorer type: " + type);
}
}

View File

@ -6,7 +6,8 @@
using namespace std;
namespace {
namespace
{
MosesTuning::SemposOverlapping* g_overlapping = NULL;
@ -16,7 +17,8 @@ namespace MosesTuning
{
SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, const SemposScorer* sempos) {
SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, const SemposScorer* sempos)
{
if (str == "cap-micro") {
return new CapMicroOverlapping(sempos);
} else if (str == "cap-macro") {
@ -26,7 +28,8 @@ SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, c
}
}
void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr) {
void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr)
{
g_overlapping = ovr;
}
@ -41,14 +44,12 @@ vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sent
int multCoeff = 1000;
float interSum = 0;
for (sentence_t::iterator it = intersection.begin(); it != intersection.end(); it++)
{
for (sentence_t::iterator it = intersection.begin(); it != intersection.end(); it++) {
interSum += semposScorer->weight(it->first);
}
float refSum = 0;
for (sentence_t::iterator it = ref.begin(); it != ref.end(); it++)
{
for (sentence_t::iterator it = ref.begin(); it != ref.end(); it++) {
refSum += semposScorer->weight(it->first);
}

View File

@ -36,7 +36,8 @@ public:
virtual std::size_t NumberOfScores() const = 0;
};
class SemposOverlappingFactory {
class SemposOverlappingFactory
{
public:
static SemposOverlapping* GetOverlapping(const std::string& str, const SemposScorer* sempos);
@ -62,7 +63,9 @@ public:
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<int>& stats) const;
virtual std::size_t NumberOfScores() const { return 2; }
virtual std::size_t NumberOfScores() const {
return 2;
}
private:
// no copying allowed.
@ -82,7 +85,9 @@ public:
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<int>& stats) const;
virtual std::size_t NumberOfScores() const { return kMaxNOC * 2; }
virtual std::size_t NumberOfScores() const {
return kMaxNOC * 2;
}
private:
// no copying allowed.

View File

@ -25,8 +25,7 @@ SemposScorer::SemposScorer(const string& config)
m_semposMap.clear();
string weightsfile = getConfig("weightsfile", "");
if (weightsfile != "")
{
if (weightsfile != "") {
loadWeights(weightsfile);
}
}
@ -145,12 +144,9 @@ int SemposScorer::encodeSempos(const string& sempos)
float SemposScorer::weight(int item) const
{
std::map<int,float>::const_iterator it = weightsMap.find(item);
if (it == weightsMap.end())
{
if (it == weightsMap.end()) {
return 1.0f;
}
else
{
} else {
return it->second;
}
}
@ -160,10 +156,8 @@ void SemposScorer::loadWeights(const string& weightsfile)
string line;
ifstream myfile;
myfile.open(weightsfile.c_str(), ifstream::in);
if (myfile.is_open())
{
while ( myfile.good() )
{
if (myfile.is_open()) {
while ( myfile.good() ) {
getline (myfile,line);
vector<string> fields;
if (line == "") continue;
@ -174,9 +168,7 @@ void SemposScorer::loadWeights(const string& weightsfile)
weightsMap[encoded] = weight;
}
myfile.close();
}
else
{
} else {
cerr << "Unable to open file "<< weightsfile << endl;
exit(1);
}

View File

@ -32,12 +32,16 @@ public:
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sindex, const std::string& text, ScoreStats& entry);
virtual std::size_t NumberOfScores() const { return m_ovr->NumberOfScores(); }
virtual std::size_t NumberOfScores() const {
return m_ovr->NumberOfScores();
}
virtual float calculateScore(const std::vector<int>& comps) const {
return m_ovr->calculateScore(comps);
}
bool EnableDebug() const { return m_enable_debug; }
bool EnableDebug() const {
return m_enable_debug;
}
float weight(int item) const;

View File

@ -19,13 +19,15 @@ namespace MosesTuning
SentenceLevelScorer::SentenceLevelScorer(const string& name, const string& config)
: Scorer(name, config),
m_regularisationStrategy(REG_NONE),
m_regularisationWindow(0) {
m_regularisationWindow(0)
{
Init();
}
SentenceLevelScorer::~SentenceLevelScorer() {}
void SentenceLevelScorer::Init() {
void SentenceLevelScorer::Init()
{
// Configure regularisation.
static string KEY_TYPE = "regtype";
static string KEY_WINDOW = "regwin";

View File

@ -10,7 +10,8 @@ namespace MosesTuning
// thread *un*safe singleton.
// TODO: replace this with thread-safe singleton.
template <typename T>
class Singleton {
class Singleton
{
public:
static T* GetInstance() {
if (m_instance == NULL) {

View File

@ -5,19 +5,24 @@
using namespace MosesTuning;
namespace {
namespace
{
static int g_count = 0;
class Instance {
class Instance
{
public:
Instance() { ++g_count; }
Instance() {
++g_count;
}
~Instance() {}
};
} // namespace
BOOST_AUTO_TEST_CASE(singleton_basic) {
BOOST_AUTO_TEST_CASE(singleton_basic)
{
Instance* instance1 = Singleton<Instance>::GetInstance();
Instance* instance2 = Singleton<Instance>::GetInstance();
Instance* instance3 = Singleton<Instance>::GetInstance();

View File

@ -16,7 +16,8 @@ namespace MosesTuning
StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
: Scorer(name,config) {
: Scorer(name,config)
{
//configure regularisation
static string KEY_TYPE = "regtype";
static string KEY_WINDOW = "regwin";

View File

@ -6,14 +6,17 @@
#include <sys/time.h>
#endif
namespace {
namespace
{
#if !defined(_WIN32) && !defined(_WIN64)
uint64_t GetMicroSeconds(const struct timeval& tv) {
uint64_t GetMicroSeconds(const struct timeval& tv)
{
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
uint64_t GetTimeOfDayMicroSeconds() {
uint64_t GetTimeOfDayMicroSeconds()
{
struct timeval tv;
gettimeofday(&tv, NULL);
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
@ -26,7 +29,8 @@ namespace MosesTuning
{
void Timer::GetCPUTimeMicroSeconds(Timer::CPUTime* cpu_time) const {
void Timer::GetCPUTimeMicroSeconds(Timer::CPUTime* cpu_time) const
{
#if !defined(_WIN32) && !defined(_WIN64)
struct rusage usage;
if (getrusage(RUSAGE_SELF, &usage)) {
@ -41,22 +45,26 @@ void Timer::GetCPUTimeMicroSeconds(Timer::CPUTime* cpu_time) const {
#endif
}
double Timer::get_elapsed_cpu_time() const {
double Timer::get_elapsed_cpu_time() const
{
return static_cast<double>(get_elapsed_cpu_time_microseconds()) * 1e-6;
}
uint64_t Timer::get_elapsed_cpu_time_microseconds() const {
uint64_t Timer::get_elapsed_cpu_time_microseconds() const
{
CPUTime e;
GetCPUTimeMicroSeconds(&e);
return (e.user_time - m_start_time.user_time) +
(e.sys_time - m_start_time.sys_time);
}
double Timer::get_elapsed_wall_time() const {
double Timer::get_elapsed_wall_time() const
{
return static_cast<double>(get_elapsed_wall_time_microseconds()) * 1e-6;
}
uint64_t Timer::get_elapsed_wall_time_microseconds() const {
uint64_t Timer::get_elapsed_wall_time_microseconds() const
{
return GetTimeOfDayMicroSeconds() - m_wall;
}
@ -92,7 +100,8 @@ void Timer::check(const char* msg)
}
}
std::string Timer::ToString() const {
std::string Timer::ToString() const
{
std::string res;
const double wall = get_elapsed_wall_time();
CPUTime e;

View File

@ -61,7 +61,9 @@ class Timer
/**
*/
bool is_running() const { return m_is_running; }
bool is_running() const {
return m_is_running;
}
/**
* Return the total time in seconds that the timer has been in the
@ -97,7 +99,8 @@ class Timer
* for an ostream 'os' and a timer 't'. For example, "cout << t" will
* print out the total amount of time 't' has been "running".
*/
inline std::ostream& operator<<(std::ostream& os, const Timer& t) {
inline std::ostream& operator<<(std::ostream& os, const Timer& t)
{
if (t.is_running()) {
os << t.ToString();
} else {

View File

@ -8,7 +8,8 @@
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(timer_basic_test) {
BOOST_AUTO_TEST_CASE(timer_basic_test)
{
Timer timer;
const int sleep_time_microsec = 40; // ad-hoc microseconds to pass unit tests.

View File

@ -11,7 +11,8 @@
using namespace std;
namespace {
namespace
{
MosesTuning::Timer g_timer;
int g_verbose = 0;
@ -56,7 +57,8 @@ size_t getNextPound(std::string &str, std::string &substr,
return pos;
}
void split(const std::string &s, char delim, std::vector<std::string> &elems) {
void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
std::stringstream ss(s);
std::string item;
while(std::getline(ss, item, delim)) {
@ -65,7 +67,8 @@ void split(const std::string &s, char delim, std::vector<std::string> &elems) {
}
void Tokenize(const char *str, const char delim,
std::vector<std::string> *res) {
std::vector<std::string> *res)
{
while (1) {
const char *begin = str;
while (*str != delim && *str) str++;

View File

@ -40,7 +40,8 @@ int setverboselevel(int v);
const float kEPS = 0.0001f;
template <typename T>
bool IsAlmostEqual(T expected, T actual, float round=kEPS) {
bool IsAlmostEqual(T expected, T actual, float round=kEPS)
{
if (std::abs(expected - actual) < round) {
return true;
} else {
@ -86,7 +87,8 @@ inline T Scan(const std::string &input)
* Returns true iff "str" ends with "suffix".
* e.g., Given str = "abc:" and suffix = ":", this function returns true.
*/
inline bool EndsWith(const std::string& str, const char* suffix)
{
  // "suffix" must be a non-null, NUL-terminated string. It is compared as a
  // whole against the tail of "str". (find_last_of() is not suitable here:
  // it treats its argument as a *set* of characters, and on an empty "str"
  // its npos result compares equal to str.size() - 1.)
  const std::string::size_type suffix_len =
    std::string::traits_type::length(suffix);

  // A suffix longer than the string can never match; this also keeps the
  // subtraction below from wrapping around.
  if (suffix_len > str.size()) {
    return false;
  }

  // An empty suffix matches every string, including the empty one.
  return str.compare(str.size() - suffix_len, suffix_len,
                     suffix, suffix_len) == 0;
}

View File

@ -5,7 +5,8 @@
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
BOOST_AUTO_TEST_CASE(util_get_next_pound_test)
{
{
std::string str("9 9 7 ");
std::string substr;
@ -38,7 +39,8 @@ BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
}
}
BOOST_AUTO_TEST_CASE(util_tokenize_test) {
BOOST_AUTO_TEST_CASE(util_tokenize_test)
{
{
std::vector<std::string> res;
Tokenize("9 9 7", ' ', &res);
@ -66,7 +68,8 @@ BOOST_AUTO_TEST_CASE(util_tokenize_test) {
}
}
BOOST_AUTO_TEST_CASE(util_ends_with_test) {
BOOST_AUTO_TEST_CASE(util_ends_with_test)
{
BOOST_CHECK(EndsWith("abc:", ":"));
BOOST_CHECK(EndsWith("a b c:", ":"));
BOOST_CHECK(!EndsWith("a", ":"));

View File

@ -1,12 +1,15 @@
#include "Vocabulary.h"
#include "Singleton.h"
namespace mert {
namespace {
namespace mert
{
namespace
{
Vocabulary* g_vocab = NULL;
} // namespace
int Vocabulary::Encode(const std::string& token) {
int Vocabulary::Encode(const std::string& token)
{
iterator it = m_vocab.find(token);
int encoded_token;
if (it == m_vocab.end()) {
@ -20,7 +23,8 @@ int Vocabulary::Encode(const std::string& token) {
return encoded_token;
}
bool Vocabulary::Lookup(const std::string&str , int* v) const {
bool Vocabulary::Lookup(const std::string&str , int* v) const
{
const_iterator it = m_vocab.find(str);
if (it == m_vocab.end()) return false;
@ -28,7 +32,8 @@ bool Vocabulary::Lookup(const std::string&str , int* v) const {
return true;
}
Vocabulary* VocabularyFactory::GetVocabulary() {
Vocabulary* VocabularyFactory::GetVocabulary()
{
if (g_vocab == NULL) {
return MosesTuning::Singleton<Vocabulary>::GetInstance();
} else {
@ -36,7 +41,8 @@ Vocabulary* VocabularyFactory::GetVocabulary() {
}
}
void VocabularyFactory::SetVocabulary(Vocabulary* vocab) {
void VocabularyFactory::SetVocabulary(Vocabulary* vocab)
{
g_vocab = vocab;
}

View File

@ -4,7 +4,8 @@
#include <boost/unordered_map.hpp>
#include <string>
namespace mert {
namespace mert
{
/**
* A map to handle vocabularies to calculate
@ -12,7 +13,8 @@ namespace mert {
*
* TODO: replace this with more efficient data structure.
*/
class Vocabulary {
class Vocabulary
{
public:
typedef boost::unordered_map<std::string, int>::iterator iterator;
typedef boost::unordered_map<std::string, int>::const_iterator const_iterator;
@ -28,27 +30,48 @@ class Vocabulary {
*/
bool Lookup(const std::string&str , int* v) const;
void clear() { m_vocab.clear(); }
void clear() {
m_vocab.clear();
}
bool empty() const { return m_vocab.empty(); }
bool empty() const {
return m_vocab.empty();
}
std::size_t size() const { return m_vocab.size(); }
std::size_t size() const {
return m_vocab.size();
}
iterator find(const std::string& str) { return m_vocab.find(str); }
const_iterator find(const std::string& str) const { return m_vocab.find(str); }
iterator find(const std::string& str) {
return m_vocab.find(str);
}
const_iterator find(const std::string& str) const {
return m_vocab.find(str);
}
int& operator[](const std::string& str) { return m_vocab[str]; }
int& operator[](const std::string& str) {
return m_vocab[str];
}
iterator begin() { return m_vocab.begin(); }
const_iterator begin() const { return m_vocab.begin(); }
iterator end() { return m_vocab.end(); }
const_iterator end() const { return m_vocab.end(); }
iterator begin() {
return m_vocab.begin();
}
const_iterator begin() const {
return m_vocab.begin();
}
iterator end() {
return m_vocab.end();
}
const_iterator end() const {
return m_vocab.end();
}
private:
boost::unordered_map<std::string, int> m_vocab;
};
class VocabularyFactory {
class VocabularyFactory
{
public:
static Vocabulary* GetVocabulary();
static void SetVocabulary(Vocabulary* vocab);

View File

@ -6,16 +6,20 @@
using namespace MosesTuning;
namespace mert {
namespace {
namespace mert
{
namespace
{
void TearDown() {
void TearDown()
{
Singleton<Vocabulary>::Delete();
}
} // namespace
BOOST_AUTO_TEST_CASE(vocab_basic) {
BOOST_AUTO_TEST_CASE(vocab_basic)
{
Vocabulary vocab;
BOOST_REQUIRE(vocab.empty());
vocab.clear();
@ -39,7 +43,8 @@ BOOST_AUTO_TEST_CASE(vocab_basic) {
BOOST_CHECK(!vocab.Lookup("world", &v));
}
BOOST_AUTO_TEST_CASE(vocab_factory_test) {
BOOST_AUTO_TEST_CASE(vocab_factory_test)
{
Vocabulary* vocab1 = VocabularyFactory::GetVocabulary();
Vocabulary* vocab2 = VocabularyFactory::GetVocabulary();
Vocabulary* vocab3 = VocabularyFactory::GetVocabulary();

View File

@ -14,7 +14,8 @@
using namespace std;
using namespace MosesTuning;
namespace {
namespace
{
Scorer* g_scorer = NULL;
bool g_has_more_files = false;
@ -22,7 +23,8 @@ bool g_has_more_scorers = false;
const float g_alpha = 0.05;
class EvaluatorUtil {
class EvaluatorUtil
{
public:
static void evaluate(const string& candFile, int bootstrap);
static float average(const vector<float>& list);
@ -43,22 +45,18 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
// Loading sentences and preparing statistics
ScoreStats scoreentry;
string line;
while (getline(cand, line))
{
while (getline(cand, line)) {
g_scorer->prepareStats(entries.size(), line, scoreentry);
entries.push_back(scoreentry);
}
int n = entries.size();
if (bootstrap)
{
if (bootstrap) {
vector<float> scores;
for (int i = 0; i < bootstrap; ++i)
{
for (int i = 0; i < bootstrap; ++i) {
// TODO: Use a smart pointer for exception safety.
ScoreData* scoredata = new ScoreData(g_scorer);
for (int j = 0; j < n; ++j)
{
for (int j = 0; j < n; ++j) {
int randomIndex = random() % n;
scoredata->add(entries[randomIndex], j);
}
@ -85,13 +83,10 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
cout.setf(ios::fixed, ios::floatfield);
cout.precision(4);
cout << avg << "\t[" << lb << "," << rb << "]" << endl;
}
else
{
} else {
// TODO: Use a smart pointer for exception safety.
ScoreData* scoredata = new ScoreData(g_scorer);
for (int sid = 0; sid < n; ++sid)
{
for (int sid = 0; sid < n; ++sid) {
scoredata->add(entries[sid], sid);
}
g_scorer->setScoreData(scoredata);
@ -191,7 +186,8 @@ struct ProgramOption {
has_seed(false) { }
};
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
{
int c;
int option_index;
int last_scorer_index = -1;
@ -232,8 +228,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
}
// Add default scorer if no scorer provided
if (opt->scorer_types.size() == 0)
{
if (opt->scorer_types.size() == 0) {
opt->scorer_types.push_back(string("BLEU"));
opt->scorer_configs.push_back(string(""));
opt->scorer_factors.push_back(string(""));
@ -241,7 +236,8 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
}
}
void InitSeed(const ProgramOption *opt) {
void InitSeed(const ProgramOption *opt)
{
if (opt->has_seed) {
cerr << "Seeding random numbers with " << opt->seed << endl;
srandom(opt->seed);
@ -260,8 +256,7 @@ int main(int argc, char** argv)
ProgramOption option;
ParseCommandOptions(argc, argv, &option);
if (option.bootstrap)
{
if (option.bootstrap) {
InitSeed(&option);
}
@ -278,10 +273,8 @@ int main(int argc, char** argv)
if (candFiles.size() > 1) g_has_more_files = true;
if (option.scorer_types.size() > 1) g_has_more_scorers = true;
for (vector<string>::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt)
{
for (size_t i = 0; i < option.scorer_types.size(); i++)
{
for (vector<string>::const_iterator fileIt = candFiles.begin(); fileIt != candFiles.end(); ++fileIt) {
for (size_t i = 0; i < option.scorer_types.size(); i++) {
g_scorer = ScorerFactory::getScorer(option.scorer_types[i], option.scorer_configs[i]);
g_scorer->setFactors(option.scorer_factors[i]);
g_scorer->setFilter(option.scorer_filter[i]);

View File

@ -20,7 +20,8 @@
using namespace std;
using namespace MosesTuning;
namespace {
namespace
{
void usage()
{
@ -93,7 +94,8 @@ struct ProgramOption {
verbosity(0) { }
};
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
{
int c;
int option_index;

View File

@ -49,7 +49,8 @@ using namespace MosesTuning;
namespace po = boost::program_options;
ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv) {
ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
{
vector<ValType> stats(kBleuNgramOrder*2+1,0);
for(train->reset(); !train->finished(); train->next()) {
// Find max model
@ -165,7 +166,8 @@ int main(int argc, char** argv)
exit(3);
}
int sparseCount=0;
parameter_t val; std::string name;
parameter_t val;
std::string name;
while(opt >> name >> val) {
size_t id = SparseVector::encode(name) + initDenseSize;
while(initParams.size()<=id) initParams.push_back(0.0);
@ -194,8 +196,7 @@ int main(int argc, char** argv)
train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
ValType bestBleu = 0;
for(int j=0;j<n_iters;j++)
{
for(int j=0; j<n_iters; j++) {
// MIRA train for one epoch
int iNumHyps = 0;
int iNumExamples = 0;

View File

@ -30,7 +30,8 @@
using namespace std;
using namespace MosesTuning;
namespace {
namespace
{
const char kDefaultOptimizer[] = "powell";
const char kDefaultScorer[] = "BLEU";
@ -46,7 +47,8 @@ const char kOutputFile[] = "weights.txt";
/**
* Runs an optimisation, or a random restart.
*/
class OptimizationTask : public Moses::Task {
class OptimizationTask : public Moses::Task
{
public:
OptimizationTask(Optimizer* optimizer, const Point& point)
: m_optimizer(optimizer), m_point(point) {}
@ -85,7 +87,8 @@ class OptimizationTask : public Moses::Task {
statscore_t m_score;
};
bool WriteFinalWeights(const char* filename, const Point& point) {
bool WriteFinalWeights(const char* filename, const Point& point)
{
ofstream ofs(filename);
if (!ofs) {
cerr << "Cannot open " << filename << endl;
@ -184,7 +187,8 @@ struct ProgramOption {
shard_count(0) { }
};
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
{
int c;
int option_index;

View File

@ -51,7 +51,8 @@ namespace po = boost::program_options;
namespace MosesTuning
{
class SampledPair {
class SampledPair
{
private:
pair<size_t,size_t> m_translation1;
pair<size_t,size_t> m_translation2;
@ -70,12 +71,19 @@ public:
}
}
float getDiff() const { return m_score_diff; }
const pair<size_t,size_t>& getTranslation1() const { return m_translation1; }
const pair<size_t,size_t>& getTranslation2() const { return m_translation2; }
float getDiff() const {
return m_score_diff;
}
const pair<size_t,size_t>& getTranslation1() const {
return m_translation1;
}
const pair<size_t,size_t>& getTranslation2() const {
return m_translation2;
}
};
static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureDataItem& f2) {
static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureDataItem& f2)
{
// difference in score in regular features
for(unsigned int j=0; j<f1.dense.size(); j++)
if (abs(f1.dense[j]-f2.dense[j]) > 0.00001)

View File

@ -31,19 +31,22 @@ using namespace std;
using namespace Moses;
namespace Mira {
namespace Mira
{
/**
* Allocates a char* and copies string into it.
**/
static char* strToChar(const string& s) {
static char* strToChar(const string& s)
{
char* c = new char[s.size()+1];
strcpy(c,s.c_str());
return c;
}
MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector<string> decoder_params)
: m_manager(NULL) {
: m_manager(NULL)
{
static int BASE_ARGC = 8;
Parameter* params = new Parameter();
char ** mosesargv = new char*[BASE_ARGC + argc];
@ -77,7 +80,8 @@ namespace Mira {
assert(false);
}
void MosesDecoder::cleanup(bool chartDecoding) {
void MosesDecoder::cleanup(bool chartDecoding)
{
delete m_manager;
if (chartDecoding)
delete m_chartManager;
@ -109,8 +113,7 @@ namespace Mira {
if (chartDecoding) {
return runChartDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight,
featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch);
}
else {
} else {
SearchAlgorithm search = staticData.GetSearchAlgorithm();
return runDecoder(source, sentenceid, nBestSize, bleuObjectiveWeight, bleuScoreWeight,
featureValues, bleuScores, modelScores, numReturnedTranslations, realBleu, distinct, rank, epoch,
@ -132,7 +135,8 @@ namespace Mira {
size_t rank,
size_t epoch,
SearchAlgorithm& search,
string filename) {
string filename)
{
// run the decoder
m_manager = new Moses::Manager(0,*m_sentence, search);
m_manager->ProcessSentence();
@ -208,7 +212,8 @@ namespace Mira {
bool realBleu,
bool distinct,
size_t rank,
size_t epoch) {
size_t epoch)
{
// run the decoder
m_chartManager = new ChartManager(*m_sentence);
m_chartManager->ProcessSentence();
@ -261,7 +266,8 @@ namespace Mira {
void MosesDecoder::outputNBestList(const std::string& source, size_t sentenceid,
size_t nBestSize, float bleuObjectiveWeight, float bleuScoreWeight,
bool distinctNbest, bool avgRefLength, string filename, ofstream& streamOut) {
bool distinctNbest, bool avgRefLength, string filename, ofstream& streamOut)
{
StaticData &staticData = StaticData::InstanceNonConst();
bool chartDecoding = (staticData.GetSearchAlgorithm() == ChartDecoding);
initialize(staticData, source, sentenceid, bleuObjectiveWeight, bleuScoreWeight, avgRefLength, chartDecoding);
@ -286,12 +292,10 @@ namespace Mira {
// OutputNBestList(const ChartTrellisPathList &nBestList, const ChartHypothesis *bestHypo, const TranslationSystem* system, long translationId, false)
// OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
out.close();
}
else {
} else {
// OutputNBest(streamOut, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), sentenceid, false);
}
}
else {
} else {
// run the decoder
m_manager = new Moses::Manager(0,*m_sentence, staticData.GetSearchAlgorithm());
m_manager->ProcessSentence();
@ -308,8 +312,7 @@ namespace Mira {
// TODO: handle sentence id (for now always 0)
//OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
out.close();
}
else {
} else {
//OutputNBest(streamOut, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), sentenceid, false);
streamOut.flush();
}
@ -317,7 +320,8 @@ namespace Mira {
}
void MosesDecoder::initialize(StaticData& staticData, const std::string& source, size_t sentenceid,
float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding) {
float bleuObjectiveWeight, float bleuScoreWeight, bool avgRefLength, bool chartDecoding)
{
m_sentence = new Sentence();
stringstream in(source + "\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
@ -340,45 +344,55 @@ namespace Mira {
m_bleuScoreFeature->SetCurrReferenceNgrams(sentenceid);
}
float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores) {
float MosesDecoder::getBleuScore(const ScoreComponentCollection& scores)
{
return scores.GetScoreForProducer(m_bleuScoreFeature);
}
void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu) {
void MosesDecoder::setBleuScore(ScoreComponentCollection& scores, float bleu)
{
scores.Assign(m_bleuScoreFeature, bleu);
}
ScoreComponentCollection MosesDecoder::getWeights() {
ScoreComponentCollection MosesDecoder::getWeights()
{
return StaticData::Instance().GetAllWeights();
}
void MosesDecoder::setWeights(const ScoreComponentCollection& weights) {
void MosesDecoder::setWeights(const ScoreComponentCollection& weights)
{
StaticData::InstanceNonConst().SetAllWeights(weights);
}
void MosesDecoder::updateHistory(const vector<const Word*>& words) {
void MosesDecoder::updateHistory(const vector<const Word*>& words)
{
m_bleuScoreFeature->UpdateHistory(words);
}
void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch) {
void MosesDecoder::updateHistory(const vector< vector< const Word*> >& words, vector<size_t>& sourceLengths, vector<size_t>& ref_ids, size_t rank, size_t epoch)
{
m_bleuScoreFeature->UpdateHistory(words, sourceLengths, ref_ids, rank, epoch);
}
void MosesDecoder::printBleuFeatureHistory(std::ostream& out) {
void MosesDecoder::printBleuFeatureHistory(std::ostream& out)
{
m_bleuScoreFeature->PrintHistory(out);
}
size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength) {
size_t MosesDecoder::getClosestReferenceLength(size_t ref_id, int hypoLength)
{
return m_bleuScoreFeature->GetClosestRefLength(ref_id, hypoLength);
}
size_t MosesDecoder::getShortestReferenceIndex(size_t ref_id) {
size_t MosesDecoder::getShortestReferenceIndex(size_t ref_id)
{
return m_bleuScoreFeature->GetShortestRefIndex(ref_id);
}
void MosesDecoder::setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu) {
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu)
{
m_bleuScoreFeature->SetBleuParameters(disable, sentenceBleu, scaleByInputLength, scaleByAvgInputLength,
scaleByInverseLength, scaleByAvgInverseLength,
scaleByX, historySmoothing, scheme, simpleHistoryBleu);

View File

@ -36,12 +36,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// Wrapper functions and objects for the decoder.
//
namespace Mira {
namespace Mira
{
/**
* Wraps moses decoder.
**/
class MosesDecoder {
class MosesDecoder
{
public:
/**
* Initialise moses (including StaticData) using the given ini file and debuglevel, passing through any
@ -114,14 +116,22 @@ class MosesDecoder {
void setBleuParameters(bool disable, bool sentenceBleu, bool scaleByInputLength, bool scaleByAvgInputLength,
bool scaleByInverseLength, bool scaleByAvgInverseLength,
float scaleByX, float historySmoothing, size_t scheme, bool simpleHistoryBleu);
void setAvgInputLength (float l) { m_bleuScoreFeature->SetAvgInputLength(l); }
void setAvgInputLength (float l) {
m_bleuScoreFeature->SetAvgInputLength(l);
}
Moses::ScoreComponentCollection getWeights();
void setWeights(const Moses::ScoreComponentCollection& weights);
void cleanup(bool chartDecoding);
float getSourceLengthHistory() { return m_bleuScoreFeature->GetSourceLengthHistory(); }
float getTargetLengthHistory() { return m_bleuScoreFeature->GetTargetLengthHistory(); }
float getAverageInputLength() { return m_bleuScoreFeature->GetAverageInputLength(); }
float getSourceLengthHistory() {
return m_bleuScoreFeature->GetSourceLengthHistory();
}
float getTargetLengthHistory() {
return m_bleuScoreFeature->GetTargetLengthHistory();
}
float getAverageInputLength() {
return m_bleuScoreFeature->GetAverageInputLength();
}
private:
float getBleuScore(const Moses::ScoreComponentCollection& scores);

View File

@ -3,9 +3,11 @@
using namespace Moses;
using namespace std;
namespace Mira {
namespace Mira
{
vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b) {
vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b)
{
size_t i;
int max_iter = 10000;
@ -22,8 +24,7 @@ namespace Mira {
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
for ( i = 0; i < K; i++ ) {
A[i][i] = a[i].InnerProduct(a[i]);
is_computed[i] = false;
}
@ -31,12 +32,10 @@ namespace Mira {
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
for ( i = 0; i < b.size(); i++ ) {
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
if ( kkt[i] > max_kkt ) {
max_kkt = kkt[i];
max_kkt_i = i;
}
@ -47,8 +46,7 @@ namespace Mira {
float try_alpha;
float add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
while ( max_kkt >= eps && iter < max_iter ) {
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
@ -61,18 +59,15 @@ namespace Mira {
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
if ( !is_computed[max_kkt_i] ) {
for ( i = 0; i < K; i++ ) {
A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
for ( i = 0; i < F.size(); i++ ) {
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
@ -81,8 +76,7 @@ namespace Mira {
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
if ( kkt[i] > max_kkt ) {
max_kkt = kkt[i];
max_kkt_i = i;
}
@ -93,7 +87,8 @@ namespace Mira {
return alpha;
}
vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b, float C) {
vector<float> Hildreth::optimise (const vector<ScoreComponentCollection>& a, const vector<float>& b, float C)
{
size_t i;
int max_iter = 10000;
@ -110,8 +105,7 @@ namespace Mira {
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
for ( i = 0; i < K; i++ ) {
A[i][i] = a[i].InnerProduct(a[i]);
is_computed[i] = false;
}
@ -119,12 +113,10 @@ namespace Mira {
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
for ( i = 0; i < b.size(); i++ ) {
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
if ( kkt[i] > max_kkt ) {
max_kkt = kkt[i];
max_kkt_i = i;
}
@ -135,8 +127,7 @@ namespace Mira {
float try_alpha;
float add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
while ( max_kkt >= eps && iter < max_iter ) {
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
@ -151,18 +142,15 @@ namespace Mira {
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
if ( !is_computed[max_kkt_i] ) {
for ( i = 0; i < K; i++ ) {
A[i][max_kkt_i] = a[i].InnerProduct(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
for ( i = 0; i < F.size(); i++ ) {
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
@ -174,8 +162,7 @@ namespace Mira {
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
if ( kkt[i] > max_kkt ) {
max_kkt = kkt[i];
max_kkt_i = i;
}

View File

@ -1,9 +1,11 @@
#include "moses/FeatureVector.h"
#include "moses/ScoreComponentCollection.h"
namespace Mira {
namespace Mira
{
class Hildreth {
class Hildreth
{
public :
static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b );
static std::vector<float> optimise (const std::vector<Moses::ScoreComponentCollection>& a, const std::vector<float>& b, float C);

View File

@ -34,22 +34,31 @@ using namespace Mira;
namespace MosesTest
{
class MockSingleFeature : public StatelessFeatureFunction {
class MockSingleFeature : public StatelessFeatureFunction
{
public:
MockSingleFeature(): StatelessFeatureFunction("MockSingle",1) {}
std::string GetScoreProducerWeightShortName(unsigned) const {return "sf";}
std::string GetScoreProducerWeightShortName(unsigned) const {
return "sf";
}
};
class MockMultiFeature : public StatelessFeatureFunction {
class MockMultiFeature : public StatelessFeatureFunction
{
public:
MockMultiFeature(): StatelessFeatureFunction("MockMulti",5) {}
std::string GetScoreProducerWeightShortName(unsigned) const {return "mf";}
std::string GetScoreProducerWeightShortName(unsigned) const {
return "mf";
}
};
class MockSparseFeature : public StatelessFeatureFunction {
class MockSparseFeature : public StatelessFeatureFunction
{
public:
MockSparseFeature(): StatelessFeatureFunction("MockSparse", ScoreProducer::unlimited) {}
std::string GetScoreProducerWeightShortName(unsigned) const {return "sf";}
std::string GetScoreProducerWeightShortName(unsigned) const {
return "sf";
}
};
struct MockProducers {

View File

@ -22,13 +22,16 @@
using namespace std;
namespace Moses {
namespace Moses
{
HypothesisQueue::~HypothesisQueue() {
HypothesisQueue::~HypothesisQueue()
{
m_queue.clear();
}
void HypothesisQueue::Push(BleuIndexPair hypo) {
void HypothesisQueue::Push(BleuIndexPair hypo)
{
//pair<set<BleuIndexPair>::iterator,bool> ret;
if (m_capacity == 0 || m_queue.size() < m_capacity) {
@ -52,7 +55,8 @@ void HypothesisQueue::Push(BleuIndexPair hypo) {
}
}
BleuIndexPair HypothesisQueue::Pop() {
BleuIndexPair HypothesisQueue::Pop()
{
HypoQueueType::iterator p = m_queue.begin();
BleuIndexPair top = *p;
m_queue.erase(p);

View File

@ -21,7 +21,8 @@
#include <set>
namespace Moses {
namespace Moses
{
// pair of Bleu score and index
typedef std::pair<float, size_t> BleuIndexPair;
@ -30,14 +31,17 @@ typedef std::pair<float, size_t> BleuIndexPair;
// the best scoring hypothesis. The queue assumes ownership of pushed items and
// relinquishes ownership when they are popped. Any remaining items at the
// time of the queue's destruction are deleted.
class HypothesisQueue {
class HypothesisQueue
{
public:
// Create empty queue with fixed capacity of c. Capacity 0 means unbounded.
HypothesisQueue(size_t c) : m_capacity(c) {}
~HypothesisQueue();
bool Empty() { return m_queue.empty(); }
bool Empty() {
return m_queue.empty();
}
// Add the hypo to the queue or delete it if the queue is full and the
// score is no better than the queue's worst score.

Some files were not shown because too many files have changed in this diff Show More