Imported version 3493 from local repo.

Includes merges from trunk up to 3842.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/branches/samplerank@4045 1f5c12ca-751b-0410-a591-d2e778427230
bhaddow 2011-06-28 14:55:53 +00:00
parent ac13074816
commit 465442340a
211 changed files with 21866 additions and 488 deletions

.gitignore

@@ -1,33 +1,38 @@
*.[oa]
*.o
*~
Makefile
Makefile.in
aclocal.m4
autom4te.cache/
autom4te.cache
config.h
config.log
config.status
configure
mert/.deps/
mert/Makefile
mert/Makefile.in
mert/extractor
mert/mert
misc/.deps/
misc/.deps
misc/Makefile
misc/Makefile.in
misc/processLexicalTable
misc/processPhraseTable
misc/queryLexicalTable
moses-cmd/src/.deps/
moses-cmd/src/.deps
moses-cmd/src/Makefile
moses-cmd/src/Makefile.in
moses-cmd/src/moses
moses/src/.deps/
moses/src/.deps
moses/src/Makefile
moses/src/Makefile.in
moses/src/libmoses.a
stamp-h1
josiah/josiah
josiah/.deps
josiah/m1
scripts/Makefile
moses-release.tar.gz
release/
scripts-20090213-0027/
scripts/training/cmert-0.5/mert
scripts/training/mbr/mbr
scripts/training/phrase-extract/extract
scripts/training/phrase-extract/score
scripts/training/symal/symal
stamp-h1

Makefile.am

@@ -11,4 +11,4 @@ endif
if WITH_SERVER
SERVER = server
endif
-SUBDIRS = kenlm moses/src moses-chart/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src $(MERT) $(SERVER)
+SUBDIRS = kenlm moses/src moses-chart/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src josiah $(MERT) $(SERVER)

config.h.in

@@ -1,10 +1,19 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* define if the Boost library is available */
/* Defined if the requested minimum BOOST version is satisfied */
#undef HAVE_BOOST
/* define if the Boost::Thread library is available */
#undef HAVE_BOOST_THREAD
/* Define to 1 if you have <boost/archive/text_oarchive.hpp> */
#undef HAVE_BOOST_ARCHIVE_TEXT_OARCHIVE_HPP
/* Define to 1 if you have <boost/mpi/communicator.hpp> */
#undef HAVE_BOOST_MPI_COMMUNICATOR_HPP
/* Define to 1 if you have <boost/program_options.hpp> */
#undef HAVE_BOOST_PROGRAM_OPTIONS_HPP
/* Define to 1 if you have <boost/thread.hpp> */
#undef HAVE_BOOST_THREAD_HPP
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
@@ -61,6 +70,9 @@
*/
#undef LT_OBJDIR
/* Define if compiling with MPI. */
#undef MPI_ENABLED
/* Name of package */
#undef PACKAGE

configure.in

@@ -14,7 +14,29 @@ AC_PROG_LIBTOOL
# Shared libraries are disabled by default
#LT_INIT([disable-shared])
AX_XMLRPC_C
BOOST_REQUIRE([1.36.0])
BOOST_PROGRAM_OPTIONS
ac_have_mpi=no
AC_ARG_WITH(mpi,
AC_HELP_STRING([--with-mpi],
[Force compilation with MPI]),
[ if test $withval != no ; then
ac_have_mpi=yes
fi ] )
if test $ac_have_mpi = yes ; then
AC_PATH_PROG(CXX, mpic++, none)
if test $CXX = none ; then
AC_MSG_ERROR([Cannot locate MPI compiler drivers])
fi
BOOST_MPI
BOOST_SERIALIZATION
AC_DEFINE(MPI_ENABLED,1,[Define if compiling with MPI.])
CPPFLAGS="$CPPFLAGS -DMPI_ENABLED"
fi
#AX_XMLRPC_C
AC_ARG_WITH(protobuf,
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
@@ -249,6 +271,6 @@ fi
LIBS="$LIBS -lz"
-AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-chart/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile mert/Makefile server/Makefile CreateOnDisk/src/Makefile kenlm/Makefile)
+AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-chart/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile josiah/Makefile mert/Makefile server/Makefile CreateOnDisk/src/Makefile kenlm/Makefile)
AC_OUTPUT()

josiah/AnnealingSchedule.cpp Normal file

@@ -0,0 +1,36 @@
#include "AnnealingSchedule.h"
#include "StaticData.h"
using namespace Moses;
using namespace std;
namespace Josiah {
AnnealingSchedule::~AnnealingSchedule() {}
LinearAnnealingSchedule::LinearAnnealingSchedule(int len, float max_temp) :
AnnealingSchedule(len), starting_temp(max_temp) {
VERBOSE(2, "Created LinearAnnealingSchedule:\n len=" << len << ", starting temp=" << max_temp << endl);
}
float LinearAnnealingSchedule::GetTemperatureAtTime(int time) const {
const float temp = max(1.0f, (starting_temp -
(static_cast<float>(time) * (starting_temp - 0.5f)) / static_cast<float>(GetLength())));
VERBOSE(3, "Time " << time << ": temp=" << temp << endl);
return temp;
}
ExponentialAnnealingSchedule::ExponentialAnnealingSchedule(float start_temp, float stop_temp, float floor_temp, float ratio) :
AnnealingSchedule(0), m_startTemp(start_temp), m_stopTemp(stop_temp), m_floorTemp(floor_temp), m_ratio(ratio) {
VERBOSE(2, "Created ExponentialAnnealingSchedule:\n starting temp=" << start_temp << ", stopping temp=" << stop_temp << ", floor temp=" << m_floorTemp << ", ratio: " << ratio << endl);
}
float ExponentialAnnealingSchedule::GetTemperatureAtTime(int time) const {
float curTemp = m_startTemp * pow(m_ratio, time);
if (curTemp < m_stopTemp)
return m_floorTemp;
return curTemp;
}
}

josiah/AnnealingSchedule.h Normal file

@@ -0,0 +1,39 @@
#pragma once
namespace Josiah {
class AnnealingSchedule {
public:
AnnealingSchedule(int length) : m_len(length) {}
virtual ~AnnealingSchedule();
inline int GetLength() const { return m_len; }
virtual float GetTemperatureAtTime(int time) const = 0;
private:
int m_len;
};
// cools linearly
class LinearAnnealingSchedule : public AnnealingSchedule {
public:
LinearAnnealingSchedule(int len, float max_temp);
virtual float GetTemperatureAtTime(int time) const;
private:
float starting_temp;
};
// cools exponentially
class ExponentialAnnealingSchedule : public AnnealingSchedule {
public:
ExponentialAnnealingSchedule(float start_temp, float stop_temp, float floor_temp, float ratio);
virtual float GetTemperatureAtTime(int time) const;
float GetFloorTemp() {return m_floorTemp;}
void SetFloorTemp(float f) { m_floorTemp = f;}
private:
float m_startTemp;
float m_stopTemp;
float m_floorTemp;
float m_ratio;
};
};
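
To make the cooling behaviour concrete, here is a small standalone sketch (illustrative only, not part of the commit; it assumes the header above and linking against a Moses build for the VERBOSE macro) that prints both schedules over a ten-step burn-in:

#include <iostream>
#include "AnnealingSchedule.h"

int main() {
  //Linear: starts at max_temp and cools towards 1.0 over `len` steps.
  Josiah::LinearAnnealingSchedule linear(10, 4.0f);
  //Exponential: temperature is start * ratio^time, dropping to the
  //floor temperature once it falls below the stopping temperature.
  Josiah::ExponentialAnnealingSchedule expo(4.0f, 0.01f, 0.0f, 0.5f);
  for (int t = 0; t <= 10; ++t) {
    std::cout << t << "\tlinear=" << linear.GetTemperatureAtTime(t)
              << "\texponential=" << expo.GetTemperatureAtTime(t) << "\n";
  }
  return 0;
}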

josiah/Bleu.cpp Normal file

@@ -0,0 +1,312 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Bleu.h"
using namespace Moses;
using namespace std;
namespace Josiah {
/**
* Extract the ngrams in the given sentence, up to the BLEU_ORDER,
* clipping using the existing ngrams as necessary. */
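//Note: across repeated calls, the stored count for each ngram is the
//maximum seen in any single reference (BLEU's reference-count clipping);
//e.g. if one reference contains "the" twice and another three times,
//the clipped count for "the" ends up as 3.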
static void ExtractNGrams(const Translation& sentence, NGramMap& ngrams) {
NGramMap newNgrams;
for (size_t start = 0; start < sentence.size(); ++start) {
Translation ngram;
for (size_t length = 1; length <= BLEU_ORDER; ++length) {
size_t position = start + length-1;
if (position < sentence.size()) {
ngram.push_back(sentence[position]);
++newNgrams[ngram];
} else {
break;
}
}
}
//clipping
for (NGramMap::const_iterator i = newNgrams.begin(); i != newNgrams.end(); ++i) {
Translation ngram = i->first;
if (ngrams[i->first] < i->second) {
ngrams[i->first] = i->second;
}
}
}
static BleuStats ExtractStats(const NGramMap& ref, const NGramMap& hyp) {
/*
cerr << "ref ngrams" << endl;
for (NGramMap::const_iterator ref_iter = ref.begin(); ref_iter != ref.end(); ++ref_iter) {
const Translation& ngram = ref_iter->first;
size_t count = ref_iter->second;
for (size_t i = 0; i < ngram.size(); ++i) {
cerr << *(ngram[i]) << " ";
}
cerr << count << endl;
}*/
BleuStats stats;
for (NGramMap::const_iterator hyp_iter = hyp.begin(); hyp_iter != hyp.end(); ++hyp_iter) {
const Translation& ngram = hyp_iter->first;
size_t count = hyp_iter->second;
size_t order = ngram.size();
stats.total(order, stats.total(order) + count);
NGramMap::const_iterator ref_iter = ref.find(ngram);
if (ref_iter != ref.end()) {
size_t matches = min(count, ref_iter->second);
stats.tp(order,stats.tp(order) + matches);
}
}
return stats;
}
Bleu::Bleu() : m_smoothingWeight(0) {
for (size_t i = 1; i <= BLEU_ORDER; ++i) {
m_smoothingStats.tp(i, BLEU_SMOOTHING);
m_smoothingStats.total(i, BLEU_SMOOTHING);
}
}
void Bleu::SetSmoothingWeight(float smoothingWeight) {
m_smoothingWeight = smoothingWeight;
}
float Bleu::GetSmoothingWeight() const {
return m_smoothingWeight;
}
GainFunctionHandle Bleu::GetGainFunction(const std::vector<size_t>& sentenceIds) {
return GainFunctionHandle(new BleuFunction(*this,sentenceIds));
}
void Bleu::AddReferences(const std::vector<Translation>& refs, const Translation& source) {
if (m_referenceLengths.size()) {
assert(m_referenceLengths[0].size() == refs.size());
}
m_sourceLengths.push_back(source.size());
m_referenceLengths.push_back(vector<size_t>());
m_referenceStats.push_back(NGramMap());
for (size_t i = 0; i < refs.size(); ++i) {
m_referenceLengths.back().push_back(refs[i].size());
ExtractNGrams(refs[i],m_referenceStats.back());
}
}
const NGramMap& Bleu::GetReferenceStats(size_t sentenceId) const {
return m_referenceStats.at(sentenceId);
}
const vector<size_t>& Bleu::GetReferenceLengths(size_t sentenceId) const {
return m_referenceLengths.at(sentenceId);
}
float Bleu::GetAverageReferenceLength(size_t sentenceId) const {
const vector<size_t>& lengths = GetReferenceLengths(sentenceId);
float total = 0.0f;
for (size_t i = 0; i < lengths.size(); ++i) {
total += lengths[i];
}
return total/lengths.size();
}
size_t Bleu::GetSourceLength(size_t sentenceId) const {
return m_sourceLengths.at(sentenceId);
}
const BleuStats& Bleu::GetSmoothingStats() const {
return m_smoothingStats;
}
void Bleu::AddSmoothingStats(const BleuStats& stats) {
//Chiang's update rule.
if (m_smoothingWeight) {
m_smoothingStats += stats;
m_smoothingStats *= m_smoothingWeight;
}
}
BleuFunction::BleuFunction(Bleu& bleu, const vector<size_t>& sentenceIds):
m_stats(bleu),m_sentenceIds(sentenceIds), m_smoothingStatsCount(0), m_cachedStats(sentenceIds.size())
{}
float BleuFunction::Evaluate(const std::vector<Translation>& hypotheses) const {
assert(hypotheses.size() == m_sentenceIds.size());
BleuStats totalStats;
for (size_t i = 0; i < hypotheses.size(); ++i) {
if (m_cachedStats[i].first != hypotheses[i]) {
//don't have this sentence cached
NGramMap hypNgrams;
ExtractNGrams(hypotheses[i], hypNgrams);
const NGramMap& refNgrams = m_stats.GetReferenceStats(m_sentenceIds[i]);
m_cachedStats[i] = pair<Translation,BleuStats>
(hypotheses[i],ExtractStats(refNgrams,hypNgrams));
//cerr << "SID " << m_sentenceIds[i] << " " << m_cachedStats[i].second << endl;
}
totalStats += m_cachedStats[i].second;
float src_len = m_stats.GetSourceLength(m_sentenceIds[i]);
float hyp_len = hypotheses[i].size();
const vector<size_t>& ref_lens = m_stats.GetReferenceLengths(m_sentenceIds[i]);
//closest length
float ref_len = ref_lens[0];
for (size_t j = 1; j < ref_lens.size(); ++j) {
if (abs(ref_len - hyp_len) > abs(ref_lens[j] - hyp_len)) {
ref_len = ref_lens[j];
}
}
totalStats.ref_len(totalStats.ref_len() + ref_len);
totalStats.hyp_len(totalStats.hyp_len() + hyp_len);
totalStats.src_len(totalStats.src_len() + src_len);
//cerr << totalStats << endl;
}
float log_bleu = 0;
const BleuStats& smoothing = m_stats.GetSmoothingStats();
for (size_t i = 1; i <= BLEU_ORDER; ++i) {
log_bleu = log_bleu + log(totalStats.tp(i) + smoothing.tp(i)) -
log(totalStats.total(i) + smoothing.total(i));
}
log_bleu /= BLEU_ORDER;
float ref_len = totalStats.ref_len() + smoothing.ref_len();
float hyp_len = totalStats.hyp_len() + smoothing.hyp_len();
float bp = 0;
if (hyp_len < ref_len) {
bp = 1 - ref_len / hyp_len;
}
log_bleu += bp;
//cerr << totalStats << endl;
//cerr << "bleu before scale: " << exp(log_bleu);
if (m_stats.GetSmoothingWeight()) {
//cerr << "smoothing " << smoothing << endl;
//cerr << "lb " << log_bleu;
//doing approx doc bleu
log_bleu += log(totalStats.src_len() + smoothing.src_len()) - log(hypotheses.size());
//cerr << " " << log_bleu << endl;
} else {
log_bleu += log(100);
}
//cerr << " After " << exp(log_bleu) << endl;
//cerr << totalStats << " " << exp(log_bleu) << endl;
return exp(log_bleu);
}
void BleuFunction::AddSmoothingStats(size_t sentenceId, const Translation& hypothesis) {
//Only calculating stats for one sentence
sentenceId = m_sentenceIds[sentenceId];
NGramMap hypNgrams;
ExtractNGrams(hypothesis,hypNgrams);
const NGramMap& refNgrams = m_stats.GetReferenceStats(sentenceId); //reference, to avoid copying the map
BleuStats smoothStats = ExtractStats(refNgrams,hypNgrams);
smoothStats.src_len(m_stats.GetSourceLength(sentenceId));
smoothStats.hyp_len(hypothesis.size());
smoothStats.ref_len(m_stats.GetAverageReferenceLength(sentenceId));
m_smoothingStats += smoothStats;
++m_smoothingStatsCount;
}
void BleuFunction::UpdateSmoothingStats() {
m_smoothingStats /= m_smoothingStatsCount;
m_stats.AddSmoothingStats(m_smoothingStats);
m_smoothingStatsCount = 0;
m_smoothingStats.clear();
}
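//m_data layout (2*BLEU_ORDER+3 floats): for each order n in 1..BLEU_ORDER,
//tp(n) lives at index 2n-2 and total(n) at index 2n-1; the final three
//slots hold src_len, ref_len and hyp_len, as the accessors below encode.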
BleuStats::BleuStats() :
m_data(BLEU_ORDER*2+3) {}
void BleuStats::clear() {
m_data = valarray<float>(BLEU_ORDER*2+3);
}
float BleuStats::tp(size_t order) const {
return m_data[order*2-2];
}
void BleuStats::tp(size_t order, float val) {
m_data[order*2-2] = val;
}
float BleuStats::total(size_t order) const {
return m_data[order*2-1];
}
void BleuStats::total(size_t order, float val) {
m_data[order*2-1] = val;
}
float BleuStats::src_len() const {
return m_data[BLEU_ORDER*2];
}
void BleuStats::src_len(float val) {
m_data[BLEU_ORDER*2] = val;
}
float BleuStats::ref_len() const {
return m_data[BLEU_ORDER*2+1];
}
void BleuStats::ref_len(float val) {
m_data[BLEU_ORDER*2+1] = val;
}
float BleuStats::hyp_len() const {
return m_data[BLEU_ORDER*2+2];
}
void BleuStats::hyp_len(float val) {
m_data[BLEU_ORDER*2+2] = val;
}
void BleuStats::operator+=(const BleuStats& rhs) {
m_data += rhs.m_data;
}
void BleuStats::operator*=(float scalar) {
m_data *= scalar;
}
void BleuStats::operator/=(float scalar) {
m_data /= scalar;
}
void BleuStats::write(ostream& out) const {
out << "{";
for (size_t i = 0; i < m_data.size(); ++i) {
out << m_data[i];
if (i < m_data.size()-1) out << ",";
}
out << "}";
}
ostream& operator<<(ostream& out, const BleuStats& stats) {
stats.write(out);
return out;
}
}

josiah/Bleu.h Normal file

@@ -0,0 +1,117 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <valarray>
#include <vector>
#include <boost/unordered_map.hpp>
#include "Gain.h"
#define BLEU_ORDER 4
#define BLEU_SMOOTHING 0.01
namespace Josiah {
typedef boost::unordered_map<std::vector<const Moses::Factor*>, size_t> NGramMap;
class BleuStats {
public:
BleuStats();
void clear();
float tp(size_t order) const;
void tp(size_t order, float val);
float total(size_t order) const;
void total(size_t order, float val);
float src_len() const;
void src_len(float val);
float ref_len() const;
void ref_len(float val);
float hyp_len() const;
void hyp_len(float val);
void operator+=(const BleuStats& rhs);
void operator*=(float scalar);
void operator/=(float scalar);
void write(std::ostream& out) const;
private:
std::valarray<float> m_data;
};
class Bleu : public Gain {
public:
Bleu();
virtual GainFunctionHandle GetGainFunction(const std::vector<size_t>& sentenceIds);
virtual void AddReferences(const std::vector<Translation>& refs, const Translation& source);
virtual float GetAverageReferenceLength(size_t sentenceId) const;
const NGramMap& GetReferenceStats(size_t sentenceId) const;
const std::vector<size_t>& GetReferenceLengths(size_t sentenceId) const;
size_t GetSourceLength(size_t sentenceId) const;
/** Get the overall smoothing stats */
const BleuStats& GetSmoothingStats() const;
/** Update the overall smoothing stats with those collected for the current sentence */
void AddSmoothingStats(const BleuStats& stats);
/** The decay constant for Chiang smoothing. Zero indicates no smoothing */
void SetSmoothingWeight(float smoothingWeight);
float GetSmoothingWeight() const;
private:
std::vector<NGramMap> m_referenceStats;
std::vector<std::vector<size_t> > m_referenceLengths;
std::vector<size_t> m_sourceLengths;
BleuStats m_smoothingStats;
float m_smoothingWeight;
};
class BleuFunction : public GainFunction {
public:
BleuFunction(Bleu& bleu, const std::vector<size_t>& sentenceIds);
virtual float Evaluate(const std::vector<Translation>& hypotheses) const;
/** Add the stats for this hypothesis to the smoothing stats being collected */
virtual void AddSmoothingStats(size_t sentenceId, const Translation& hypothesis);
/** Inform the GainFunction that we've finished with this sentence, and it can now
update the parent's stats */
virtual void UpdateSmoothingStats();
private:
Bleu& m_stats;
std::vector<size_t> m_sentenceIds;
//smoothing stats collected for this batch
BleuStats m_smoothingStats;
size_t m_smoothingStatsCount;
mutable std::vector<std::pair<Translation, BleuStats> > m_cachedStats;
};
std::ostream& operator<<(std::ostream& out, const BleuStats& stats);
}
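
As a rough usage sketch (illustrative, not part of the commit): references are registered once per sentence, then a gain function handle scores hypotheses for a batch of sentence ids. The toTranslation helper below is hypothetical; in the real code Translations (vectors of Moses Factor pointers) come from the decoder, and GainFunctionHandle (from Gain.h) is assumed to behave like a shared pointer.

#include <string>
#include <vector>
#include "Bleu.h"

//Hypothetical helper: tokenise a string into Moses factors.
Josiah::Translation toTranslation(const std::string& s);

void bleuExample() {
  Josiah::Bleu bleu;
  std::vector<Josiah::Translation> refs;
  refs.push_back(toTranslation("the cat sat on the mat"));
  bleu.AddReferences(refs, toTranslation("die katze sass auf der matte"));

  //Gain function over a batch containing just sentence 0.
  Josiah::GainFunctionHandle gf =
      bleu.GetGainFunction(std::vector<size_t>(1, 0));
  std::vector<Josiah::Translation> hyps;
  hyps.push_back(toTranslation("the cat sat on a mat"));
  //Smoothed BLEU; scaled by 100 when no smoothing weight is set.
  float gain = gf->Evaluate(hyps);
  (void)gain;
}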

josiah/CorpusSampler.cpp Normal file

@@ -0,0 +1,286 @@
#include "CorpusSampler.h"
#include "Decoder.h"
#include "Hypothesis.h"
#include "GibbsOperator.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include <boost/serialization/vector.hpp>
namespace mpi = boost::mpi;
#endif
using namespace std;
namespace Josiah {
void CorpusSamplerCollector::collect(Sample& s) {
//nothing to do
}
//Resample based on derivation distribution
void CorpusSamplerCollector::resample(int sent) {
std::map<const Derivation*,double> m_p, m_resampled_p;
m_derivationCollector.getDistribution(m_p); //fetch the distribution
//copy it to a vector, will be easier for further processing
vector<const Derivation*> derivations;
vector<double> scores;
for (map<const Derivation*,double>::iterator it = m_p.begin(); it != m_p.end(); ++it) {
derivations.push_back(it->first);
scores.push_back(it->second);
}
//Printing out distribution
IFVERBOSE(2) {
for (size_t i = 0; i < derivations.size();++i) {
cerr << *derivations[i] << " has score " << scores[i] <<endl;
}
}
double sum = scores[0];
for (size_t i = 1; i < scores.size(); ++i) {
sum = log_sum(sum,scores[i]);
}
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(minus<double>(),sum));
//now sample from this
for (int j = 0; j < m_samples; ++j) {
//random number between 0 and 1
double random = RandomNumberGenerator::instance().next();//(double)rand() / RAND_MAX;
random = log(random);
//now figure out which sample
size_t position = 1;
sum = scores[0];
for (; position < scores.size() && sum < random; ++position) {
sum = log_sum(sum,scores[position]);
}
size_t chosen = position-1;
MPI_VERBOSE(2, "Chosen derivation " << chosen << endl)
//Store chosen derivation's feature values and length
const Derivation* chosenDeriv = derivations[chosen];
m_resampled_p[chosenDeriv] += 1.0/m_samples;
MPI_VERBOSE(2, "Chosen deriv " << *chosenDeriv << endl)
MPI_VERBOSE(2, "Chosen deriv size" << chosenDeriv->getTargetSentenceSize() << endl)
m_featureVectors.at(j) += chosenDeriv->getFeatureValues();
MPI_VERBOSE(2, "Feature vector : " << m_featureVectors.at(j) << endl)
m_lengths[j] += chosenDeriv->getTargetSentenceSize();
MPI_VERBOSE(2, "Lengths : " << m_lengths.at(j) << endl)
//Store chosen derivation's gain sufficient stats
SufficientStats *stats = new BleuSufficientStats(4);
std::vector<const Factor*> yield;
chosenDeriv->getTargetFactors(yield);
g[sent]->GetSufficientStats(yield, stats);
m_sufficientStats[j] += *(static_cast<BleuSufficientStats*>(stats));
MPI_VERBOSE(2, "Stats : " << m_sufficientStats.at(j) << endl)
delete stats;
}
IFVERBOSE(2) {
cerr << "After resampling, distribution is : " << endl;
for (map<const Derivation*,double>::iterator it = m_resampled_p.begin(); it != m_resampled_p.end(); ++it) {
cerr << *(it->first) << "has score " << it->second << endl;
}
}
setRegularisation(m_p);
setRegularisationGradientFactor(m_p);
//Now reset the derivation collector
m_derivationCollector.reset();
m_numSents++;
}
#ifdef MPI_ENABLED
template<class T>
struct VectorPlus {
vector<T> operator()(const vector<T>& lhs, const vector<T>& rhs) const {
assert(lhs.size() == rhs.size());
vector<T> sum;
sum.reserve(lhs.size()); //reserve, not pre-size: the loop below appends
for (size_t i = 0; i < lhs.size(); ++i) sum.push_back(lhs[i] + rhs[i]);
return sum;
}
};
void CorpusSamplerCollector::AggregateSamples(int rank) {
AggregateSuffStats(rank);
}
void CorpusSamplerCollector::AggregateSuffStats(int rank) {
/*what do we need to store?
1. Feature Vectors
2. Lengths
3. Bleu Stats
*/
vector <int> lengths (m_lengths.size());
FVector featsVecs, recFeatsVecs;
FVector suffStats, recSuffStats;
int numSents;
mpi::communicator world;
//Reduce length
mpi::reduce(world,m_lengths,lengths,VectorPlus<int>(),0);
// if (MPI_SUCCESS != MPI_Reduce(const_cast<int*>(&m_lengths[0]), &lengths[0], m_lengths.size(), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Reduce numSents
// if (MPI_SUCCESS != MPI_Reduce(&m_numSents, &numSents, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
mpi::reduce(world,m_numSents,numSents,std::plus<int>(),0);
//Reduce feature vectors and sufficient stats
mpi::reduce(world,m_featureVectors,m_featureVectors,VectorPlus<FVector>(),0);
mpi::reduce(world,m_sufficientStats,m_sufficientStats,VectorPlus<BleuSufficientStats>(),0);
//MPI can't handle vector of vectors, so first concatenate elements together
/*
//Concatenate feature vectors
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
for (size_t j = 0; j < m_featureVectors[i].size(); ++j) {
featsVecs.push_back(m_featureVectors[i][j]);
}
}
//Concatenate sufficient stats
for (size_t i = 0; i < m_sufficientStats.size(); ++i) {
vector < float > bleuStats = m_sufficientStats[i].data();
for (size_t j = 0; j < bleuStats.size(); ++j) {
suffStats.push_back(bleuStats[j]);
}
}
if (rank == 0) {
recFeatsVecs.resize(featsVecs.size());
recSuffStats.resize(suffStats.size());
}
//Reduce FVs and SStats
if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&featsVecs[0]), &recFeatsVecs[0], featsVecs.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&suffStats[0]), &recSuffStats[0], suffStats.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Unpack FVs and SStats,
if (rank == 0 ) {
//FVs
size_t numFeats = recFeatsVecs.size() / m_featureVectors.size();
m_featureVectors.clear();
for (size_t i = 0; i < recFeatsVecs.size(); i += numFeats) {
vector<float> features(recFeatsVecs.begin() + i, recFeatsVecs.begin() + i + numFeats);
ScoreComponentCollection feats(features);
m_featureVectors.push_back(feats);
}
//Suff Stats
size_t sizeStats = recSuffStats.size() / m_sufficientStats.size();
m_sufficientStats.clear();
for (size_t i = 0; i < recSuffStats.size(); i += sizeStats) {
vector<float> _stats(recSuffStats.begin() + i, recSuffStats.begin() + i + sizeStats);
BleuSufficientStats stats(_stats);
m_sufficientStats.push_back(stats);
}
*/
//Transfer lengths back
if (rank == 0) {
m_lengths = lengths;
m_numSents = numSents;
}
}
#endif
float CorpusSamplerCollector::UpdateGradient(FVector* gradient,FValue *exp_len, FValue *unreg_exp_gain) {
FVector feature_expectations = getFeatureExpectations();
MPI_VERBOSE(1,"FEXP: " << feature_expectations << endl)
//gradient computation
FVector grad;
FValue exp_gain = 0;
FValue gain = 0.0;
for (size_t i = 0; i < m_featureVectors.size() ; ++i) {
FVector fv = m_featureVectors[i];
MPI_VERBOSE(2,"FV: " << fv)
gain = SentenceBLEU::CalcBleu(m_sufficientStats[i], false);
fv -= feature_expectations;
MPI_VERBOSE(2,"DIFF: " << fv)
fv *= gain;
MPI_VERBOSE(2,"GAIN: " << gain << endl);
exp_gain += gain;
grad += fv;
MPI_VERBOSE(2,"grad: " << grad << endl);
}
grad /= m_featureVectors.size();
exp_gain /= m_featureVectors.size();
cerr << "Gradient without reg " << grad << endl;
FVector regularizationGrad = getRegularisationGradientFactor();
regularizationGrad /= GetNumSents();
grad += regularizationGrad;
cerr << "Exp gain without reg term : " << exp_gain << endl;
*unreg_exp_gain = exp_gain;
exp_gain += getRegularisation()/GetNumSents();
cerr << "Exp gain with reg term: " << exp_gain << endl;
*gradient += grad;
MPI_VERBOSE(1,"Gradient: " << grad << endl)
cerr << "Gradient: " << grad << endl;
//expected length
if (exp_len) {
*exp_len = 0;
for (size_t j = 0; j < m_sufficientStats.size(); ++j) {
*exp_len += m_sufficientStats[j].hyp_len;
}
}
return exp_gain;
}
FVector CorpusSamplerCollector::getFeatureExpectations() const {
FVector sum;
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
sum += m_featureVectors[i];
}
return sum;
}
void CorpusSamplerCollector::reset() {
m_featureVectors.clear(); m_featureVectors.resize(m_samples);
m_lengths.clear(); m_lengths.resize(m_samples);
m_sufficientStats.clear(); m_sufficientStats.resize(m_samples);
m_numSents = 0;
}
float CorpusSamplerCollector::getReferenceLength() {
float refLen(0.0);
for (size_t j = 0; j < m_sufficientStats.size(); ++j) {
refLen += m_sufficientStats[j].ref_len;
}
return refLen;
}
}
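
The core of resample() above is drawing indices from a distribution whose scores are kept as logs: normalise by the log-sum, take the log of a uniform draw, and walk the running log-sum until it crosses that threshold. A self-contained sketch of the same technique (log_sum is defined inline here; the original uses Josiah's helper and RandomNumberGenerator):

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <vector>

//Numerically stable log(exp(a) + exp(b)).
static double log_sum(double a, double b) {
  double m = std::max(a, b);
  return m + std::log(std::exp(a - m) + std::exp(b - m));
}

//Draw an index given normalised log-probabilities, mirroring the
//running-sum walk in CorpusSamplerCollector::resample().
static size_t sampleIndex(const std::vector<double>& logProbs) {
  double r = std::log((double)std::rand() / RAND_MAX); //log of a uniform draw
  double sum = logProbs[0];
  size_t position = 1;
  for (; position < logProbs.size() && sum < r; ++position) {
    sum = log_sum(sum, logProbs[position]);
  }
  return position - 1;
}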

josiah/CorpusSampler.h Normal file

@@ -0,0 +1,62 @@
#pragma once
#include <map>
#include <utility>
#include "MpiDebug.h"
#include "FeatureVector.h"
#include "GibblerExpectedLossTraining.h"
#include "GibblerMaxDerivDecoder.h"
#include "Phrase.h"
#include "Sampler.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
using namespace Moses;
namespace Josiah {
class Sampler;
class Derivation;
class CorpusSamplerCollector : public ExpectedLossCollector {
public:
CorpusSamplerCollector(int samples, Sampler &sampler): ExpectedLossCollector(),
m_samples(samples), m_numSents(0) {
sampler.AddCollector(&m_derivationCollector);
m_featureVectors.resize(m_samples);
m_lengths.resize(m_samples);
m_sufficientStats.resize(m_samples);
}
virtual ~CorpusSamplerCollector() {}
virtual void collect(Sample& sample);
virtual void resample(int);
virtual FValue UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_exp_gain);
#ifdef MPI_ENABLED
virtual void AggregateSamples(int);
#endif
virtual void reset();
float getReferenceLength();
virtual void setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p) {}
virtual void setRegularisation(std::map<const Derivation*,double>& m_p) {}
virtual FVector getRegularisationGradientFactor() {return FVector();}
virtual FValue getRegularisation() {return 0.0;}
private:
std::vector<FVector> m_featureVectors;
std::vector <int> m_lengths;
std::vector <BleuSufficientStats> m_sufficientStats;
DerivationCollector m_derivationCollector;
const int m_samples;
FVector getFeatureExpectations() const;
int m_numSents;
int GetNumSents() { return m_numSents;}
protected:
void AggregateSuffStats(int);
};
}

josiah/CorpusSamplerAnnealed.cpp Normal file

@@ -0,0 +1,81 @@
#include "CorpusSamplerAnnealed.h"
#include "Hypothesis.h"
#include "Derivation.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
using namespace std;
namespace Josiah {
FVector CorpusSamplerAnnealedCollector::getExpectedFeatureValue(std::map<const Derivation*,double>& m_p) {
FVector expFV;
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
const Derivation* deriv = it->first;
expFV += (deriv->getFeatureValues() * (it->second));
}
return expFV;
}
void CorpusSamplerAnnealedCollector::setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p) {
double temperature = GetTemperature();
FVector expFV = getExpectedFeatureValue(m_p);
//cerr << "Expected FV " << expFV << endl;
float entropy_factor;
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
entropy_factor = -temperature * it->second * (log (it->second)+1);
//cerr << "Entropy factor " << entropy_factor << endl;
FVector fv = it->first->getFeatureValues();
fv -= expFV;
fv *= entropy_factor;
m_gradient += fv;
}
//cerr << "Gradient regularization " << m_gradient << endl;
}
void CorpusSamplerAnnealedCollector::setRegularisation(std::map<const Derivation*,double>& m_p) {
float entropy(0.0);
for (std::map<const Derivation*,double>::const_iterator it = m_p.begin(); it != m_p.end(); ++it) {
entropy -= it->second*log(it->second);
}
m_regularisation += GetTemperature() * entropy;
}
#ifdef MPI_ENABLED
void CorpusSamplerAnnealedCollector::AggregateSamples(int rank) {
AggregateRegularisationStats(rank);
AggregateSuffStats(rank);
}
void CorpusSamplerAnnealedCollector::AggregateRegularisationStats(int rank) {
FVector regularizationGrad;
float regularizationFactor;
FVector recvRegGrad;
//Reduce regularization
float reg = getRegularisation();
MPI_VERBOSE(1, "Regualarization for rank " << rank << " = " << reg << endl);
mpi::communicator world;
mpi::reduce(world,reg,regularizationFactor,std::plus<float>(),0);
//if (MPI_SUCCESS != MPI_Reduce(&reg, &regularizationFactor, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
//Reduce regularization gradient
MPI_VERBOSE(1, "Regualarization grad for rank " << rank << " = " << getRegularisationGradientFactor() << endl);
mpi::reduce(world,getRegularisationGradientFactor(), m_gradient ,FVectorPlus(),0);
//if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&regGrad[0]), &recvRegGrad[0], regGrad.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (rank == 0 ) {
m_regularisation = regularizationFactor;
}
MPI_VERBOSE(1, "After agg, Regualarization for rank " << rank << " = " << m_regularisation << endl);
MPI_VERBOSE(1, "After agg, Regualarization grad for rank " << rank << " = " << m_gradient << endl);
}
#endif
}
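
For reference, setRegularisation and setRegularisationGradientFactor above implement an entropy regulariser on the derivation distribution p (a sketch of the algebra, assuming p is the sampled distribution over derivations d with feature vectors f(d)): the objective gains the term

R = T \, H(p) = -T \sum_d p(d) \log p(d)

and its gradient contribution, as accumulated in the loop over m_p, is

\nabla R = -T \sum_d p(d) \, (\log p(d) + 1) \, \bigl(f(d) - \mathbb{E}_p[f]\bigr)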

josiah/CorpusSamplerAnnealed.h Normal file

@@ -0,0 +1,56 @@
#pragma once
#include <map>
#include <utility>
#include <ext/hash_map>
#include "FeatureVector.h"
#include "CorpusSampler.h"
#include "Phrase.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
using namespace Moses;
namespace Josiah {
class Derivation;
class GainFunction;
class CorpusSamplerCollector;
class CorpusSamplerAnnealedCollector : public CorpusSamplerCollector {
public:
CorpusSamplerAnnealedCollector(int samples, Sampler &sampler)
: CorpusSamplerCollector(samples, sampler), m_regularisation(0.0) {
}
float GetTemperature() { return m_temp;}
void SetTemperature(float temp) {m_temp = temp;}
virtual FVector getRegularisationGradientFactor() {
return m_gradient;
}
virtual float getRegularisation() {
return m_regularisation;
}
virtual void reset() {
CorpusSamplerCollector::reset();
m_regularisation = 0.0;
m_gradient.clear();
}
virtual void setRegularisationGradientFactor(std::map<const Derivation*,double>& m_p);
virtual void setRegularisation(std::map<const Derivation*,double>& m_p);
#ifdef MPI_ENABLED
virtual void AggregateSamples(int rank);
#endif
private:
FValue m_temp, m_regularisation;
FVector m_gradient;
FVector getExpectedFeatureValue(std::map<const Derivation*,double>& m_p);
#ifdef MPI_ENABLED
void AggregateRegularisationStats(int rank);
#endif
};
}

josiah/CorpusTrainer.cpp Normal file

@@ -0,0 +1,486 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/program_options.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "AnnealingSchedule.h"
#include "Decoder.h"
#include "Derivation.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "TrainingSource.h"
#include "FeatureVector.h"
#include "GibbsOperator.h"
#include "SentenceBleu.h"
#include "GainFunction.h"
#include "CorpusSampler.h"
#include "CorpusSamplerAnnealed.h"
#include "GibblerMaxTransDecoder.h"
#include "MpiDebug.h"
#include "StaticData.h"
#include "Optimizer.h"
#include "Selector.h"
#include "TranslationDelta.h"
#include "Utils.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::lexical_cast;
using boost::bad_lexical_cast;
using boost::split;
using boost::is_any_of;
namespace po = boost::program_options;
/**
* Main for Josiah - the Gibbs sampler for Moses.
**/
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
GibbsTimer timer;
size_t iterations;
unsigned int topn;
int debug;
int mpidebug;
string mpidebugfile;
string feature_file;
int burning_its;
int mbr_size;
string inputfile;
string outputfile;
string mosesini;
bool decode;
bool translate;
bool translation_distro;
bool derivation_distro;
bool help;
bool expected_cbleu;
unsigned training_batch_size;
bool mbr_decoding;
bool do_timing;
int max_training_iterations;
int num_samples;
uint32_t seed;
int lineno;
bool randomize;
FValue scalefactor;
FValue eta;
FValue mu;
string weightfile;
vector<string> ref_files;
int periodic_decode;
bool collect_dbyt;
bool output_max_change;
bool anneal;
unsigned int reheatings;
float max_temp;
float prior_variance;
float prior_mean;
string prev_gradient_file;
bool expected_cbleu_da;
float start_temp_expda;
float stop_temp_expda;
float floor_temp_expda;
float anneal_ratio_da;
float gamma;
bool use_metanormalized_egd;
int optimizerFreq;
float brev_penalty_scaling_factor;
bool hack_bp_denum;
int weight_dump_freq;
string weight_dump_stem;
int init_iteration_number;
bool greedy, fixedTemp;
float fixed_temperature;
vector<string> ngramorders;
size_t lag;
float flip_prob, merge_split_prob, retrans_prob;
float log_base_factor;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("timing,m", po::value(&do_timing)->zero_tokens()->default_value(false), "Display timing information.")
("max-samples", po::value<size_t>(&iterations)->default_value(5),
"How many samples to gather initially (before resampling step)")
("samples,s", po::value<int>(&num_samples)->default_value(5), "Number of samples used for training")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("scale-factor,c", po::value<float>(&scalefactor)->default_value(1.0), "Scale factor for model weights.")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("output-file-prefix,o",po::value<string>(&outputfile),"Output file prefix for translations, MBR output, etc")
("nbest-drv,n",po::value<unsigned int>(&topn)->default_value(0),"Write the top n derivations to stdout")
("weights,w",po::value<string>(&weightfile),"Weight file")
("decode-derivation,d",po::value( &decode)->zero_tokens()->default_value(false),"Write the most likely derivation to stdout")
("decode-translation,t",po::value(&translate)->zero_tokens()->default_value(false),"Write the most likely translation to stdout")
("distro-derivation", po::value(&derivation_distro)->zero_tokens()->default_value(false), "Print derivation probability distribution")
("distro-translation", po::value(&translation_distro)->zero_tokens()->default_value(false), "Print translation probability distribution")
("periodic-derivation,p",po::value(&periodic_decode)->default_value(0), "Periodically write the max derivation to stderr")
("max-change", po::value(&output_max_change)->zero_tokens()->default_value(false), "Whenever the max deriv or max trans changes, write it to stderr")
("collect-dbyt",po::value(&collect_dbyt)->zero_tokens()->default_value(false), "Collect derivations per translation")
("line-number,L", po::value(&lineno)->default_value(0), "Starting reference/line number")
("randomize-batches,R", po::value(&randomize)->zero_tokens()->default_value(false), "Randomize training batches")
("gaussian-prior-variance", po::value<float>(&prior_variance)->default_value(0.0f), "Gaussian prior variance (0 for no prior)")
("gaussian-prior-mean,P", po::value<float>(&prior_mean), "Gaussian prior mean")
("expected-bleu-training,T", po::value(&expected_cbleu)->zero_tokens()->default_value(false), "Train to maximize expected corpus BLEU")
("max-training-iterations,M", po::value(&max_training_iterations)->default_value(30), "Maximum training iterations")
("training-batch-size,S", po::value(&training_batch_size)->default_value(0), "Batch size to use during xpected bleu training, 0 = full corpus")
("reheatings", po::value<unsigned int>(&reheatings)->default_value(1), "Number of times to reheat the sampler")
("anneal,a", po::value(&anneal)->default_value(false)->zero_tokens(), "Use annealing during the burn in period")
("max-temp", po::value<float>(&max_temp)->default_value(4.0), "Annealing maximum temperature")
("eta", po::value<FValue>(&eta)->default_value(0.0), "Default learning rate for SGD/EGD")
("prev-gradient", po::value<string>(&prev_gradient_file), "File containing previous gradient for restarting SGD/EGD")
("mu", po::value<FValue>(&mu)->default_value(1.0f), "Metalearning rate for EGD")
("gamma", po::value<FValue>(&gamma)->default_value(0.9f), "Smoothing parameter for Metanormalized EGD ")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("use-metanormalized-egd,N", po::value(&use_metanormalized_egd)->zero_tokens()->default_value(false), "Use metanormalized EGD")
("expected-bleu-deterministic-annealing-training,D", po::value(&expected_cbleu_da)->zero_tokens()->default_value(false), "Train to maximize expected corpus BLEU using deterministic annealing")
("optimizer-freq", po::value<int>(&optimizerFreq)->default_value(1),"Number of optimization to perform at given temperature")
("initial-det-anneal-temp", po::value<float>(&start_temp_expda)->default_value(1000.0f), "Initial deterministic annealing entropy temperature")
("final-det-anneal-temp", po::value<float>(&stop_temp_expda)->default_value(0.001f), "Final deterministic annealing entropy temperature")
("floor-temp", po::value<float>(&floor_temp_expda)->default_value(0.0f), "Floor temperature for det annealing")
("det-annealing-ratio,A", po::value<float>(&anneal_ratio_da)->default_value(0.5f), "Deterministc annealing ratio")
("hack-bp-denum,H", po::value(&hack_bp_denum)->default_value(false), "Use a predefined scalar as denum in BP computation")
("bp-scale,B", po::value<float>(&brev_penalty_scaling_factor)->default_value(1.0f), "Scaling factor for sent level brevity penalty for BLEU - default is 1.0")
("weight-dump-freq", po::value<int>(&weight_dump_freq)->default_value(0), "Frequency to dump weight files during training")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value("weights"), "Stem of filename to use for dumping weights")
("init-iteration-number", po::value<int>(&init_iteration_number)->default_value(0), "First training iteration will be one after this (useful for restarting)")
("greedy", po::value(&greedy)->zero_tokens()->default_value(false), "Greedy sample acceptor")
("fixed-temp-accept", po::value(&fixedTemp)->zero_tokens()->default_value(false), "Fixed temperature sample acceptor")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("lag", po::value<size_t>(&lag)->default_value(10), "Lag between collecting samples")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("log-base-factor", po::value<float>(&log_base_factor)->default_value(1.0f), "Scaling factor for log probabilities in translation and language models");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (expected_cbleu && expected_cbleu_da) {
std::cerr << "Incorrect usage: Cannot do both expected bleu training and expected bleu deterministic annealing training" << std::endl;
return 0;
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (translation_distro) translate = true;
if (derivation_distro) decode = true;
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
cerr << "optimizer freq " << optimizerFreq << endl;
assert(optimizerFreq != 0);
if (do_timing) {
timer.on();
}
if (log_base_factor != 1.0) {
cerr << "Setting log base factor to " << log_base_factor << endl;
SetLogBaseFactor(log_base_factor);
}
//set up moses
initMoses(mosesini,debug);
auto_ptr<Decoder> decoder(new RandomDecoder());
feature_vector extra_features;
configure_features_from_file(feature_file, extra_features);
std::cerr << "Using " << extra_features.size() << " features" << std::endl;
//scale model weights
vector<float> weights = StaticData::Instance().GetAllWeights();
transform(weights.begin(),weights.end(),weights.begin(),bind2nd(multiplies<float>(),scalefactor));
const_cast<StaticData&>(StaticData::Instance()).SetAllWeights(weights);
VERBOSE(1,"Scaled weights by factor of " << scalefactor << endl);
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
GainFunctionVector g;
if (ref_files.size() > 0) LoadReferences(ref_files, inputfile, &g, brev_penalty_scaling_factor, hack_bp_denum);
ostream* out = &cout;
if (!outputfile.empty()) {
ostringstream os;
os << setfill('0');
os << outputfile << '.' << setw(3) << rank << "_of_" << size;
VERBOSE(1, "Writing output to: " << os.str() << endl);
out = new ofstream(os.str().c_str());
}
auto_ptr<istream> in;
auto_ptr<InputSource> input;
auto_ptr<Optimizer> optimizer;
FVector etaVector(eta);
FVector prev_gradient;
if (!prev_gradient_file.empty()) {
prev_gradient.load(prev_gradient_file);
}
if (use_metanormalized_egd) {
optimizer.reset(new MetaNormalizedExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
gamma,
max_training_iterations,
prev_gradient));
} else {
optimizer.reset(new ExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
max_training_iterations,
prev_gradient));
}
if (optimizer.get()) {
optimizer->SetIteration(init_iteration_number);
}
if (prior_variance != 0.0f) {
assert(prior_variance > 0);
std::cerr << "Using Gaussian prior: \\sigma^2=" << prior_variance << " \\mu=" << prior_mean << endl;
optimizer->SetUseGaussianPrior(prior_mean, prior_variance);
}
ExpectedBleuTrainer* trainer = NULL;
vector<string> input_lines;
ifstream infiles(inputfile.c_str());
assert (infiles);
while(infiles) {
string line;
getline(infiles, line);
if (line.empty() && infiles.eof()) break;
assert(!line.empty());
input_lines.push_back(line);
}
VERBOSE(1, "Loaded " << input_lines.size() << " lines in training mode" << endl);
if (!training_batch_size || training_batch_size > input_lines.size())
training_batch_size = input_lines.size();
VERBOSE(1, "Batch size: " << training_batch_size << endl);
trainer = new ExpectedBleuTrainer(rank, size, training_batch_size, &input_lines, seed, randomize, optimizer.get(), weight_dump_freq, weight_dump_stem);
input.reset(trainer);
auto_ptr<SamplingSelector> selector(new SamplingSelector());
auto_ptr<AnnealingSchedule> annealingSchedule;
if (anneal) {
annealingSchedule.reset(new LinearAnnealingSchedule(burning_its, max_temp));
selector->SetAnnealingSchedule(annealingSchedule.get());
}
auto_ptr<AnnealingSchedule> detAnnealingSchedule;
if (expected_cbleu_da) {
detAnnealingSchedule.reset(new ExponentialAnnealingSchedule(start_temp_expda, stop_temp_expda, floor_temp_expda, anneal_ratio_da));
}
auto_ptr<CorpusSamplerCollector> elCollector;
Sampler sampler;
//configure the sampler
sampler.SetSelector(selector.get());
VERBOSE(2,"Reheatings: " << reheatings << endl);
sampler.SetReheatings(reheatings);
sampler.SetLag(lag); //thinning factor for sample collection
MergeSplitOperator mso(merge_split_prob);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
//Acceptor
if (greedy || fixed_temperature == 0) {
assert(!"greedy not supported");
}
else if (fixedTemp){
assert(!"fixed temp not supported");
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
if (expected_cbleu) {
elCollector.reset(new CorpusSamplerCollector(num_samples, sampler));
sampler.AddCollector(elCollector.get());
}
else if (expected_cbleu_da) {
elCollector.reset(new CorpusSamplerAnnealedCollector(num_samples, sampler));
sampler.AddCollector(elCollector.get());
}
timer.check("Processing input file");
int sentCtr = 0;
while (input->HasMore()) {
string line;
input->GetSentence(&line, &lineno);
if (line.empty()) {
if (!input->HasMore()) continue;
assert(!"I don't like empty lines");
}
elCollector->addGainFunction(&(g[lineno]));
//Set the annealing temperature
if (expected_cbleu_da) {
int it = optimizer->GetIteration() / optimizerFreq ;
float temp = detAnnealingSchedule->GetTemperatureAtTime(it);
CorpusSamplerAnnealedCollector* annealedELCollector = static_cast<CorpusSamplerAnnealedCollector*>(elCollector.get());
annealedELCollector->SetTemperature(temp);
cerr << "Annealing temperature " << annealedELCollector->GetTemperature() << endl;
}
Hypothesis* hypothesis;
TranslationOptionCollection* toc;
timer.check("Running decoder");
std::vector<Word> source;
decoder->decode(line,hypothesis,toc,source);
timer.check("Running sampler");
sampler.Run(hypothesis,toc,source,extra_features);
timer.check("Outputting results");
//Now resample
elCollector->resample(sentCtr);
//cerr << "curr " << trainer->GetCurr() << ", end " << trainer->GetCurrEnd() << endl;
if (trainer && trainer->GetCurr() == trainer->GetCurrEnd()) {//Now need to aggregate the feature vectors and bleu stats
#ifdef MPI_ENABLED
elCollector->AggregateSamples(rank);
#endif
FVector gradient;
float exp_trans_len = 0;
float unreg_exp_gain = 0;
float exp_gain = 0;
#ifdef MPI_ENABLED
if (rank == 0) {
exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
}
#else
exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
#endif
if (trainer)
trainer->IncorporateCorpusGradient(
exp_trans_len,
elCollector->getReferenceLength(),
exp_gain,
unreg_exp_gain,
gradient,
decoder.get());
elCollector->reset();
}
++lineno;
++sentCtr;
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
(*out) << flush;
if (!outputfile.empty())
delete out;
return 0;
}

josiah/Decoder.cpp Normal file

@@ -0,0 +1,191 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include "Decoder.h"
#include "DummyScoreProducers.h"
#include "Manager.h"
#include "PhraseFeature.h"
#include "TranslationSystem.h"
#include "TrellisPathCollection.h"
#include "TrellisPath.h"
using namespace std;
using namespace Moses;
namespace Josiah {
/**
* Allocates a char* and copies string into it.
**/
static char* strToChar(const string& s) {
char* c = new char[s.size()+1];
strcpy(c,s.c_str());
return c;
}
void initMoses(const string& inifile, int debuglevel, const vector<string>& extraArgs) {
static int BASE_ARGC = 6;
Parameter* params = new Parameter();
char ** mosesargv = new char*[BASE_ARGC + extraArgs.size()];
mosesargv[0] = strToChar("-f");
mosesargv[1] = strToChar(inifile);
mosesargv[2] = strToChar("-max-trans-opt-per-coverage");
mosesargv[3] = strToChar("0");
mosesargv[4] = strToChar("-v");
stringstream dbgin;
dbgin << debuglevel;
mosesargv[5] = strToChar(dbgin.str());
for (size_t i = 0; i < extraArgs.size(); ++i) {
mosesargv[BASE_ARGC+i] = strToChar(extraArgs[i]);
}
params->LoadParam(BASE_ARGC + extraArgs.size(),mosesargv);
StaticData::LoadDataStatic(params);
for (int i = 0; i < BASE_ARGC + (int)extraArgs.size(); ++i) { //free every strToChar allocation, including extraArgs
delete[] mosesargv[i];
}
delete[] mosesargv;
}
void setMosesWeights(const FVector& currentWeights) {
PhraseFeature::updateWeights(currentWeights);
StaticData& staticData =
const_cast<StaticData&>(StaticData::Instance());
TranslationSystem& system =
const_cast<TranslationSystem&>(staticData.GetTranslationSystem(
TranslationSystem::DEFAULT));
ScoreComponentCollection mosesWeights = staticData.GetAllWeights();
for (LMList::const_iterator i = system.GetLanguageModels().begin();
i != system.GetLanguageModels().end(); ++i) {
LanguageModel* lm = const_cast<LanguageModel*>(*i);
float lmWeight = currentWeights[lm->GetScoreProducerDescription()];
//lm->SetWeight(lmWeight);
mosesWeights.Assign(lm,lmWeight);
}
const ScoreProducer* wp = system.GetWordPenaltyProducer();
const string wpName = wp->GetScoreProducerDescription();
//staticData.SetWeightWordPenalty(currentWeights[wpName]);
mosesWeights.Assign(wp,currentWeights[wpName]);
const ScoreProducer* dp = system.GetDistortionProducer();
string distName = dp->GetScoreProducerDescription();
//staticData.SetWeightDistortion(currentWeights[distName]);
mosesWeights.Assign(dp, currentWeights[distName]);
staticData.SetAllWeights(mosesWeights);
}
struct TOptCompare {
bool operator()(const TranslationOption* lhs, const TranslationOption* rhs) {
return lhs->GetFutureScore() > rhs->GetFutureScore();
}
};
static const TargetPhrase& emptyTarget() {
static TargetPhrase* tp = new TargetPhrase(Input);
return *tp;
}
//Ensures that cleanup is not run the first time around
bool TranslationHypothesis::m_cleanup = false;
TranslationHypothesis::TranslationHypothesis(const string& source)
{
const StaticData &staticData = StaticData::Instance();
const TranslationSystem& system =
staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
//clean up previous sentence
if (m_cleanup) {
system.CleanUpAfterSentenceProcessing();
} else {
m_cleanup = true;
}
//the sentence
Sentence sentence(Input);
stringstream in(source + "\n");
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
sentence.Read(in,inputFactorOrder);
for (size_t i=0; i<sentence.GetSize(); ++i){ m_words.push_back(sentence.GetWord(i)); }
//translation options
m_manager.reset(new Manager(sentence, Normal, &system));
m_manager->ResetSentenceStats(sentence);
system.InitializeBeforeSentenceProcessing(sentence);
m_toc.reset(sentence.CreateTranslationOptionCollection(&system));
//const vector <DecodeGraph*>
// &decodeStepVL = staticData.GetDecodeGraphs();
m_toc->CreateTranslationOptions();
//sort the options
size_t maxPhraseSize = staticData.GetMaxPhraseLength();
for (size_t start = 0; start < m_words.size(); ++start) {
for (size_t end = start; end < start + maxPhraseSize && end < m_words.size(); ++end) {
TranslationOptionList& options =
m_toc->GetTranslationOptionList(start,end);
sort(options.begin(), options.end(), TOptCompare());
/*
while (options.size() > ttableLimit) {
size_t pos = options.size() - 1;
delete options.Get(pos);
options.Remove(pos);
}*/
}
}
//hypothesis
m_hypothesis.reset(Hypothesis::Create(*m_manager,sentence, emptyTarget()));
for (size_t i = 0; i < m_words.size(); ++i) {
m_allHypos.push_back(m_hypothesis);
WordsRange segment(i,i);
const TranslationOptionList& options =
m_toc->GetTranslationOptionList(segment);
/*
cerr << "Options for " << *(options.Get(0)->GetSourcePhrase()) << endl;
for (size_t j = 0; j < options.size(); ++j) {
cerr << *(options.Get(j)) << endl;
}*/
assert(options.size());
m_hypothesis.reset(
Hypothesis::Create(*m_hypothesis, *(options.Get(0)), NULL));
}
}
TranslationOptionCollection* TranslationHypothesis::getToc() const {
return m_toc.get();
}
Hypothesis* TranslationHypothesis::getHypothesis() const {
return m_hypothesis.get();
}
const vector<Word>& TranslationHypothesis::getWords() const {
return m_words;
}
}

josiah/Decoder.h Normal file

@@ -0,0 +1,81 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <cstring>
#include <sstream>
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
#include "Hypothesis.h"
#include "Parameter.h"
#include "Sentence.h"
#include "SearchNormal.h"
#include "StaticData.h"
#include "TrellisPathList.h"
#include "TranslationOptionCollectionText.h"
//
// Wrapper functions and objects for the decoder.
//
namespace Josiah {
typedef std::vector<const Moses::Factor*> Translation;
typedef boost::shared_ptr<Moses::Hypothesis> HypothesisHandle;
typedef boost::shared_ptr<Moses::Manager> ManagerHandle;
typedef boost::shared_ptr<Moses::TranslationOptionCollection> TOCHandle;
typedef std::vector<HypothesisHandle> HypothesisVector;
/**
* Initialise moses (including StaticData) using the given ini file and
* debug level, passing through any other command line arguments.
**/
void initMoses(const std::string& inifile, int debuglevel, const std::vector<std::string>& = std::vector<std::string>());
/** Update all the core moses weights */
void setMosesWeights(const Moses::FVector& weights);
/**
* Generates random translation hypotheses.
**/
class TranslationHypothesis {
public:
TranslationHypothesis(const std::string& source);
Moses::TranslationOptionCollection* getToc() const;
Moses::Hypothesis* getHypothesis() const;
//source sentence
const std::vector<Moses::Word>& getWords() const;
private:
static bool m_cleanup;
HypothesisHandle m_hypothesis;
TOCHandle m_toc;
HypothesisVector m_allHypos;
ManagerHandle m_manager;
std::vector<Moses::Word> m_words;
};
} //namespace
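A minimal usage sketch of the wrapper API above. The ini file name and source sentence are placeholders, and initMoses() must run once before any TranslationHypothesis is constructed, since the constructor relies on StaticData:
//Hedged usage sketch; "moses.ini" and the source sentence are illustrative only.
#include <iostream>
#include "Decoder.h"

int main() {
  //one-off setup: loads models and populates StaticData
  Josiah::initMoses("moses.ini", /*debuglevel*/ 0);
  //builds translation options and an initial hypothesis chain for one sentence
  Josiah::TranslationHypothesis hypo("das ist ein kleines haus");
  std::cerr << "source length: " << hypo.getWords().size() << std::endl;
  //the raw hypothesis and option collection are what the sampler operates on
  return hypo.getHypothesis() ? 0 : 1;
}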

josiah/Dependency.cpp Normal file
@@ -0,0 +1,441 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Dependency.h"
#include "Derivation.h"
using namespace Moses;
using namespace std;
namespace Josiah {
static void addChildren(vector<set<size_t> >& tree, size_t parent, set<size_t>& children) {
for (set<size_t>::const_iterator i = tree[parent].begin(); i!= tree[parent].end(); ++i) {
children.insert(*i);
addChildren(tree,*i,children);
}
}
DependencyTree::DependencyTree(const vector<Word>& words, FactorType parentFactor) {
vector<set<size_t> > tree(words.size()); // map parents to their immediate children
int root = -1;
for (size_t child = 0; child < words.size(); ++child) {
int parent = atoi(words[child][parentFactor]->GetString().c_str());
if (parent < 0) {
root = child;
} else {
tree[(size_t)parent].insert(child);
}
m_parents.push_back(parent);
}
m_spans.resize(words.size());
for (size_t i = 0; i < m_parents.size(); ++i) {
addChildren(tree,i,m_spans[i]);
m_spans[i].insert(i); // the head covers itself
}
}
static string ToString(const DependencyTree& t)
{
ostringstream os;
for (size_t i = 0; i < t.getLength(); ++i) {
os << i << "->" << t.getParent(i) << ", ";
}
return os.str();
}
ostream& operator<<(ostream& out, const DependencyTree& t)
{
out << ToString(t);
return out;
}
/** Parent of this index, -1 if root*/
int DependencyTree::getParent(size_t index) const {
return m_parents[index];
}
/** Does the parent word cover the child word? */
bool DependencyTree::covers(size_t parent, size_t descendent) const {
return m_spans[parent].count(descendent);
}
float CherrySyntacticCohesionFeatureFunction::computeScore() {
float interruptionCount = 0.0;
Hypothesis *prev = const_cast<Hypothesis*>(const_cast<Sample&>(getSample()).GetTargetTail()->GetNextHypo()); //first hypo in tgt order
for (Hypothesis* h = const_cast<Hypothesis*>(prev->GetNextHypo()); h; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
Context context = { &(prev->GetCurrSourceWordsRange()), &(h->GetCurrSourceWordsRange()), &(prev->GetCurrTargetWordsRange()), &(h->GetCurrTargetWordsRange()) };
interruptionCount += getInterruptions(prev->GetCurrSourceWordsRange(), &(h->GetTranslationOption()), h->GetCurrTargetWordsRange(), context);
prev = h;
}
VERBOSE(2,"In compute score, interr cnt = " << interruptionCount << endl);
return interruptionCount;
}
/** Score due to one segment */
//NB : Target Segment is the old one
float CherrySyntacticCohesionFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
const Hypothesis* prevTgt = gap.leftHypo;
if (!prevTgt->GetPrevHypo()) { //dummy hyp at start of sent, no cohesion violation
return 0.0;
}
Context context = { &(prevTgt->GetCurrSourceWordsRange()), &(option->GetSourceWordsRange()), &(prevTgt->GetCurrTargetWordsRange()), &(gap.segment) };
float interruptionCnt = getInterruptions(prevTgt->GetCurrSourceWordsRange(), option, gap.segment, context);
VERBOSE(2, "In single upd, int cnt " << interruptionCnt << endl);
return interruptionCnt;
}
/** Score due to flip */
float CherrySyntacticCohesionFeatureFunction::getFlipUpdateScore(
const TranslationOption* leftTgtOption, const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float interruptionCnt = 0.0;
//Let's sort out the order of the segments
WordsRange* leftTgtSegment = const_cast<WordsRange*> (&leftGap.segment);
WordsRange* rightTgtSegment = const_cast<WordsRange*> (&rightGap.segment);
assert(*leftTgtSegment < *rightTgtSegment); //should already be in target order!
const Hypothesis* leftTgtHypPred = leftGap.leftHypo;
const Hypothesis* rightTgtHypSucc = rightGap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), leftTgtSegment, rightTgtSegment };
//Left tgt option and its predecessor
if (leftTgtHypPred && leftTgtHypPred->GetPrevHypo()) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, *leftTgtSegment, context);
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
//Are the options contiguous on the target side?
bool contiguous = (leftTgtSegment->GetEndPos() + 1 == rightTgtSegment->GetStartPos()) ;
if (contiguous) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
else {
//Left tgt option and its successor
const Hypothesis* leftTgtSuccessorHyp = leftGap.rightHypo;
if (leftTgtSuccessorHyp) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), &(leftTgtSuccessorHyp->GetTranslationOption()), leftTgtSuccessorHyp->GetCurrTargetWordsRange(), context );
}
//Right tgt option and its predecessor
const Hypothesis* rightTgtPredecessorHyp = rightGap.leftHypo;
if (rightTgtPredecessorHyp) {
interruptionCnt += getInterruptions(rightTgtPredecessorHyp->GetCurrSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
//Everything in between
if (leftTgtSuccessorHyp != rightTgtPredecessorHyp) {
TranslationOption *prevOption = const_cast<TranslationOption*>(&(leftTgtSuccessorHyp->GetTranslationOption()));
for (Hypothesis *hyp = const_cast<Hypothesis*>(leftTgtSuccessorHyp->GetNextHypo()); ; hyp = const_cast<Hypothesis*>(hyp->GetNextHypo())) {
if (hyp) {
interruptionCnt += getInterruptions(prevOption->GetSourceWordsRange(), &(hyp->GetTranslationOption()), hyp->GetCurrTargetWordsRange(), context);
prevOption = const_cast<TranslationOption*>(&(hyp->GetTranslationOption()));
}
if (hyp == rightGap.leftHypo) {
break;
}
}
}
}
VERBOSE (2, "In flip, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
/** Score due to two segments **/
float CherrySyntacticCohesionFeatureFunction::getContiguousPairedUpdateScore(
const TranslationOption* leftTgtOption,const TranslationOption* rightTgtOption,
const TargetGap& gap) {
float interruptionCnt = 0.0;
const Hypothesis* leftTgtHypPred = gap.leftHypo;
const Hypothesis* rightTgtHypSucc = gap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), &(gap.segment), &(gap.segment) };
//Left tgt option and its predecessor
if (gap.segment.GetStartPos() > 0) {
if (leftTgtHypPred) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, gap.segment, context);
}
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), rightTgtOption, gap.segment, context);
VERBOSE(2, "In paired update, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
float CherrySyntacticCohesionFeatureFunction::getDiscontiguousPairedUpdateScore(
const TranslationOption* leftTgtOption,const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float interruptionCnt = 0.0;
WordsRange* leftTgtSegment = const_cast<WordsRange*> (&leftGap.segment);
WordsRange* rightTgtSegment = const_cast<WordsRange*> (&rightGap.segment);
assert(*leftTgtSegment < *rightTgtSegment); //should already be in target order!
const Hypothesis* leftTgtHypPred = leftGap.leftHypo;
const Hypothesis* leftTgtSuccessorHyp = leftGap.rightHypo;
const Hypothesis* rightTgtPredecessorHyp = rightGap.leftHypo;
const Hypothesis* rightTgtHypSucc = rightGap.rightHypo;
Context context = { &(leftTgtOption->GetSourceWordsRange()), &(rightTgtOption->GetSourceWordsRange()), leftTgtSegment, rightTgtSegment };
//Left tgt option and its predecessor
if (leftTgtSegment->GetStartPos() > 0) {
if (leftTgtHypPred) {
interruptionCnt += getInterruptions(leftTgtHypPred->GetCurrSourceWordsRange(), leftTgtOption, *leftTgtSegment, context);
}
}
//Right tgt option and its successor
if (rightTgtHypSucc) {
interruptionCnt += getInterruptions(rightTgtOption->GetSourceWordsRange() ,&(rightTgtHypSucc->GetTranslationOption()), rightTgtHypSucc->GetCurrTargetWordsRange(), context);
}
//Left tgt option and its successor
if (leftTgtSuccessorHyp) {
interruptionCnt += getInterruptions(leftTgtOption->GetSourceWordsRange(), &(leftTgtSuccessorHyp->GetTranslationOption()), leftTgtSuccessorHyp->GetCurrTargetWordsRange(), context);
}
//Right tgt option and its predecessor
if (rightTgtPredecessorHyp) {
interruptionCnt += getInterruptions(rightTgtPredecessorHyp->GetCurrSourceWordsRange(), rightTgtOption, *rightTgtSegment, context);
}
//Everything in between
if (leftTgtSuccessorHyp != rightTgtPredecessorHyp) {
TranslationOption *prevOption = const_cast<TranslationOption*>(&(leftTgtSuccessorHyp->GetTranslationOption()));
for (Hypothesis *hyp = const_cast<Hypothesis*>(leftTgtSuccessorHyp->GetNextHypo()); ; hyp = const_cast<Hypothesis*>(hyp->GetNextHypo())) {
if (hyp) {
interruptionCnt += getInterruptions(prevOption->GetSourceWordsRange(), &(hyp->GetTranslationOption()), hyp->GetCurrTargetWordsRange(), context);
prevOption = const_cast<TranslationOption*>(&(hyp->GetTranslationOption()));
}
if (hyp == rightGap.leftHypo) {
break;
}
}
}
VERBOSE (2, "In paired update, interr cnt = " << interruptionCnt << endl);
return interruptionCnt;
}
/**Helper method */
float CherrySyntacticCohesionFeatureFunction::getInterruptions(const WordsRange& prevSourceRange, const TranslationOption *option, const WordsRange& targetSegment, const Context& context) {
float interruptionCnt = 0.0;
size_t f_L = prevSourceRange.GetStartPos();
size_t f_R = prevSourceRange.GetEndPos();
interruptionCnt = getInterruptionCount(option, targetSegment, f_L, context);
if (interruptionCnt == 0 && f_L != f_R)
interruptionCnt = getInterruptionCount(option, targetSegment, f_R, context);
return interruptionCnt;
}
float CherrySyntacticCohesionFeatureFunction::getInterruptionCount(const TranslationOption *option, const WordsRange& targetSegment, size_t f, const Context& context) {
size_t r_prime = f;
size_t r = NOT_FOUND;
while (notAllWordsCoveredByTree(option, r_prime)) {
r = r_prime;
r_prime = m_sourceTree->getParent(r_prime);
}
if (r == NOT_FOUND)
return 0.0;
const set<size_t> & children = m_sourceTree->getChildren(r);
for (set<size_t>::const_iterator it = children.begin(); it != children.end(); ++it) {
size_t child = *it;
const WordsRange* otherSegment;
if (context.leftSrcRange->covers(child)) {
otherSegment = context.leftTgtRange ;
}
else if (context.rightSrcRange->covers(child)) {
otherSegment = context.rightTgtRange ;
}
else {
Hypothesis* hyp = const_cast<Sample&>(getSample()).GetHypAtSourceIndex(child);
otherSegment = &(hyp->GetCurrTargetWordsRange());
}
if (isInterrupting(*otherSegment, targetSegment)) {
return 1.0;
}
}
return 0.0;
}
bool CherrySyntacticCohesionFeatureFunction::isInterrupting(const WordsRange& otherSegment, const WordsRange& targetSegment) {
return otherSegment > targetSegment;
}
bool CherrySyntacticCohesionFeatureFunction::notAllWordsCoveredByTree(const TranslationOption* option, size_t parent) {
for (size_t s = option->GetStartPos(); s <= option->GetEndPos(); ++s) {
if (!m_sourceTree->covers(parent, s))
return true;
}
return false;
}
//new sample
DependencyDistortionFeatureFunction::DependencyDistortionFeatureFunction(const Sample& sample, Moses::FactorType parentFactor) :
DependencyFeatureFunction(sample,"DependencyDistortion",parentFactor) {
size_t sourceSize = getSample().GetSourceSize();
size_matrix_t::extent_gen extents;
m_distances.resize(extents[sourceSize][sourceSize]);
//Use Floyd-Warshall to compute all the distances
//Initialise with the (undirected) tree structure
for (size_t i = 0; i < sourceSize; ++i) {
size_t iparent = (size_t)m_sourceTree->getParent(i);
for (size_t j = 0; j < sourceSize; ++j) {
size_t jparent = (size_t)m_sourceTree->getParent(j);
if (i == j) {
m_distances[i][j] = 0;
} else if (iparent == j || jparent == i) {
m_distances[i][j] = 1;
} else {
m_distances[i][j] = sourceSize*2; //no path - infinity
}
}
}
//run algorithm
for (size_t k = 0; k < sourceSize; ++k) {
for (size_t i = 0; i < sourceSize; ++i) {
for (size_t j = 0; j < sourceSize; ++j) {
m_distances[i][j] = min(m_distances[i][j], m_distances[i][k] + m_distances[k][j]);
}
}
}
/*for (size_t i = 0; i < sourceSize; ++i) {
for (size_t j = 0; j < sourceSize; ++j) {
cerr << "p[" << i << "][" << j << "] = " << m_distances[i][j] << " ";
}
cerr << endl;
}*/
}
size_t DependencyDistortionFeatureFunction::getDistortionDistance(const WordsRange& leftRange, const WordsRange& rightRange) {
size_t leftSourcePos = leftRange.GetEndPos();
size_t rightSourcePos = rightRange.GetStartPos();
return m_distances[leftSourcePos][rightSourcePos] - 1;
}
/** Compute full score of a sample from scratch **/
float DependencyDistortionFeatureFunction::computeScore() {
//
// The score for each pair of adjacent target phrases is the tree distance between the corresponding source words at the phrase boundaries
//
float score = 0;
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
const Hypothesis* nextHypo = currHypo->GetNextHypo();
if (nextHypo) {
score += getDistortionDistance(currHypo->GetCurrSourceWordsRange(),nextHypo->GetCurrSourceWordsRange());
}
}
return score;
}
/** Score due to one segment */
float DependencyDistortionFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
float score = 0;
if (gap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(gap.leftHypo->GetCurrSourceWordsRange(), option->GetSourceWordsRange());
}
if (gap.rightHypo) {
score += getDistortionDistance(option->GetSourceWordsRange(), gap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
/** Score due to two segments **/
float DependencyDistortionFeatureFunction::getContiguousPairedUpdateScore
(const TranslationOption* leftOption, const TranslationOption* rightOption, const TargetGap& gap) {
float score = 0;
if (gap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(gap.leftHypo->GetCurrSourceWordsRange(), leftOption->GetSourceWordsRange());
}
score += getDistortionDistance(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
if (gap.rightHypo) {
score += getDistortionDistance(rightOption->GetSourceWordsRange(), gap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
float DependencyDistortionFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
return getSingleUpdateScore(leftOption,leftGap) + getSingleUpdateScore(rightOption,rightGap);
}
/** Score due to flip */
float DependencyDistortionFeatureFunction::getFlipUpdateScore(
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
float score = 0;
if (leftGap.leftHypo->GetPrevHypo()) {
score += getDistortionDistance(leftGap.leftHypo->GetCurrSourceWordsRange(), leftOption->GetSourceWordsRange());
}
bool contiguous = (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos());
if (contiguous) {
score += getDistortionDistance(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
} else {
score += getDistortionDistance(leftOption->GetSourceWordsRange(),leftGap.rightHypo->GetCurrSourceWordsRange());
score += getDistortionDistance(rightGap.leftHypo->GetCurrSourceWordsRange(), rightOption->GetSourceWordsRange());
}
if (rightGap.rightHypo) {
score += getDistortionDistance(rightOption->GetSourceWordsRange(), rightGap.rightHypo->GetCurrSourceWordsRange());
}
return score;
}
}
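For reference, the Floyd-Warshall pass in the DependencyDistortionFeatureFunction constructor above can be exercised in isolation. A self-contained sketch (std C++ only; the toy parent array is illustrative):
#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  //toy dependency tree as a parent array; -1 marks the root: 0->1, 2->1, 3->2
  int p[] = {1, -1, 1, 2};
  std::vector<int> parent(p, p + 4);
  size_t n = parent.size();
  size_t inf = 2 * n; //stands in for "no path yet", as in the constructor above
  std::vector<std::vector<size_t> > d(n, std::vector<size_t>(n, inf));
  for (size_t i = 0; i < n; ++i) {
    d[i][i] = 0;
    if (parent[i] >= 0) { d[i][parent[i]] = 1; d[parent[i]][i] = 1; }
  }
  for (size_t k = 0; k < n; ++k)
    for (size_t i = 0; i < n; ++i)
      for (size_t j = 0; j < n; ++j)
        d[i][j] = std::min(d[i][j], d[i][k] + d[k][j]);
  std::cout << "distance(0,3) = " << d[0][3] << std::endl; //prints 3: 0->1->2->3
  return 0;
}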

josiah/Dependency.h Normal file
@@ -0,0 +1,177 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <climits>
#include <set>
#include <vector>
#include <boost/multi_array.hpp>
#include "Factor.h"
#include "Gibbler.h"
#include "FeatureFunction.h"
namespace Josiah {
class DependencyTree {
public:
DependencyTree(const std::vector<Word>& words, Moses::FactorType parentFactor);
/** Parent of this index, -1 if root*/
int getParent(size_t index) const;
/** Does the parent word cover the child word? */
bool covers(size_t parent, size_t child) const;
/** length of sentence */
size_t getLength() const {return m_parents.size();}
const std::set<size_t> & getChildren(size_t parent) const { return m_spans[parent];}
private:
std::vector<int> m_parents;
std::vector<std::set<size_t> > m_spans;
};
std::ostream& operator<<(std::ostream& out, const DependencyTree& t);
class DependencyFeatureFunction: public SingleValuedFeatureFunction {
public:
DependencyFeatureFunction(const Sample& sample, const std::string& name, Moses::FactorType parentFactor):
SingleValuedFeatureFunction(sample,name), m_parentFactor(parentFactor)
{
m_sourceTree.reset(new DependencyTree(sample.GetSourceWords(), m_parentFactor));
//cerr << "New Tree: " << *(m_sourceTree.get()) << endl;
    //for (size_t parent = 0; parent < m_sourceTree->getLength(); ++parent) {
    //  for (size_t child = 0; child < m_sourceTree->getLength(); ++child) {
    //    cerr << "parent " << parent << " child " << child << " covers " << m_sourceTree->covers(parent,child) << endl;
    //  }
    //}
updateTarget();
}
protected:
std::auto_ptr<DependencyTree> m_sourceTree;
Moses::FactorType m_parentFactor; //which factor is the parent index?
};
/**
* Feature based on Colin Cherry's Soft Syntactic Constraint (ACL 2008).
**/
class CherrySyntacticCohesionFeatureFunction : public DependencyFeatureFunction {
public:
CherrySyntacticCohesionFeatureFunction(const Sample& sample,Moses::FactorType parentFactor):
DependencyFeatureFunction(sample,"Cherry",parentFactor) {}
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Score due to one segment */
virtual float getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap);
/** Score due to two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual float getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~CherrySyntacticCohesionFeatureFunction() {}
private:
struct Context {
const WordsRange* leftSrcRange, *rightSrcRange;
const WordsRange* leftTgtRange, *rightTgtRange;
} ;
float getInterruptions(const WordsRange& prevSourceRange, const TranslationOption *option, const WordsRange& targetSegment, const Context &);
float getInterruptionCount(const TranslationOption* option, const WordsRange& targetSegment, size_t f, const Context &);
bool notAllWordsCoveredByTree(const TranslationOption* option, size_t parent);
bool isInterrupting(const WordsRange& otherSegment, const WordsRange& targetSegment);
};
class CherrySyntacticCohesionFeature : public Feature {
public:
CherrySyntacticCohesionFeature(Moses::FactorType parentFactor) :
m_parentFactor(parentFactor) {}
virtual FeatureFunctionHandle getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new CherrySyntacticCohesionFeatureFunction(sample, m_parentFactor));
}
private:
Moses::FactorType m_parentFactor;
};
/**
* Feature which measures distortion using distance in the dependency tree.
**/
class DependencyDistortionFeatureFunction : public DependencyFeatureFunction {
public:
DependencyDistortionFeatureFunction(const Sample& sample,Moses::FactorType parentFactor);
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Score due to one segment */
virtual float getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap);
/** Score due to two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual float getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~DependencyDistortionFeatureFunction() {}
private:
//the distance in the dependency tree between any given pair of source words
typedef boost::multi_array<size_t, 2> size_matrix_t;
size_matrix_t m_distances;
/** Compute dependency distortion between two target adjacent source-ranges */
size_t getDistortionDistance(const WordsRange& leftRange, const WordsRange& rightRange);
};
class DependencyDistortionFeature : public Feature {
public:
DependencyDistortionFeature(Moses::FactorType parentFactor) :
m_parentFactor(parentFactor) {}
FeatureFunctionHandle getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new DependencyDistortionFeatureFunction(sample, m_parentFactor));
}
private:
Moses::FactorType m_parentFactor;
};
}
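The span (descendant) computation behind covers() and getChildren() can likewise be checked standalone. A self-contained sketch mirroring addChildren() from Dependency.cpp, on the same toy tree as before:
#include <iostream>
#include <set>
#include <vector>

static void addChildren(const std::vector<std::set<size_t> >& tree,
                        size_t parent, std::set<size_t>& children) {
  for (std::set<size_t>::const_iterator i = tree[parent].begin();
       i != tree[parent].end(); ++i) {
    children.insert(*i);
    addChildren(tree, *i, children); //recurse to pick up grandchildren etc.
  }
}

int main() {
  int p[] = {1, -1, 1, 2}; //0->1, 2->1, 3->2; word 1 is the root
  std::vector<int> parents(p, p + 4);
  std::vector<std::set<size_t> > tree(parents.size());
  for (size_t c = 0; c < parents.size(); ++c)
    if (parents[c] >= 0) tree[parents[c]].insert(c);
  std::vector<std::set<size_t> > spans(parents.size());
  for (size_t i = 0; i < parents.size(); ++i) {
    addChildren(tree, i, spans[i]);
    spans[i].insert(i); //the head covers itself
  }
  std::cout << "span of the root has " << spans[1].size() << " words" << std::endl; //4
  return 0;
}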

josiah/Derivation.cpp Normal file
@@ -0,0 +1,96 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Derivation.h"
#include "Gibbler.h"
#include "DummyScoreProducers.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
namespace Josiah {
bool Derivation::PhraseAlignment::operator<(const PhraseAlignment& other) const {
if (_sourceSegment < other._sourceSegment) return true;
if (other._sourceSegment < _sourceSegment) return false;
return _target < other._target;
}
Derivation::Derivation(const Sample& sample) {
m_featureValues = sample.GetFeatureValues();
const Hypothesis* currHypo = sample.GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
TargetPhrase targetPhrase = currHypo->GetTargetPhrase();
m_alignments.push_back(
PhraseAlignment(currHypo->GetCurrSourceWordsRange(), Phrase(targetPhrase)));
}
const FVector& weights = WeightManager::instance().get();
m_score = inner_product(m_featureValues, weights);
}
//FIXME: This may not be the most efficient way of mapping derivations, but will do for now
bool Derivation::operator <(const Derivation & other) const {
bool result = m_alignments < other.m_alignments;
return result;
}
void Derivation::getTargetFactors(std::vector<const Factor*>& sentence) const {
for (vector<PhraseAlignment>::const_iterator i = m_alignments.begin(); i != m_alignments.end(); ++i) {
const Phrase& targetPhrase = i->_target;
for (size_t j = 0; j < targetPhrase.GetSize(); ++j) {
sentence.push_back(targetPhrase.GetFactor(j,0));
}
}
}
int Derivation::getTargetSentenceSize() const { //convenience: builds the target sentence and counts its words
std::vector<std::string> words;
getTargetSentence(words);
return words.size();
}
void Derivation::getTargetSentence(std::vector<std::string>& targetWords ) const {
for (vector<PhraseAlignment>::const_iterator i = m_alignments.begin(); i != m_alignments.end(); ++i) {
const Phrase& targetPhrase = i->_target;
for (size_t j = 0; j < targetPhrase.GetSize(); ++j) {
targetWords.push_back(targetPhrase.GetWord(j).GetFactor(0)->GetString());
}
}
}
ostream& operator<<(ostream& out, const Derivation& d) {
out << "Target: << ";
for (size_t i = 0; i < d.m_alignments.size(); ++i) {
out << d.m_alignments[i]._target;
out << d.m_alignments[i]._sourceSegment << " ";
}
out << ">> Feature values: ";
out << d.m_featureValues;
out << " Score: ";
out << d.m_score;
return out;
}
} //namespace

josiah/Derivation.h Normal file
@@ -0,0 +1,88 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "FeatureVector.h"
#include "WordsRange.h"
#include "Phrase.h"
#include "Factor.h"
using namespace Moses;
namespace Josiah {
class Sample;
/**
* Represents a derivation, i.e. a way of getting from e to f.
**/
class Derivation {
public:
Derivation(const Sample& sample);
void getTargetSentence(std::vector<std::string>&) const;
int getTargetSentenceSize() const;
const FVector& getFeatureValues() const {return m_featureValues;}
float getScore() const {return m_score;}
void getTargetFactors(std::vector<const Factor*>& sentence) const;
bool operator<(const Derivation& other) const;
struct PhraseAlignment {
//since these are stored in target order, there is no need to retain the target segment
WordsRange _sourceSegment;
Phrase _target;
PhraseAlignment(const WordsRange& sourceSegment,const Phrase& target)
: _sourceSegment(sourceSegment),_target(target) {}
bool operator<(const PhraseAlignment& other) const;
};
friend std::ostream& operator<<(std::ostream&, const Derivation&);
friend struct DerivationProbLessThan;
private:
std::vector<PhraseAlignment> m_alignments; //in target order
FVector m_featureValues;
FValue m_score;
//std::vector<std::string> m_targetWords;
};
struct DerivationLessThan {
bool operator()(const Derivation& d1, const Derivation& d2) {
return d1 < d2;
}
};
typedef std::pair<const Derivation*,float> DerivationProbability;
std::ostream& operator<<(std::ostream&, const Derivation&);
} //namespace
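The ordering operators above exist so derivations can key associative containers when the sampler estimates a distribution over derivations. A hedged sketch (collectSample is a hypothetical helper; constructing a Sample needs a full sampler and is omitted):
#include <map>
#include "Derivation.h"

namespace Josiah {
//count how often each distinct derivation is visited during sampling
void collectSample(const Sample& sample,
                   std::map<Derivation, size_t, DerivationLessThan>& counts) {
  ++counts[Derivation(sample)];
}
}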

josiah/DiscriminativeLMFeature.cpp Normal file
@@ -0,0 +1,173 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include "DiscriminativeLMFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const string DiscriminativeLMBigramFeature::ROOTNAME = "dlmb";
DiscriminativeLMBigramFeature::DiscriminativeLMBigramFeature
(FactorType factorId,const std::string& vocabFile) : m_factorId(factorId) {
if (!vocabFile.empty()) {
ifstream in(vocabFile.c_str());
assert(in);
string line;
while (getline(in,line)) {
m_vocab.insert(line);
}
m_vocab.insert(EOS_);
m_vocab.insert(BOS_);
}
//create BOS and EOS
FactorCollection& factorCollection = FactorCollection::Instance();
const Factor* bosFactor = factorCollection.AddFactor(Input,m_factorId,BOS_);
BOS.SetFactor(m_factorId,bosFactor);
const Factor* eosFactor = factorCollection.AddFactor(Input,m_factorId,EOS_);
EOS.SetFactor(m_factorId,eosFactor);
}
FeatureFunctionHandle DiscriminativeLMBigramFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new DiscriminativeLMBigramFeatureFunction(sample,*this));
}
const Word& DiscriminativeLMBigramFeature::bos() const {
return BOS;
}
const Word& DiscriminativeLMBigramFeature::eos() const {
return EOS;
}
const std::set<std::string>& DiscriminativeLMBigramFeature::vocab() const {
return m_vocab;
}
Moses::FactorType DiscriminativeLMBigramFeature::factorId() const {
return m_factorId;
}
DiscriminativeLMBigramFeatureFunction::DiscriminativeLMBigramFeatureFunction
(const Sample& sample, const DiscriminativeLMBigramFeature& parent):
FeatureFunction(sample), m_parent(parent)
{}
void DiscriminativeLMBigramFeatureFunction::updateTarget() {
m_targetWords = getSample().GetTargetWords();
}
void DiscriminativeLMBigramFeatureFunction::scoreBigram(const Word& word1, const Word& word2, FVector& scores) {
const string& text1 = word1[m_parent.factorId()]->GetString();
if (!m_parent.vocab().empty() && m_parent.vocab().find(text1) == m_parent.vocab().end()) {
return;
}
const string& text2 = word2[m_parent.factorId()]->GetString();
if (!m_parent.vocab().empty() && m_parent.vocab().find(text2) == m_parent.vocab().end()) {
return;
}
FName name(m_parent.ROOTNAME, text1 + ":" + text2);
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void DiscriminativeLMBigramFeatureFunction::assignScore(FVector& scores)
{
for (size_t i = 0; i + 1 < m_targetWords.size(); ++i) { //i+1 guard avoids size_t underflow on an empty target
scoreBigram(m_targetWords[i],m_targetWords[i+1],scores);
}
}
void DiscriminativeLMBigramFeatureFunction::doUpdate(const Phrase& gapPhrase, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo->GetPrevHypo()) {
//left edge
const TargetPhrase& leftPhrase = gap.leftHypo->GetTargetPhrase();
scoreBigram(leftPhrase.GetWord(leftPhrase.GetSize()-1), gapPhrase.GetWord(0),scores);
} else {
scoreBigram(m_parent.bos(), gapPhrase.GetWord(0),scores);
}
//gap phrase
size_t i = 0;
for (; i < gapPhrase.GetSize()-1; ++i) {
scoreBigram(gapPhrase.GetWord(i), gapPhrase.GetWord(i+1),scores);
}
//right edge
if (gap.rightHypo) {
scoreBigram(gapPhrase.GetWord(i),gap.rightHypo->GetTargetPhrase().GetWord(0), scores);
} else {
scoreBigram(gapPhrase.GetWord(i),m_parent.eos(),scores);
}
}
/** Score due to one segment */
void DiscriminativeLMBigramFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
doUpdate(option->GetTargetPhrase(),gap, scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void DiscriminativeLMBigramFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
Phrase gapPhrase(leftOption->GetTargetPhrase());
gapPhrase.Append(rightOption->GetTargetPhrase());
doUpdate(gapPhrase,gap,scores);
}
void DiscriminativeLMBigramFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
doUpdate(leftOption->GetTargetPhrase(), leftGap, scores);
doUpdate(rightOption->GetTargetPhrase(), rightGap, scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void DiscriminativeLMBigramFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
//contiguous
Phrase gapPhrase(leftOption->GetTargetPhrase());
gapPhrase.Append(rightOption->GetTargetPhrase());
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos()));
doUpdate(gapPhrase,gap,scores);
} else {
//discontiguous
doUpdate(leftOption->GetTargetPhrase(), leftGap,scores);
doUpdate(rightOption->GetTargetPhrase(), rightGap,scores);
}
}
}
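The extraction done by scoreBigram()/assignScore() above reduces to counting in-vocabulary target bigrams. A self-contained sketch (std C++ only; the underscore in the printed feature name is illustrative, since the real spelling is decided by FName):
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::set<std::string> vocab;
  vocab.insert("<s>"); vocab.insert("the"); vocab.insert("cat"); vocab.insert("</s>");
  const char* w[] = {"<s>", "the", "cat", "sat", "</s>"};
  std::vector<std::string> target(w, w + 5);
  std::map<std::string, float> scores;
  for (size_t i = 0; i + 1 < target.size(); ++i) {
    //a bigram fires only if both words are in the (restricted) vocabulary
    if (vocab.count(target[i]) && vocab.count(target[i + 1]))
      ++scores["dlmb_" + target[i] + ":" + target[i + 1]];
  }
  for (std::map<std::string, float>::const_iterator it = scores.begin();
       it != scores.end(); ++it)
    std::cout << it->first << " = " << it->second << std::endl;
  return 0; //prints dlmb_<s>:the and dlmb_the:cat, each with value 1
}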

josiah/DiscriminativeLMFeature.h Normal file
@@ -0,0 +1,90 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
/**
* Feature based on target bigrams.
**/
class DiscriminativeLMBigramFeature : public Feature {
public:
DiscriminativeLMBigramFeature(Moses::FactorType factorId, const std::string& vocabFile="");
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const Word& bos() const;
const Word& eos() const;
const std::set<std::string>& vocab() const;
Moses::FactorType factorId() const;
static const std::string ROOTNAME;
private:
std::set<std::string> m_vocab;
Moses::FactorType m_factorId;
Word BOS;
Word EOS;
};
class DiscriminativeLMBigramFeatureFunction : public FeatureFunction {
public:
DiscriminativeLMBigramFeatureFunction(const Sample& sample, const DiscriminativeLMBigramFeature& parent);
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
void scoreBigram(const Word& word1, const Word& word2, FVector& scores);
/** Score change due to filling in the gapPhrase in the gap.*/
void doUpdate(const Phrase& gapPhrase, const TargetGap& gap, FVector& scores);
std::vector<Word> m_targetWords;
const DiscriminativeLMBigramFeature m_parent;
};
}

josiah/DistortionPenaltyFeature.cpp Normal file
@@ -0,0 +1,76 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DistortionPenaltyFeature.h"
#include "DummyScoreProducers.h"
#include "Derivation.h"
#include "Gibbler.h"
#include "GibbsOperator.h"
namespace Josiah {
FeatureFunctionHandle DistortionPenaltyFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new DistortionPenaltyFeatureFunction(sample));
}
FValue DistortionPenaltyFeatureFunction::computeScore() {
FValue distortion = 0;
//cerr << Derivation(*m_sample) << endl;
const Hypothesis* currHypo = getSample().GetTargetTail(); //target tail
//step through in target order
int lastSrcEnd = -1;
while ((currHypo = (currHypo->GetNextHypo()))) {
int srcStart = currHypo->GetCurrSourceWordsRange().GetStartPos();
distortion -= abs(srcStart - (lastSrcEnd+1));
lastSrcEnd = currHypo->GetCurrSourceWordsRange().GetEndPos();
}
//cerr << "distortion " << distortion << endl;
return distortion;
}
FValue DistortionPenaltyFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FValue distortion;
const Hypothesis* leftTgtNextHypo = leftGap.rightHypo;
const Hypothesis* rightTgtPrevHypo = rightGap.leftHypo;
//if the segments are contiguous and we're swapping, then these hypos have to be swapped so
//that they're in the order they'd appear in the proposed target
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
if (leftTgtNextHypo->GetCurrSourceWordsRange() != rightOption->GetSourceWordsRange()) {
const Hypothesis* tmp = leftTgtNextHypo;
leftTgtNextHypo = rightTgtPrevHypo;
rightTgtPrevHypo = tmp;
}
}
CheckValidReordering(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange(),
leftGap.leftHypo, leftTgtNextHypo,
rightTgtPrevHypo, rightGap.rightHypo, distortion);
//cerr << leftOption->GetSourceWordsRange() << " " << rightOption->GetSourceWordsRange() << " " << distortion << endl;
//cerr << "lg.rh" << leftTgtNextHypo->GetCurrSourceWordsRange() << " rg.lh" << rightTgtPrevHypo->GetCurrSourceWordsRange() << endl;
return distortion;
}
}
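computeScore() above is the classic distortion penalty, negated and accumulated in target order. A self-contained sketch on illustrative source ranges:
#include <cstdlib>
#include <iostream>

int main() {
  //(start,end) source ranges of the hypothesis chain, in target order
  int starts[] = {0, 4, 2};
  int ends[]   = {1, 4, 3};
  int distortion = 0, lastSrcEnd = -1;
  for (size_t i = 0; i < 3; ++i) {
    distortion -= std::abs(starts[i] - (lastSrcEnd + 1));
    lastSrcEnd = ends[i];
  }
  std::cout << "distortion = " << distortion << std::endl; //-(0 + 2 + 3) = -5
  return 0;
}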

josiah/DistortionPenaltyFeature.h Normal file
@@ -0,0 +1,65 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
namespace Josiah {
class DistortionPenaltyFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
};
class DistortionPenaltyFeatureFunction : public SingleValuedFeatureFunction {
public:
DistortionPenaltyFeatureFunction(const Sample& sample)
: SingleValuedFeatureFunction(sample,"Distortion") {}
protected:
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {return 0;}
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap){return 0;}
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {return 0;}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
};
}

josiah/FV.cpp Normal file
@@ -0,0 +1,102 @@
/*
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <fstream>
#include <iostream>
#include <boost/archive/text_oarchive.hpp>
#include "FeatureVector.h"
using namespace Josiah;
using namespace std;
int main() {
FVector fv;
FName g3("L", "1");
FName g4("L", "2");
FName t1("T", "1");
FName p2("P", "2");
fv[g3] = 2.0;
fv[g4] = 1.3;
cerr << "fv=" << fv << endl;
FVector fv2;
fv2[g3] = 1.5;
fv2[t1] = 3.0;
FVector fvsum = fv + fv2;
FVector fvdiff = fv - fv2;
FVector fvprod = fv * 1.4;
FVector fvdiv = fv / 4.0;
cerr << "fv2=" << fv2 << endl;
cerr << "fvsum=" << fvsum << endl;
cerr << "fvdiff=" << fvdiff << endl;
cerr << "fvprod=" << fvprod << endl;
cerr << "fvdiv=" << fvdiv << endl;
cerr << "fv.fvprod=" << inner_product(fv,fvprod) << endl;
cerr << "fvprod.fv=" << inner_product(fvprod,fv) << endl;
cerr << "fv * fv2 = " << (fv*fv2) << endl;
cerr << "fv / fv2 = " << (fv/fv2) << endl;
FVector fvp2 = fv + 2.0;
cerr << "fv + 2 = " << fvp2 << endl;
//cerr << "(fv+2)[" << g3 << "] = " << fvp2[g3] << " (fv+2)[" << g4 << "] = " << fvp2[g4] << " (fv+2)[" << t1 << "] = " << fvp2[t1] << endl;
FVector fv2m1 = fv2 - 1.0;
cerr << "(fv + 2) + (fv2 -1) = " << (fvp2 + fv2m1) << endl;
cerr << "(fv + 2) - (fv2 -1) = " << (fvp2 - fv2m1) << endl;
cerr << "(fv + 2) * (fv2 -1) = " << (fvp2 * fv2m1) << endl;
cerr << "(fv + 2) / (fv2 -1) = " << (fvp2 / fv2m1) << endl;
cerr << "max((fv + 2),(fv2 -1)) = " << fvmax(fvp2,fv2m1) << endl;
cerr << "(fv + 2) + (fv2) = " << (fvp2 + fv2) << endl;
cerr << "(fv + 2) - (fv2) = " << (fvp2 - fv2) << endl;
cerr << "(fv + 2) * (fv2) = " << (fvp2 * fv2) << endl;
cerr << "fv2 / (fv + 2) = " << (fv2 / fvp2) << endl;
cerr << "max((fv + 2),(fv2)) = " << fvmax(fv2,fvp2) << endl;
//fv2[g4] = 3.1; //error
cerr << "fv2 . (fv + 2) = " << inner_product(fv2,fv+2) << endl;
cerr << "(fv2-1) . (fv) = " << inner_product(fv2-1,fv) << endl;
cerr << "(fv -1)[p2] = " << (fv -1)[p2] << endl;
cerr << "fvp2 = " << fvp2 << endl;
cerr << "++fvp2[g3] = " << ++fvp2[g3] << endl;
//cerr << "fvp2[g3] = " << ++fvp2[g3] << endl;
cerr << "fvp2 = " << fvp2 << endl;
fvp2[p2] += 5;
cerr << "fvp2 = " << fvp2 << endl;
FVector loaded;
loaded.load("weights.txt");
cerr << "loaded=" << loaded << endl;
return 0;
}

josiah/FV_mpi.cpp Normal file
@@ -0,0 +1,102 @@
#include <cassert>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#ifdef MPI_ENABLED
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include <boost/unordered_map.hpp>
#include <boost/serialization/access.hpp>
#include <boost/serialization/map.hpp>
namespace mpi = boost::mpi;
#endif
#include "FeatureVector.h"
using namespace Josiah;
using namespace std;
//typedef boost::unordered_map<string,float> nvmap;
typedef map<string,float> nvmap;
struct Data {
string a;
float b;
};
namespace boost { namespace serialization {
template<class Archive>
void serialize(Archive& ar, Data& d, const unsigned int) {
ar & d.a;
ar & d.b;
}
}
}
int main(int argc, char* argv[])
{
#ifdef MPI_ENABLED
mpi::environment env(argc, argv);
mpi::communicator world;
//cerr << world.rank() << endl;
float rank = world.rank();
ostringstream ostr;
ostr << rank;
string filename = "mpi.log" + ostr.str();
ofstream log(filename.c_str());
assert(log);
log << "MPI rank: " << rank << endl;
log << "MPI size: " << world.size() << endl;
FVector fv;
FName fn_r("R", ostr.str());
FName fn_s("S", ostr.str());
FName fn_t("T", ostr.str());
FName fn_i ("I", "1");
FName fn_j ("J", "1");
FName fn_k ("K", "1");
fv[fn_r] = 1.2;
fv[fn_s] = 2.1;
fv[fn_t] = -1.2;
fv[fn_i] = 1/(1 + rank);
fv[fn_j] = 2;
fv[fn_k] = 23.1;
log << "FV: " << fv << endl;
FVector sum;
//mpi::broadcast(world,fv,0);
mpi::reduce(world,fv,sum,FVectorPlus(),0);
/*
float sent = 1 / (float)(world.rank() + 3);
log << "sent: " << sent << endl;
float rcvd;
mpi::reduce(world,sent,rcvd,std::plus<float>(), 0);
if (world.rank() == 0) cerr << "Received " << rcvd << endl;
*/
if (rank == 0) {
cerr << "Sum: " << sum << endl;
}
log.close();
#endif
return 0;
}

josiah/FeatureFunction.h Normal file
@@ -0,0 +1,174 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
#include "Hypothesis.h"
#include "TranslationOption.h"
using namespace Moses;
namespace Josiah {
class Sample;
/**
* Represents a gap in the target sentence. During score computation, this is used to specify where the proposed
* TranslationOptions are to be placed. The left and right hypotheses are to the left and to the right in target order.
* Note that the leftHypo could be the null hypo (if at start) and the rightHypo could be a null pointer (if at end).
**/
struct TargetGap {
TargetGap(const Hypothesis* lh, const Hypothesis* rh, const WordsRange& s) :
leftHypo(lh), rightHypo(rh), segment(s) {
//check that they're in target order.
assert(!lh->GetPrevHypo() || lh->GetCurrTargetWordsRange() < s);
assert(!rh || s < rh->GetCurrTargetWordsRange());
}
const Hypothesis* leftHypo;
const Hypothesis* rightHypo;
WordsRange segment;
};
/** Abstract base class for Gibbler feature functions.
* 1. When a new Sample() object is created to begin sampling on a new sentence:
* - constructor - passing in the sample
* - updateTarget() - to indicate to the FeatureFunction that the target words have changed
* - assignScore() - to tell the FeatureFunction to set its initial score.
* 2. When scoring possible transitions.
* - doXXX() - to calculate the score deltas.
* 3. When performing a transition.
* - updateTarget() - called with new target words. For paired updates, this is called twice, and after the first
* call the feature_vector (in the sample) will be inconsistent with the target words
**/
class FeatureFunction {
public:
/** Initialise with new sample */
FeatureFunction(const Sample& sample): m_sample(sample) {}
/** Update the target words.*/
virtual void updateTarget(){/*do nothing*/}
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores) = 0;
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) = 0;
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) = 0;
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) = 0;
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) = 0;
virtual ~FeatureFunction() {}
protected:
const Sample& getSample() const {return m_sample;}
private:
const Sample& m_sample;
};
typedef boost::shared_ptr<FeatureFunction> FeatureFunctionHandle;
typedef std::vector<FeatureFunctionHandle> FeatureFunctionVector;
/**
* Base class for Gibbler Features.
* Feature methods are called as follows:
* 1. When Gibbler starts up, and initialises feature functions:
* - constructor
* 2. When a new Sample() object is created:
* - getFunction() - this creates a FeatureFunction object.
**/
class Feature {
public:
Feature() {}
virtual FeatureFunctionHandle getFunction(const Sample& sample) const = 0;
virtual ~Feature() {}
};
/**
* A feature function with a single value
**/
class SingleValuedFeatureFunction: public FeatureFunction {
public:
SingleValuedFeatureFunction(const Sample& sample, const std::string& name) :
FeatureFunction(sample), m_name(name) {}
virtual void assignScore(FVector& scores)
{scores[m_name] = computeScore();}
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{scores[m_name] = getSingleUpdateScore(option,gap);}
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{scores[m_name] = getContiguousPairedUpdateScore(leftOption,rightOption,gap);}
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{scores[m_name] = getDiscontiguousPairedUpdateScore(leftOption,rightOption,leftGap,rightGap);}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{scores[m_name] = getFlipUpdateScore(leftOption,rightOption,leftGap,rightGap);}
/**
* Actual feature functions need to implement these methods.
**/
protected:
virtual FValue computeScore() = 0;
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) = 0;
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap) = 0;
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) = 0;
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) = 0;
virtual ~SingleValuedFeatureFunction() {}
private:
FName m_name;
};
typedef boost::shared_ptr<Feature> FeatureHandle;
typedef std::vector<FeatureHandle> FeatureVector;
} //namespace
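A hedged sketch of a SingleValuedFeatureFunction subclass following the lifecycle documented above: a hypothetical target-length feature, not part of this commit, using only API visible in these files:
#include "FeatureFunction.h"
#include "Gibbler.h"

namespace Josiah {
class TargetLengthFeatureFunction : public SingleValuedFeatureFunction {
public:
  TargetLengthFeatureFunction(const Sample& sample) :
    SingleValuedFeatureFunction(sample, "TargetLength") {}
protected:
  //full recomputation, called via assignScore() when sampling starts
  virtual FValue computeScore() {
    return getSample().GetTargetWords().size();
  }
  //score of the segment filled by a single proposed option
  virtual FValue getSingleUpdateScore(const TranslationOption* option,
                                      const TargetGap&) {
    return option->GetTargetPhrase().GetSize();
  }
  virtual FValue getContiguousPairedUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
  virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
  //a flip reorders the two phrases without changing their combined length
  virtual FValue getFlipUpdateScore(const TranslationOption* left,
      const TranslationOption* right, const TargetGap&, const TargetGap&) {
    return left->GetTargetPhrase().GetSize() + right->GetTargetPhrase().GetSize();
  }
};
}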

josiah/Gain.cpp Normal file
@@ -0,0 +1,94 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include "FactorCollection.h"
#include "Util.h"
#include "Gain.h"
using namespace Moses;
using namespace std;
namespace Josiah {
void TextToTranslation(const string& text, Translation& words) {
vector<string> tokens = Tokenize(text);
words.clear();
FactorCollection& factorCollection = FactorCollection::Instance();
for (size_t i = 0; i < tokens.size(); ++i) {
const Factor* factor = factorCollection.AddFactor(Input, 0, tokens[i]);
words.push_back(factor);
}
}
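//Example (editor's note): TextToTranslation("guten tag", words) leaves
//'words' holding two Factor pointers, one per whitespace-separated token,
//interned in the global FactorCollection.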
void Gain::LoadReferences(const vector<string>& refFiles,
const string& sourceFile) {
assert(refFiles.size());
vector<boost::shared_ptr<ifstream> > refIns(refFiles.size());
for (size_t i = 0; i < refFiles.size(); ++i) {
refIns[i].reset(new ifstream());
refIns[i]->open(refFiles[i].c_str());
assert(refIns[i]->good());
}
ifstream srcIn(sourceFile.c_str());
assert(srcIn);
size_t count = 0;
while(srcIn.good()) {
string line;
getline(srcIn,line);
if (line.empty()) continue;
Translation source;
TextToTranslation(line,source);
vector<Translation> refs(refFiles.size());
for (size_t i = 0; i < refFiles.size(); ++i) {
getline(*refIns[i],line);
assert(refIns[i]->good());
TextToTranslation(line,refs[i]);
}
AddReferences(refs,source);
++count;
}
//check that we have reached the end of all the reference files
for (size_t i = 0; i < refFiles.size(); ++i) {
string line;
getline(*refIns[i],line);
assert(line.empty());
}
VERBOSE(1, "Loaded " << count << " references" << endl);
}
GainFunctionHandle Gain::GetGainFunction(size_t sentenceId) {
vector<size_t> sentenceIds;
sentenceIds.push_back(sentenceId);
return GetGainFunction(sentenceIds);
}
float GainFunction::Evaluate(const Translation& hypothesis) const {
vector<Translation> hyps;
hyps.push_back(hypothesis);
return Evaluate(hyps);
}
}

66
josiah/Gain.h Normal file
View File

@ -0,0 +1,66 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "Decoder.h"
namespace Josiah {
class GainFunction;
typedef boost::shared_ptr<GainFunction> GainFunctionHandle;
void TextToTranslation(const std::string& text, Translation& words);
/**
* Factory for gain functions.
**/
class Gain {
public:
/** Load the reference files */
void LoadReferences(const std::vector<std::string>& refFilenames,
const std::string& sourceFile);
/** Get the function to calculate the gain on these sentences */
virtual GainFunctionHandle GetGainFunction(const std::vector<size_t>& sentenceIds) = 0;
/** Add the set of references for a specific sentence */
virtual void AddReferences(const std::vector<Translation>& refs, const Translation& source) = 0;
/** Convenience method for a single sentence */
GainFunctionHandle GetGainFunction(size_t sentenceId);
virtual float GetAverageReferenceLength(size_t sentenceId) const = 0;
virtual ~Gain() {}
};
class GainFunction {
public:
/** Calculate Gain for set of hypotheses */
virtual float Evaluate(const std::vector<Translation>& hypotheses) const = 0;
/** Add the stats for this hypothesis to the smoothing stats being collected */
virtual void AddSmoothingStats(size_t sentenceId, const Translation& hypothesis) {}
/** Inform the GainFunction that we've finished with this sentence, and it can now
update the parent's stats */
virtual void UpdateSmoothingStats() {}
/** Shortcut for evaluating just one sentence */
float Evaluate(const Translation& hypothesis) const;
virtual ~GainFunction() {}
};
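//Usage sketch (editor's example, not part of this commit). A concrete Gain
//subclass - 'BleuGain' is an invented name - would typically be driven like:
//
//  BleuGain gain;
//  gain.LoadReferences(refFiles, sourceFile);
//  GainFunctionHandle gf = gain.GetGainFunction(sentenceId);
//  float score = gf->Evaluate(hypothesis); //single-sentence shortcut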
}

441
josiah/Gibbler.cpp Normal file
View File

@ -0,0 +1,441 @@
#include "Gibbler.h"
#include "Derivation.h"
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "GibblerMaxTransDecoder.h"
#include "StaticData.h"
#include "AnnealingSchedule.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
Sample::Sample(Hypothesis* target_head, const std::vector<Word>& source, const FeatureVector& features, bool doRaoBlackwell) :
m_sourceWords(source), m_doRaoBlackwell(doRaoBlackwell), m_updates(0) {
std::map<int, Hypothesis*> source_order;
this->target_head = target_head;
Hypothesis* next = NULL;
for (Hypothesis* h = target_head; h; h = const_cast<Hypothesis*>(h->GetPrevHypo())) {
size_t startPos = h->GetCurrSourceWordsRange().GetStartPos();
SetSourceIndexedHyps(h);
if (h->GetPrevHypo()){
source_order[startPos] = h;
}
else {
source_order[-1] = h;
}
this->target_tail = h;
h->SetNextHypo(next);
next = h;
}
std::map<int, Hypothesis*>::const_iterator source_it = source_order.begin();
Hypothesis* prev = NULL;
this->source_tail = source_it->second;
for (; source_it != source_order.end(); source_it++) {
Hypothesis *h = source_it->second;
h->SetSourcePrevHypo(prev);
if (prev != NULL)
prev->SetSourceNextHypo(h);
this->source_head = h;
prev = h;
}
this->source_head->SetSourceNextHypo(NULL);
this->target_head->SetNextHypo(NULL);
this->source_tail->SetSourcePrevHypo(NULL);
this->target_tail->SetPrevHypo(NULL);
for (FeatureVector::const_iterator i=features.begin(); i!=features.end(); ++i){
// tell the feature that we have a new sample
m_featureFunctions.push_back((*i)->getFunction(*this));
}
UpdateTargetWords();
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->assignScore(feature_values);
}
}
Sample::~Sample() {
RemoveAllInColl(cachedSampledHyps);
}
Hypothesis* Sample::CreateHypothesis(Hypothesis& prevTarget, const TranslationOption& option) {
UpdateCoverageVector(prevTarget, option);
Hypothesis* hypo = Hypothesis::Create(prevTarget, option, NULL);
prevTarget.SetNextHypo(hypo);
cachedSampledHyps.insert(hypo);
SetSourceIndexedHyps(hypo);
//SetTgtIndexedHyps(hypo);
return hypo;
}
void Sample::UpdateTargetWords() {
m_targetWords.clear();
const Hypothesis* currHypo = GetTargetTail(); //target tail
IFVERBOSE(2) {
VERBOSE(2,"Sentence: ");
}
//we're now at the dummy hypo at the start of the sentence
while ((currHypo = (currHypo->GetNextHypo()))) {
const TargetPhrase& targetPhrase = currHypo->GetCurrTargetPhrase();
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
m_targetWords.push_back(targetPhrase.GetWord(i));
IFVERBOSE(2) {
VERBOSE(2,targetPhrase.GetWord(i) << " ");
}
}
IFVERBOSE(2) {
if (currHypo->GetCurrTargetPhrase().GetSize() > 0) {
VERBOSE(2, "|" << currHypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << currHypo->GetCurrSourceWordsRange().GetEndPos() << "| ");
}
}
}
IFVERBOSE(2) {
VERBOSE(2,endl);
}
IFVERBOSE(2) {
VERBOSE(2,"FVs: " << feature_values << endl);
}
//Inform the extra features that the target words have changed
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->updateTarget();
}
}
Hypothesis* Sample::GetHypAtSourceIndex(size_t i) {
std::map<size_t, Hypothesis*>::iterator it = sourceIndexedHyps.find(i);
if (it == sourceIndexedHyps.end())
return NULL;
return it->second;
}
void Sample::SetSourceIndexedHyps(Hypothesis* h) {
size_t startPos = h->GetCurrSourceWordsRange().GetStartPos();
size_t endPos = h->GetCurrSourceWordsRange().GetEndPos();
if (startPos + 1 == 0 ) { //startPos == (size_t)-1: the initial (dummy) hypothesis
sourceIndexedHyps[startPos] = h;
return;
}
for (size_t i = startPos; i <= endPos; i++) {
sourceIndexedHyps[i] = h;
}
}
void Sample::SetTgtNextHypo(Hypothesis* newHyp, Hypothesis* currNextHypo) {
if (newHyp) {
newHyp->SetNextHypo(currNextHypo);
}
if (currNextHypo) {
currNextHypo->SetPrevHypo(newHyp);
}
}
void Sample::SetSrcPrevHypo(Hypothesis* newHyp, Hypothesis* srcPrevHypo) {
if (newHyp) {
newHyp->SetSourcePrevHypo(srcPrevHypo);
}
if (srcPrevHypo) {
srcPrevHypo->SetSourceNextHypo(newHyp);
}
}
void Sample::FlipNodes(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, Hypothesis* prevTgtHypo, Hypothesis* nextTgtHypo, const FVector& deltaFV) {
bool tgtSideContiguous = false;
Hypothesis *oldRightHypo = GetHypAtSourceIndex(leftTgtOption.GetSourceWordsRange().GetStartPos()); //this one used to be on the right
Hypothesis *oldLeftHypo = GetHypAtSourceIndex(rightTgtOption.GetSourceWordsRange().GetStartPos());//this one used to be on the left
//create the new leftmost target hypothesis
Hypothesis *newLeftHypo = CreateHypothesis(*prevTgtHypo, leftTgtOption);
//are the options contiguous on the target side?
Hypothesis *tgtSidePredecessor = const_cast<Hypothesis*>(oldRightHypo->GetPrevHypo()); //find its target side predecessor
//If the flip is contiguous on the target side, then the predecessor is the flipped one
if (tgtSidePredecessor->GetCurrSourceWordsRange() == rightTgtOption.GetSourceWordsRange()) {
tgtSidePredecessor = newLeftHypo;
tgtSideContiguous = true;
}
//update the target side sample pointers now
if (!tgtSideContiguous) {
Hypothesis *leftHypoTgtSideSuccessor = const_cast<Hypothesis*>(oldLeftHypo->GetNextHypo());
SetTgtNextHypo(newLeftHypo, leftHypoTgtSideSuccessor);
}
//update the target word ranges of the ones in between
if (!tgtSideContiguous) {
size_t startTgtPos = newLeftHypo->GetCurrTargetWordsRange().GetEndPos();
for (Hypothesis *h = const_cast<Hypothesis*>(oldLeftHypo->GetNextHypo()); h != oldRightHypo ; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
WordsRange& range = h->GetCurrTargetWordsRange();
size_t size = range.GetNumWordsCovered();
range.SetStartPos(startTgtPos+1);
range.SetEndPos(startTgtPos+size);
startTgtPos += size;
}
}
//now create the one that goes on the right
Hypothesis *newRightHypo = CreateHypothesis(*tgtSidePredecessor, rightTgtOption);
SetTgtNextHypo(newRightHypo, nextTgtHypo);
//update the source side sample pointers now
Hypothesis* newLeftSourcePrevHypo = GetHypAtSourceIndex(newLeftHypo->GetCurrSourceWordsRange().GetStartPos() - 1 );
Hypothesis* newLeftSourceNextHypo = GetHypAtSourceIndex(newLeftHypo->GetCurrSourceWordsRange().GetEndPos() + 1 );
SetSrcPrevHypo(newLeftHypo, newLeftSourcePrevHypo);
SetSrcPrevHypo(newLeftSourceNextHypo, newLeftHypo);
Hypothesis* newRightSourcePrevHypo = GetHypAtSourceIndex(newRightHypo->GetCurrSourceWordsRange().GetStartPos() - 1 );
Hypothesis* newRightSourceNextHypo = GetHypAtSourceIndex(newRightHypo->GetCurrSourceWordsRange().GetEndPos() + 1 );
SetSrcPrevHypo(newRightHypo, newRightSourcePrevHypo);
SetSrcPrevHypo(newRightSourceNextHypo, newRightHypo);
UpdateHead(oldRightHypo, newLeftHypo, source_head);
UpdateHead(oldLeftHypo, newRightHypo, source_head);
UpdateHead(oldRightHypo, newRightHypo, target_head);
UpdateHead(oldLeftHypo, newRightHypo, target_head);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
DeleteFromCache(oldRightHypo);
DeleteFromCache(oldLeftHypo);
//Sanity check
IFVERBOSE(4) {
float totalDistortion(0.0);
for (Hypothesis* h = target_tail; h; h = const_cast<Hypothesis*>(h->GetNextHypo())) {
Hypothesis *next = const_cast<Hypothesis*>(h->GetNextHypo());
if (next) {
totalDistortion += ComputeDistortionDistance(h->GetCurrSourceWordsRange(), next->GetCurrSourceWordsRange());
}
else {
break;
}
}
VERBOSE(4, "Total distortion for this sample " << totalDistortion << endl);
}
}
float Sample::ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current)
{
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
} else {
dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
}
return - (float) abs(dist);
}
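//Worked example (editor's note): if the previous phrase covers source words
//[2,4] and the current phrase covers [5,6], dist = 4 - 5 + 1 = 0, so the
//score is 0 (a monotone step). Jumping back to [0,1] gives dist = 4 - 0 + 1 = 5,
//i.e. a score of -5; larger jumps are penalised more heavily.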
void Sample::ChangeTarget(const TranslationOption& option, const FVector& deltaFV) {
size_t optionStartPos = option.GetSourceWordsRange().GetStartPos();
Hypothesis *currHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis& prevHyp = *(const_cast<Hypothesis*>(currHyp->GetPrevHypo()));
Hypothesis *newHyp = CreateHypothesis(prevHyp, option);
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currHyp->GetNextHypo()));
UpdateHead(currHyp, newHyp, target_head);
SetSrcPrevHypo(newHyp, const_cast<Hypothesis*>(currHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(const_cast<Hypothesis*>(currHyp->GetSourceNextHypo()), newHyp);
UpdateHead(currHyp, newHyp, source_head);
//Update target word ranges
int tgtSizeChange = static_cast<int> (option.GetTargetPhrase().GetSize()) - static_cast<int> (currHyp->GetTargetPhrase().GetSize());
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newHyp, tgtSizeChange);
}
DeleteFromCache(currHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::MergeTarget(const TranslationOption& option, const FVector& deltaFV) {
size_t optionStartPos = option.GetSourceWordsRange().GetStartPos();
size_t optionEndPos = option.GetSourceWordsRange().GetEndPos();
Hypothesis *currStartHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis *currEndHyp = GetHypAtSourceIndex(optionEndPos);
assert(currStartHyp != currEndHyp);
Hypothesis* prevHyp = NULL;
Hypothesis* newHyp = NULL;
if (currStartHyp->GetCurrTargetWordsRange() < currEndHyp->GetCurrTargetWordsRange()) {
prevHyp = const_cast<Hypothesis*> (currStartHyp->GetPrevHypo());
newHyp = CreateHypothesis(*prevHyp, option);
//Set the target ptrs
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currEndHyp->GetNextHypo()));
UpdateHead(currEndHyp, newHyp, target_head);
}
else {
prevHyp = const_cast<Hypothesis*> (currEndHyp->GetPrevHypo());
newHyp = CreateHypothesis(*prevHyp, option);
SetTgtNextHypo(newHyp, const_cast<Hypothesis*>(currStartHyp->GetNextHypo()));
UpdateHead(currStartHyp, newHyp, target_head);
}
//Set the source ptrs
SetSrcPrevHypo(newHyp, const_cast<Hypothesis*>(currStartHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(const_cast<Hypothesis*>(currEndHyp->GetSourceNextHypo()), newHyp);
UpdateHead(currEndHyp, newHyp, source_head);
//Update target word ranges
int newTgtSize = option.GetTargetPhrase().GetSize();
int prevTgtSize = currStartHyp->GetTargetPhrase().GetSize() + currEndHyp->GetTargetPhrase().GetSize();
int tgtSizeChange = newTgtSize - prevTgtSize;
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newHyp, tgtSizeChange);
}
DeleteFromCache(currStartHyp);
DeleteFromCache(currEndHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::SplitTarget(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, const FVector& deltaFV) {
size_t optionStartPos = leftTgtOption.GetSourceWordsRange().GetStartPos();
Hypothesis *currHyp = GetHypAtSourceIndex(optionStartPos);
Hypothesis& prevHyp = *(const_cast<Hypothesis*>(currHyp->GetPrevHypo()));
Hypothesis *newLeftHyp = CreateHypothesis(prevHyp, leftTgtOption);
Hypothesis *newRightHyp = CreateHypothesis(*newLeftHyp, rightTgtOption);
//Update tgt ptrs
SetTgtNextHypo(newRightHyp, const_cast<Hypothesis*>(currHyp->GetNextHypo()));
UpdateHead(currHyp, newRightHyp, target_head);
//Update src ptrs
assert (newLeftHyp->GetCurrSourceWordsRange() < newRightHyp->GetCurrSourceWordsRange()); //monotone
SetSrcPrevHypo(newLeftHyp, const_cast<Hypothesis*>(currHyp->GetSourcePrevHypo()));
SetSrcPrevHypo(newRightHyp, newLeftHyp);
SetSrcPrevHypo(const_cast<Hypothesis*>(currHyp->GetSourceNextHypo()), newRightHyp);
UpdateHead(currHyp, newRightHyp, source_head);
//Update target word ranges
int prevTgtSize = currHyp->GetTargetPhrase().GetSize();
int newTgtSize = newLeftHyp->GetTargetPhrase().GetSize() + newRightHyp->GetTargetPhrase().GetSize();
int tgtSizeChange = newTgtSize - prevTgtSize;
if (tgtSizeChange != 0) {
UpdateTargetWordRange(newRightHyp, tgtSizeChange);
}
DeleteFromCache(currHyp);
UpdateFeatureValues(deltaFV);
UpdateTargetWords();
}
void Sample::UpdateHead(Hypothesis* currHyp, Hypothesis* newHyp, Hypothesis *&head) {
if (head == currHyp)
head = newHyp;
}
void Sample::UpdateTargetWordRange(Hypothesis* hyp, int tgtSizeChange) {
Hypothesis* nextHyp = const_cast<Hypothesis*>(hyp->GetNextHypo());
if (!nextHyp)
return;
for (Hypothesis* h = nextHyp; h; h = const_cast<Hypothesis*>(h->GetNextHypo())){
WordsRange& range = h->GetCurrTargetWordsRange();
range.SetStartPos(range.GetStartPos()+tgtSizeChange);
range.SetEndPos(range.GetEndPos()+tgtSizeChange);
}
}
void Sample::UpdateFeatureValues(const FVector& deltaFV) {
feature_values +=deltaFV;
}
void Sample::CheckFeatureConsistency() const {
FVector expected;
for (FeatureFunctionVector::const_iterator i=m_featureFunctions.begin(); i!=m_featureFunctions.end(); ++i){
(*i)->assignScore(expected);
}
if (expected != feature_values) {
VERBOSE(1, "Expected: " << expected << endl);
VERBOSE(1, "Actual: " << feature_values << endl);
ostringstream msg;
msg << "Score mismatch: e-a = " << (expected-feature_values);
throw runtime_error(msg.str());
}
}
//update the bitmap of the predecessor
void Sample::UpdateCoverageVector(Hypothesis& hyp, const TranslationOption& option) {
size_t startPos = option.GetSourceWordsRange().GetStartPos();
size_t endPos = option.GetSourceWordsRange().GetEndPos();
WordsBitmap & wordBitmap = hyp.GetWordsBitmap();
wordBitmap.SetValue(startPos, endPos, false);
}
void Sample::DeleteFromCache(Hypothesis *hyp) {
set<Hypothesis*>::iterator it = cachedSampledHyps.find(hyp); //use the set's own O(log n) find rather than std::find
if (it != cachedSampledHyps.end()){
delete *it;
cachedSampledHyps.erase(it);
}
}
bool Sample::DoRaoBlackwell() const {
return m_doRaoBlackwell;
}
void Sample::AddConditionalFeatureValues( const FVector & fv ) {
m_conditionalFeatureValues += fv;
++m_updates;
}
const FVector Sample::GetConditionalFeatureValues( ) const {
if (m_doRaoBlackwell) {
FVector fv(m_conditionalFeatureValues);
fv /= m_updates;
return fv;
} else {
return GetFeatureValues();
}
}
void Sample::ResetConditionalFeatureValues(){
m_updates = 0;
m_conditionalFeatureValues.clear();
}
}

113
josiah/Gibbler.h Normal file
View File

@ -0,0 +1,113 @@
#pragma once
#include <map>
#include <set>
#include <vector>
#include "FeatureFunction.h"
#include "FeatureVector.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class TranslationOption;
class Word;
}
using namespace Moses;
namespace Josiah {
class AnnealingSchedule;
class GibbsOperator;
class Sampler;
class OnlineLearner;
class SampleAcceptor;
class Sample {
private:
std::vector<Word> m_targetWords;
const std::vector<Word>& m_sourceWords;
Hypothesis* target_head;
Hypothesis* target_tail;
Hypothesis* source_head;
Hypothesis* source_tail;
FVector feature_values;
FeatureFunctionVector m_featureFunctions;
std::set<Hypothesis*> cachedSampledHyps;
std::map<size_t, Hypothesis*> sourceIndexedHyps;
//Used for conditional estimation (aka Rao-Blackwellisation)
bool m_doRaoBlackwell;
FVector m_conditionalFeatureValues;
size_t m_updates;
void SetSourceIndexedHyps(Hypothesis* h);
void UpdateFeatureValues(const FVector& deltaFV);
void UpdateTargetWordRange(Hypothesis* hyp, int tgtSizeChange);
void UpdateHead(Hypothesis* currHyp, Hypothesis* newHyp, Hypothesis *&head);
void UpdateCoverageVector(Hypothesis& hyp, const TranslationOption& option) ;
Hypothesis* CreateHypothesis( Hypothesis& prevTarget, const TranslationOption& option);
void SetTgtNextHypo(Hypothesis* newHyp, Hypothesis* currNextHypo);
void SetSrcPrevHypo(Hypothesis* newHyp, Hypothesis* srcPrevHypo);
void UpdateTargetWords();
void DeleteFromCache(Hypothesis *hyp);
float ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current) ;
public:
Sample(Hypothesis* target_head, const std::vector<Word>& source, const FeatureVector& features, bool raoBlackwell);
~Sample();
int GetSourceSize() const { return m_sourceWords.size(); }
Hypothesis* GetHypAtSourceIndex(size_t ) ;
const Hypothesis* GetSampleHypothesis() const {
return target_head;
}
const Hypothesis* GetTargetTail() const {
return target_tail;
}
const FVector& GetFeatureValues() const {
return feature_values;
}
const FeatureFunctionVector& GetFeatureFunctions() const {
return m_featureFunctions;
}
/** Check that the feature values are correct */
void CheckFeatureConsistency() const;
void FlipNodes(size_t x, size_t y, const FVector& deltaFV) ;
void FlipNodes(const TranslationOption& , const TranslationOption&, Hypothesis* , Hypothesis* , const FVector& deltaFV);
void ChangeTarget(const TranslationOption& option, const FVector& deltaFV);
void MergeTarget(const TranslationOption& option, const FVector& deltaFV);
void SplitTarget(const TranslationOption& leftTgtOption, const TranslationOption& rightTgtOption, const FVector& deltaFV);
/** Words in the current target */
const std::vector<Word>& GetTargetWords() const { return m_targetWords; }
const std::vector<Word>& GetSourceWords() const { return m_sourceWords; }
int GetTargetLength() { return m_targetWords.size(); }
//Used for conditional estimation (aka Rao-Blackwellisation)
bool DoRaoBlackwell() const;
void AddConditionalFeatureValues(const FVector& fv);
void ResetConditionalFeatureValues();
const FVector GetConditionalFeatureValues() const;
friend class Sampler;
friend class GibbsOperator;
};
typedef boost::shared_ptr<Sample> SampleHandle;
typedef std::vector<SampleHandle> SampleVector;
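//Lifecycle sketch (editor's example): a Sample wraps the decoder's hypothesis
//chain and is mutated in place by the Gibbs operators. All names below are
//declared in this header; the flow itself is illustrative.
//
//  Sample sample(targetHead, sourceWords, features, false /*raoBlackwell*/);
//  sample.ChangeTarget(option, deltaFV);  //apply an accepted delta
//  sample.CheckFeatureConsistency();      //debug: recomputed vs cached scores
//  const std::vector<Word>& output = sample.GetTargetWords();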
}

30
josiah/GibblerAnnealedExpectedLossTrainer.cpp Normal file
View File

@ -0,0 +1,30 @@
#include "GibblerAnnealedExpectedLossTrainer.h"
#include "Hypothesis.h"
#include "Derivation.h"
using namespace std;
namespace Josiah {
float GibblerAnnealedExpectedLossCollector::UpdateGradient(FVector* gradient, FValue* exp_len, FValue *unreg_exp_gain) {
//the distribution is fetched here so that it only has to be done once during gradient calculation
m_p.clear();
m_derivationCollector.getDistribution(m_p);
return ExpectedLossCollector::UpdateGradient(gradient,exp_len, unreg_exp_gain);
}
float GibblerAnnealedExpectedLossCollector::getRegularisationGradientFactor(size_t i) {
FValue temperature = GetTemperature();
const Derivation* d = m_derivationCollector.getSample(i);
FValue prob = m_p[d];
return -temperature * log (N()*prob) ;
}
float GibblerAnnealedExpectedLossCollector::getRegularisation() {
return GetTemperature() * m_derivationCollector.getEntropy();
}
}

46
josiah/GibblerAnnealedExpectedLossTrainer.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <map>
#include <utility>
#include <ext/hash_map>
#include "ScoreComponentCollection.h"
#include "Derivation.h"
#include "GibblerExpectedLossTraining.h"
#include "Phrase.h"
#include "Sampler.h"
#include "GibblerMaxDerivDecoder.h"
using namespace Moses;
namespace Josiah {
class GainFunction;
class GibblerAnnealedExpectedLossCollector : public ExpectedLossCollector {
public:
GibblerAnnealedExpectedLossCollector(const GainFunctionHandle& gain, Sampler& sampler)
: ExpectedLossCollector(gain) {
sampler.AddCollector(&m_derivationCollector);
}
FValue ComputeEntropy();
FValue GetTemperature() { return m_temp;}
void SetTemperature(FValue temp) {m_temp = temp;}
virtual FValue UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_exp_gain);
virtual FValue getRegularisationGradientFactor(size_t i);
virtual FValue getRegularisation();
private:
float m_temp;
DerivationCollector m_derivationCollector;
//cache the distribution during gradient calculation
std::map<const Derivation*,double> m_p;
};
}

112
josiah/GibblerExpectedLossTraining.cpp Normal file
View File

@ -0,0 +1,112 @@
#include <ext/algorithm>
#include "GibblerExpectedLossTraining.h"
#include "Hypothesis.h"
#include "WeightManager.h"
using namespace std;
using namespace __gnu_cxx;
namespace Josiah {
void ExpectedLossCollector::collect(Sample& s) {
const Hypothesis* h = s.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
const FValue gain = m_gainFunction->Evaluate(trans);
m_lengths.push_back(trans.size());
const FVector& fvs = s.GetFeatureValues();
const FVector& rbFvs = s.GetConditionalFeatureValues();
VERBOSE(2, gain << "\tFeatures=" << fvs << endl);
VERBOSE(2, gain << "\tRao-Blackwellised features=" << rbFvs << endl);
//VERBOSE(0, "Collected : Target " << s << ", gain " << gain << "\tFeatures=" << s.GetFeatureValues() << endl);
m_gains.push_back(gain);
// m_samples.push_back(Derivation(s));
m_featureVectors.push_back(fvs);
m_rbFeatureVectors.push_back(rbFvs);
MPI_VERBOSE(2,"Sample: " << Derivation(s) << endl)
}
float ExpectedLossCollector::UpdateGradient(FVector* gradient,FValue *exp_len, FValue *unreg_exp_gain) {
FVector feature_expectations = getFeatureExpectations();
MPI_VERBOSE(1,"FEXP: " << feature_expectations << endl)
const FVector& weights = WeightManager::instance().get();
FValue exp_score = inner_product(feature_expectations, weights);
//gradient computation
FVector grad;
FValue exp_gain = 0;
for (size_t i = 0; i < N(); ++i) {
FVector fv = m_featureVectors[i];
MPI_VERBOSE(2,"FV: " << fv)
const FValue gain = m_gains[i];
fv -= feature_expectations;
MPI_VERBOSE(2,"DIFF: " << fv)
fv *= (gain + getRegularisationGradientFactor(i));
MPI_VERBOSE(2,"GAIN: " << gain << " RF: " << getRegularisationGradientFactor(i) << endl);
exp_gain += gain/N();
fv /= N();
MPI_VERBOSE(2,"WEIGHTED: " << fv << endl)
grad += fv;
MPI_VERBOSE(2,"grad: " << grad << endl)
}
cerr << "Exp gain without reg term : " << exp_gain << endl;
*unreg_exp_gain = exp_gain;
exp_gain += getRegularisation();
cerr << "Exp gain with reg term: " << exp_gain << endl;
(*gradient) += grad;
MPI_VERBOSE(1,"Gradient: " << grad << endl)
cerr << "Gradient: " << grad << endl;
//expected length
if (exp_len) {
*exp_len = 0;
for (size_t i = 0; i < N(); ++i) {
*exp_len += m_lengths[i];
}
*exp_len /= N();
}
return exp_gain;
}
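//Editor's note: the loop above is a Monte-Carlo estimate of the gradient of
//the expected gain,
//  grad = (1/N) * sum_i (g_i + r_i) * (f_i - fbar)
//where f_i is the i-th sampled feature vector, fbar the feature expectation,
//g_i the gain, and r_i the regularisation factor (zero in this base class).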
double ExpectedLossCollector::getExpectedGain() const {
double exp_gain = 0;
for (size_t i = 0; i < N(); ++i) {
exp_gain += m_gains[i];
}
exp_gain /= N();
return exp_gain;
}
FVector ExpectedLossCollector::getFeatureExpectations() const {
FVector sum;
for (size_t i = 0; i < m_featureVectors.size(); ++i) {
sum += m_featureVectors[i];
}
sum /= m_featureVectors.size();
return sum;
}
}

48
josiah/GibblerExpectedLossTraining.h Normal file
View File

@ -0,0 +1,48 @@
#pragma once
#include <map>
#include <utility>
#include "Derivation.h"
#include "FeatureVector.h"
#include "Gain.h"
#include "Gibbler.h"
#include "MpiDebug.h"
#include "StaticData.h"
#include "SampleCollector.h"
using namespace Moses;
namespace Josiah {
//class Derivation;
class ExpectedLossCollector : public SampleCollector {
public:
ExpectedLossCollector(const GainFunctionHandle& gainFunction): m_gainFunction(gainFunction) {}
//ExpectedLossCollector() {}
virtual ~ExpectedLossCollector() {}
virtual void collect(Sample& sample);
// returns the expected gain and expected sentence length
virtual float UpdateGradient(FVector* gradient, FValue* exp_len, FValue* unreg_gain);
virtual FVector getFeatureExpectations() const;
double getExpectedGain() const;
protected:
/** Hooks for adding, e.g., entropy regularisation. The first is added into the gradient, the second to the objective.*/
virtual FValue getRegularisationGradientFactor(size_t i) {return 0;}
virtual FValue getRegularisation() {return 0;}
virtual bool ComputeScaleGradient() {return false;}
const GainFunctionHandle m_gainFunction; //held by value: a reference member bound to the constructor argument could dangle
std::vector<FVector> m_featureVectors;
std::vector<FVector> m_rbFeatureVectors; // Rao-Blackwellised feature vectors
std::vector<FValue> m_gains;
std::vector<size_t> m_lengths;
// std::vector<Derivation> m_samples;
};
}

87
josiah/GibblerMaxDerivDecoder.cpp Normal file
View File

@ -0,0 +1,87 @@
#include "GibblerMaxDerivDecoder.h"
#include "StaticData.h"
#include "MpiDebug.h"
#include <iomanip>
using namespace Moses;
using namespace std;
namespace Josiah {
void DerivationCollector::outputDerivationProbability(const DerivationProbability& dp,size_t n, std::ostream& out) {
out << std::setprecision(8) << dp.second << " " << dp.second*n <<" " << *(dp.first);
}
void DerivationCollector::reset() {
MaxCollector<Derivation>::reset();
m_derivByTrans.clear();
}
void DerivationCollector::collect(Sample& sample) {
collectSample(Derivation(sample));
IFVERBOSE(1) {
VERBOSE(1,"Collected: " << Derivation(sample) << endl);
}
if (m_collectDerivByTrans) {
//derivations per translation
Derivation d(sample);
ostringstream os;
vector<string> sentence;
d.getTargetSentence(sentence);
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(os," "));
m_derivByTrans[os.str()].insert(d);
}
size_t n = N() + 1;
if (m_pd > 0 && n > 0 && n%m_pd == 0) {
pair<const Derivation*,float> max = getMax();
if (max.first) {
MPI_VERBOSE(0, "MaxDeriv(" << n << "): " << std::setprecision(8) << max.second << " " << max.second*n <<" " << *(max.first) << endl)
MPI_VERBOSE(0, "DerivEntropy(" << n << "): " << getEntropy() << endl)
}
}
}
void DerivationCollector::outputDerivationsByTranslation(ostream& out) {
out << "Derivations per translation" << endl;
multimap<size_t,string,greater<size_t> > sortedCounts;
for (map<string, set<Derivation> >::const_iterator i = m_derivByTrans.begin();
i != m_derivByTrans.end(); ++i) {
sortedCounts.insert(pair<size_t,string>(i->second.size(),i->first));
}
for (multimap<size_t,string, greater<size_t> >::const_iterator i = sortedCounts.begin(); i != sortedCounts.end(); ++i) {
out << "COUNT: " << i->first << " TRANS:" << i->second << endl;
if (i->first > 1) {
for (set<Derivation>::const_iterator j = m_derivByTrans[i->second].begin();
j != m_derivByTrans[i->second].end(); ++j) {
out << *j << endl;
}
}
}
}
/**argmax and max*/
std::pair<const Derivation*,float> DerivationCollector::getMAP() const {
const Derivation* argmax = NULL;
float max = -10000;
map<const Derivation*,double> p;
getDistribution(p);
for (map<const Derivation*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
float score = pi->first->getScore();
if (score > max) {
max = score;
argmax = pi->first;
}
}
return pair<const Derivation*,float>(argmax,max);
}
}

30
josiah/GibblerMaxDerivDecoder.h Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <functional>
#include <string>
#include <vector>
#include <set>
#include "Derivation.h"
#include "GibblerMaxTransDecoder.h"
namespace Josiah {
class DerivationCollector: public virtual MaxCollector<Derivation> {
public:
DerivationCollector(): MaxCollector<Derivation>("Deriv"), m_pd(0) ,m_collectDerivByTrans(false) {}
void collect(Sample& sample);
/** Write max periodically to stderr */
void setPeriodicDecode(int pd) {m_pd = pd;}
void setCollectDerivationsByTranslation(bool dbyt) {m_collectDerivByTrans = dbyt;}
void outputDerivationsByTranslation(std::ostream& out);
void outputDerivationProbability(const DerivationProbability& dp,size_t n, std::ostream& out);
void reset();
virtual ~DerivationCollector(){}
std::pair<const Derivation*,float> getMAP() const;
private:
std::map<std::string,std::set<Derivation> > m_derivByTrans;
int m_pd;
bool m_collectDerivByTrans;
};
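//Usage sketch (editor's example): periodic max-derivation reporting during
//sampling; 'sampler' is assumed to be an already configured Josiah sampler.
//
//  DerivationCollector collector;
//  collector.setPeriodicDecode(100); //report the argmax every 100 samples
//  sampler.AddCollector(&collector);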
}

289
josiah/GibblerMaxTransDecoder.cpp Normal file
View File

@ -0,0 +1,289 @@
#include "GibblerMaxTransDecoder.h"
#include "Derivation.h"
#include "StaticData.h"
#include "Gibbler.h"
#include <sstream>
#include <map>
#include <ext/algorithm>
using namespace __gnu_cxx;
using namespace std;
namespace Josiah
{
template<class M>
void MaxCollector<M>::reset()
{
m_samples.clear();
m_sampleList.clear();
SampleCollector::reset();
}
template<class M>
void MaxCollector<M>::getDistribution(map<const M*,double>& p) const
{
double pevent = 1.0/N();
for (typename map<M,vector<size_t> >::const_iterator i = m_samples.begin(); i != m_samples.end(); ++i) {
const M* sample = &(i->first);
p[sample] = i->second.size()*pevent;
}
IFVERBOSE(2) {
float total = 0;
VERBOSE(2, "Distribution: ");
//sort it
multimap<double, const M*> sortedp;
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
sortedp.insert(make_pair(pi->second,pi->first));
total += pi->second;
}
for (typename multimap<double, const M*>::reverse_iterator spi = sortedp.rbegin(); spi != sortedp.rend(); ++spi) {
VERBOSE(2, spi->second << "{ " << *(spi->second) << " }: " << spi->first << " " << endl;);
}
VERBOSE(2, endl << "Total = " << total << endl);
}
}
template<class M>
void MaxCollector<M>::printDistribution(ostream& out) const
{
map<const M*, double> p;
getDistribution(p);
//sort it
multimap<double, const M*> sortedp;
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
sortedp.insert(make_pair(pi->second,pi->first));
}
for (typename multimap<double, const M*>::reverse_iterator spi = sortedp.rbegin(); spi != sortedp.rend(); ++spi) {
out << *(spi->second) << "|||" << spi->first << endl;
}
}
template<class M>
float MaxCollector<M>::getEntropy() const
{
map<const M*, double> p;
getDistribution(p);
float entropy = 0;
//cerr << "Entropy: ";
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
//cerr << pi->second << " ";
entropy -= pi->second*log(pi->second);
}
//cerr << endl;
//cerr << "Entropy : " << entropy << endl;
return entropy;
}
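//Editor's note: getEntropy() computes H = -sum_d p(d) * log p(d) over the
//empirical distribution of collected samples; with natural logs it is
//maximised, at log(#distinct samples), when the distribution is uniform.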
template<class M>
void MaxCollector<M>::collectSample( const M &m)
{
m_samples[m].push_back(N());
typename map<M,vector<size_t> >::const_iterator i = m_samples.find(m);
m_sampleList.push_back(&(i->first));
if (m_outputMaxChange) {
pair<const M*,float> max = getMax();
if (max.first != m_max) {
m_max = max.first;
cerr << "NewMax" << m_name << "(" << N() << ") ";
cerr << *m_max;
cerr << endl;
}
}
}
template<class M>
const M* MaxCollector<M>::getSample(size_t index) const
{
return m_sampleList.at(index);
}
template<class M>
pair<const M*,float> MaxCollector<M>::getMax() const
{
const M* argmax = NULL;
float max = 0;
map<const M*,double> p;
getDistribution(p);
for (typename map<const M*,double>::const_iterator pi = p.begin(); pi != p.end(); ++pi) {
if (pi->second > max) {
max = pi->second;
argmax = pi->first;
}
}
return pair<const M*,float>(argmax,max);
}
template<class M>
struct ProbGreaterThan : public std::binary_function<const pair<const M*,float>&,const pair<const M*,float>&,bool>{
bool operator()(const pair<const M*,float>& d1, const pair<const M*,float>& d2) const {
return d1.second > d2.second;
}
};
template<class M>
void MaxCollector<M>::getNbest(vector<pair<const M*, float> >& nbest, size_t n) const
{
map<const M*,double> p;
getDistribution(p);
nbest.assign(p.begin(),p.end());
ProbGreaterThan<M> comparator;
stable_sort(nbest.begin(),nbest.end(),comparator);
if (n > 0) {
while (nbest.size() > n) {
nbest.pop_back();
}
}
}
template class MaxCollector<Josiah::Derivation>;
template class MaxCollector<Josiah::Translation>;
string ToString(const Translation& ws)
{
ostringstream os;
for (Translation::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << " ";
return os.str();
}
ostream& operator<<(ostream& out, const Translation& ws)
{
out << ToString(ws);
return out;
}
void GibblerMaxTransDecoder::collect(Sample& sample)
{
const Hypothesis* h = sample.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
collectSample(trans);
}
pair<const Translation*,float> GibblerMaxTransDecoder::getMbr(size_t mbrSize, size_t topNsize) const {
//Posterior probs computed using the whole evidence set
//MBR decoding outer loop using configurable size
/* vector<pair<const Translation*, float> > topNTranslations;
getNbest(topNTranslations,topNsize);
GainFunctionVector g;
vector<pair<const Translation*, float> >::iterator it;
for (it = topNTranslations.begin(); it != topNTranslations.end(); ++it) {
VERBOSE(1, "translation: " << ToString(*it->first) << " " << (it->second) << endl);
g.push_back(new SentenceBLEU(4,*it->first)); //Calc the sufficient statistics for the translation
}
//Main MBR computation done here
float bleu(0.0), weightedLoss(0.0), weightedLossCumul(0.0), minMBRLoss(100000);
vector<float> mbrLoss;
int minMBRLossIdx(-1);
mbrSize = min(mbrSize, topNTranslations.size());
VERBOSE(1, "MBR SIZE " << mbrSize << ", all Translations Size " << topNTranslations.size() << endl);
//Outer loop using only the top #mbrSize samples
for(size_t i = 0; i < mbrSize; ++i) {
weightedLossCumul = 0.0;
const GainFunction& gf = g[i];
VERBOSE(2, "Reference " << ToString(*topNTranslations[i].first) << endl);
for(size_t j = 0; j < topNTranslations.size(); ++j) {//Inner loop using all samples
if (static_cast<size_t>(i) != j) {
bleu = gf.ComputeGain(g[j]);
VERBOSE(2, "Hypothesis " << ToString(*topNTranslations[j].first) << endl);
weightedLoss = (1- bleu) * topNTranslations[j].second;
VERBOSE(2, "Bleu " << bleu << ", prob " << topNTranslations[j].second << ", weightedLoss : " << weightedLoss << endl);
weightedLossCumul += weightedLoss;
if (weightedLossCumul > minMBRLoss)
break;
}
}
VERBOSE(2, "Bayes risk for cand " << i << " " << weightedLossCumul << endl);
if (weightedLossCumul < minMBRLoss){
minMBRLoss = weightedLossCumul;
minMBRLossIdx = i;
}
}
VERBOSE(2, "Minimum Bayes risk cand is " << minMBRLossIdx << " with risk " << minMBRLoss << endl);
return topNTranslations[minMBRLossIdx]; */
assert(!"Not yet implemented with new gain function");
}
size_t GibblerMaxTransDecoder::getMbr(const vector<pair<Translation,float> >& translations, size_t topNsize) const {
//Posterior probs computed using the whole evidence set
//MBR decoding outer loop using configurable size
/* vector<pair<const Translation*, float> > topNTranslations;
getNbest(topNTranslations,topNsize);
GainFunctionVector gEvidenceSet;
vector<pair<const Translation*, float> >::iterator it;
for (it = topNTranslations.begin(); it != topNTranslations.end(); ++it) {
VERBOSE(1, "Evidence translation: " << ToString(*it->first) << " " << (it->second) << endl);
gEvidenceSet.push_back(new SentenceBLEU(4,*it->first)); //Calc the sufficient statistics for the translation
}
GainFunctionVector gHypothesisSet;
vector<pair<Translation, float> >::const_iterator itt;
for (itt = translations.begin(); itt != translations.end(); ++itt) {
VERBOSE(1, "Hypothesis translation: " << ToString(itt->first) << " " << (itt->second) << endl);
gHypothesisSet.push_back(new SentenceBLEU(4,itt->first)); //Calc the sufficient statistics for the translation
}
//Main MBR computation done here
float bleu(0.0), weightedLoss(0.0), weightedLossCumul(0.0), minMBRLoss(100000);
vector<float> mbrLoss;
int minMBRLossIdx(-1);
size_t mbrSize = translations.size();
VERBOSE(1, "MBR SIZE " << mbrSize << ", all Translations Size " << topNTranslations.size() << endl);
//Outer loop using only the top #mbrSize samples
for(size_t i = 0; i < mbrSize; ++i) {
weightedLossCumul = 0.0;
const GainFunction& gf = gHypothesisSet[i];
VERBOSE(1, "Reference " << ToString(translations[i].first) << " : [" << translations[i].second << "]" << endl);
for(size_t j = 0; j < topNTranslations.size(); ++j) {//Inner loop using all samples
//if (static_cast<size_t>(i) != j) {
bleu = gf.ComputeGain(gEvidenceSet[j]);
VERBOSE(1, "Hypothesis " << ToString(*topNTranslations[j].first) << endl);
weightedLoss = (1- bleu) * topNTranslations[j].second;
VERBOSE(1, "Bleu " << bleu << ", prob " << topNTranslations[j].second << ", weightedLoss : " << weightedLoss << endl);
weightedLossCumul += weightedLoss;
if (weightedLossCumul > minMBRLoss)
break;
//}
}
VERBOSE(1, "Bayes risk for cand " << i << " " << weightedLossCumul << endl);
if (weightedLossCumul < minMBRLoss){
VERBOSE(1, "New best MBR sol: " << ToString(translations[i].first) << " " << weightedLossCumul << endl);
minMBRLoss = weightedLossCumul;
minMBRLossIdx = i;
}
}
VERBOSE(2, "Minimum Bayes risk cand is " << minMBRLossIdx << " with risk " << minMBRLoss << endl);
return minMBRLossIdx; */
assert(!"Not yet implemented with new gain function");
}
}

79
josiah/GibblerMaxTransDecoder.h Normal file
View File

@ -0,0 +1,79 @@
#pragma once
#include <algorithm>
#include <vector>
#include <utility>
#include <map>
#include "ScoreComponentCollection.h"
#include "Phrase.h"
#include "SampleCollector.h"
namespace Moses {
class Factor;
}
using namespace Moses;
namespace Josiah {
typedef std::vector<const Moses::Factor*> Translation;
std::ostream& operator<<(std::ostream& out, const Translation& ws);
/**
* Collector that looks for a max (eg translation, derivation).
**/
template <class M>
class MaxCollector : public virtual SampleCollector {
public:
MaxCollector<M>(const std::string& name) : m_name(name), m_outputMaxChange(false), m_max(NULL) {} //initialise m_max: collectSample() compares the new argmax against it
/** Should be called to report that an example of M was found in the sample*/
void collectSample(const M&);
/**argmax and max*/
virtual std::pair<const M*,float> getMax() const;
/** n-best list. Set n=0 to get all translations*/
void getNbest(std::vector<std::pair<const M*, float> >& nbest, size_t n) const;
/**Estimate of the probability distribution */
void getDistribution(std::map<const M*,double>& p) const;
/**Print the probability distribution to a file*/
void printDistribution(std::ostream& out) const;
/** Output the max whenever it changes */
void setOutputMaxChange(bool outputMaxChange){m_outputMaxChange = outputMaxChange;}
/** The sample at a given index.*/
const M* getSample(size_t index) const;
float getEntropy() const;
void reset();
virtual ~MaxCollector<M>(){}
private:
//maps the sample to the indices at which it was found.
std::map<M,std::vector<size_t> > m_samples;
//maps indices to samples
std::vector<const M*> m_sampleList;
//used for debug messages
std::string m_name;
//output when max changes?
bool m_outputMaxChange;
const M* m_max;
};
std::string ToString(const Translation& ws);
class GibblerMaxTransDecoder : public virtual MaxCollector<Translation> {
public:
GibblerMaxTransDecoder() : MaxCollector<Translation>("Trans") {}
virtual void collect(Sample& sample);
/** Do mbr decoding */
std::pair<const Translation*,float> getMbr(size_t mbrSize, size_t topNsize = 0) const;
/** Do mbr decoding */
size_t getMbr(const std::vector<std::pair<Translation, float> > & translations, size_t topNsize = 0) const;
virtual ~GibblerMaxTransDecoder(){}
private:
};
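//Usage sketch (editor's example): collectors are registered with a Sampler
//and queried after sampling; 'sampler' is assumed to be already configured.
//
//  GibblerMaxTransDecoder collector;
//  sampler.AddCollector(&collector);
//  //... run sampling ...
//  std::pair<const Translation*,float> best = collector.getMax();
//  std::cerr << ToString(*best.first) << " p=" << best.second << std::endl;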
}

419
josiah/GibbsOperator.cpp Normal file
View File

@ -0,0 +1,419 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "Utils.h"
#include "WordsRange.h"
#include "GibbsOperator.h"
#include "Selector.h"
using namespace std;
using namespace Moses;
namespace Josiah {
static float ComputeDistortionDistance(const WordsRange& prev, const WordsRange& current)
{
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
} else {
dist = (int)prev.GetEndPos() - (int)current.GetStartPos() + 1 ;
}
//cerr << "Computing dist " << prev << " " << current << " " << -abs(dist) << endl;
return - (float) abs(dist);
}
GibbsOperator::~GibbsOperator() {}
PrunedTranslationOptionList::PrunedTranslationOptionList(
const TranslationOptionCollection& toc,
const WordsRange& segment,
size_t count) :
m_options(toc.GetTranslationOptionList(segment)),
m_count(count)
{ }
TranslationOptionList::const_iterator
PrunedTranslationOptionList::begin() const {
return m_options.begin();
}
TranslationOptionList::const_iterator
PrunedTranslationOptionList::end() const {
if (!m_count || m_count > m_options.size()) {
return m_options.end();
} else {
return m_options.begin() + m_count;
}
}
/*
if (sample.DoRaoBlackwell()) {
FVector fv(sample.GetFeatureValues());
fv -= noChangeDelta->getScores();
//Add FV(d)*p(d) for each delta.
vector<double> scores;
//m_acceptor->getNormalisedScores(deltas,scores);
//scores now contain the normalised logprobs
assert(scores.size() == deltas.size());
for (size_t i = 0; i < deltas.size(); ++i) {
if (scores[i] < -30) continue; //floor
FVector deltaFv = deltas[i]->getScores();
deltaFv *= exp(scores[i]);
fv +=deltaFv;
}
//cout << "Rao-Blackwellised fv: " << fv << endl;
sample.AddConditionalFeatureValues(fv);
} */
void MergeSplitOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta)
{
size_t sourceSize = sample.GetSourceSize();
if (sourceSize == 1) return;
size_t splitIndex = RandomNumberGenerator::instance().
getRandomIndexFromZeroToN(sourceSize-1) + 1;
//NB splitIndex n refers to the position between word n-1 and word n. Words are zero indexed
VERBOSE(3,"Sampling at source index " << splitIndex << endl);
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(splitIndex);
auto_ptr<TargetGap> gap;
auto_ptr<TargetGap> leftGap;
auto_ptr<TargetGap> rightGap;
//find out which source and target segments this split-merge operator should consider
//if we're at the left edge of a segment, then we're on a split
if (hypothesis->GetCurrSourceWordsRange().GetStartPos() == splitIndex) {
VERBOSE(3, "Existing split" << endl);
WordsRange rightSourceSegment = hypothesis->GetCurrSourceWordsRange();
WordsRange rightTargetSegment = hypothesis->GetCurrTargetWordsRange();
const Hypothesis* prev = hypothesis->GetSourcePrevHypo();
assert(prev);
assert(prev->GetSourcePrevHypo()); //must be a valid hypo
WordsRange leftSourceSegment = prev->GetCurrSourceWordsRange();
WordsRange leftTargetSegment = prev->GetCurrTargetWordsRange();
if (leftTargetSegment.GetEndPos() + 1 == rightTargetSegment.GetStartPos()) {
//contiguous on target side.
//In this case source and target order are the same
//Add MergeDeltas
WordsRange sourceSegment(leftSourceSegment.GetStartPos(), rightSourceSegment.GetEndPos());
WordsRange targetSegment(leftTargetSegment.GetStartPos(), rightTargetSegment.GetEndPos());
gap.reset( new TargetGap(prev->GetPrevHypo(), hypothesis->GetNextHypo(), targetSegment));
VERBOSE(3, "Creating merge deltas for merging source segments " << leftSourceSegment << " with " <<
rightSourceSegment << " and target segments " << leftTargetSegment << " with " << rightTargetSegment << endl);
PrunedTranslationOptionList options(toc, sourceSegment, m_toptionLimit);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new MergeDelta(sample,*i,*(gap.get())));
deltas.push_back(delta);
}
}
//make sure that the 'left' and 'right' refer to the target order
auto_ptr<PrunedTranslationOptionList> leftOptions;
auto_ptr<PrunedTranslationOptionList> rightOptions;
if (leftTargetSegment < rightTargetSegment) {
//source and target order same
leftOptions.reset(new PrunedTranslationOptionList(toc,leftSourceSegment,m_toptionLimit));
rightOptions.reset(new PrunedTranslationOptionList(toc,rightSourceSegment,m_toptionLimit));
leftGap.reset(new TargetGap(prev->GetPrevHypo(), prev->GetNextHypo(), prev->GetCurrTargetWordsRange()));
rightGap.reset(new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(),
hypothesis->GetCurrTargetWordsRange()));
noChangeDelta.reset(new PairedTranslationUpdateDelta(sample,&(prev->GetTranslationOption())
,&(hypothesis->GetTranslationOption()),*leftGap, *rightGap));
} else {
//target in opposite order to source
leftOptions.reset(new PrunedTranslationOptionList(toc,rightSourceSegment,m_toptionLimit));
rightOptions.reset(new PrunedTranslationOptionList(toc,leftSourceSegment,m_toptionLimit));
leftGap.reset(new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(),
hypothesis->GetCurrTargetWordsRange()));
rightGap.reset(new TargetGap(prev->GetPrevHypo(), prev->GetNextHypo(), prev->GetCurrTargetWordsRange()));
noChangeDelta.reset(new PairedTranslationUpdateDelta(sample,&(hypothesis->GetTranslationOption())
,&(prev->GetTranslationOption()),*leftGap, *rightGap));
}
//Add PairedTranslationUpdateDeltas
for (TranslationOptionList::const_iterator ri = rightOptions->begin(); ri != rightOptions->end(); ++ri) {
for (TranslationOptionList::const_iterator li = leftOptions->begin(); li != leftOptions->end(); ++li) {
TDeltaHandle delta(new PairedTranslationUpdateDelta(sample,*li, *ri, *leftGap, *rightGap));
deltas.push_back(delta);
}
}
//cerr << "Added " << ds << " deltas" << endl;
} else {
VERBOSE(3, "No existing split" << endl);
WordsRange sourceSegment = hypothesis->GetCurrSourceWordsRange();
gap.reset( new TargetGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), hypothesis->GetCurrTargetWordsRange()));
noChangeDelta.reset(new TranslationUpdateDelta(sample,&(hypothesis->GetTranslationOption()),*(gap.get())));
//Add TranslationUpdateDeltas
PrunedTranslationOptionList options(toc,sourceSegment,m_toptionLimit);
//cerr << "Got " << options.size() << " options for " << sourceSegment << endl;
VERBOSE(3, "Creating simple deltas for source segment " << sourceSegment << " and target segment " <<gap.get()->segment
<< endl);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new TranslationUpdateDelta(sample,*i,*(gap.get())));
deltas.push_back(delta);
}
//cerr << "Added " << ds << " deltas" << endl;
//Add SplitDeltas
VERBOSE(3, "Adding deltas to split " << sourceSegment << " at " << splitIndex << endl);
//Note no reordering in split
WordsRange leftSourceSegment(sourceSegment.GetStartPos(),splitIndex-1);
WordsRange rightSourceSegment(splitIndex,sourceSegment.GetEndPos());
PrunedTranslationOptionList leftOptions(toc,leftSourceSegment,m_toptionLimit);
PrunedTranslationOptionList rightOptions(toc,rightSourceSegment,m_toptionLimit);
for (TranslationOptionList::const_iterator ri = rightOptions.begin(); ri != rightOptions.end(); ++ri) {
for (TranslationOptionList::const_iterator li = leftOptions.begin(); li != leftOptions.end(); ++li) {
TDeltaHandle delta(new SplitDelta(sample, *li, *ri, *(gap.get())));
deltas.push_back(delta);
}
}
}
}
void TranslationSwapOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta) {
size_t curPos = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(sample.GetSourceSize());
const Hypothesis* currHypo = sample.GetHypAtSourceIndex(curPos);
TargetGap gap(currHypo->GetPrevHypo(), currHypo->GetNextHypo(), currHypo->GetCurrTargetWordsRange());
const WordsRange& sourceSegment = currHypo->GetCurrSourceWordsRange();
VERBOSE(3, "Considering source segment " << sourceSegment << " and target segment " << gap.segment << endl);
const TranslationOption* noChangeOption = &(currHypo->GetTranslationOption());
noChangeDelta.reset(new TranslationUpdateDelta(sample,noChangeOption,gap));
//const TranslationOptionList& options = toc.GetTranslationOptionList(sourceSegment);
PrunedTranslationOptionList options(toc,sourceSegment,m_toptionLimit);
for (TranslationOptionList::const_iterator i = options.begin(); i != options.end(); ++i) {
TDeltaHandle delta(new TranslationUpdateDelta(sample,*i,gap));
deltas.push_back(delta);
}
}
void FlipOperator::propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta)
{
VERBOSE(2, "Running an iteration of the flip operator" << endl);
CollectAllSplitPoints(sample);
if (m_splitPoints.size() < 2) {
return;
}
size_t i = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(GetSplitPoints().size());
size_t j = i;
while (i == j) {
j = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(GetSplitPoints().size());
}
if (i < j) {
VERBOSE(2, "Forward Flipping phrases at pos " << m_splitPoints[i] << " and " << m_splitPoints[j] << endl);
} else {
VERBOSE(2, "Backward Flipping phrases at pos " << m_splitPoints[i] << " and " << m_splitPoints[j] << endl);
}
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(m_splitPoints[i]);
WordsRange thisSourceSegment = hypothesis->GetCurrSourceWordsRange();
WordsRange thisTargetSegment = hypothesis->GetCurrTargetWordsRange();
Hypothesis* followingHyp = sample.GetHypAtSourceIndex(m_splitPoints[j]);
//would this be a valid reordering?
WordsRange followingSourceSegment = followingHyp->GetCurrSourceWordsRange();
WordsRange followingTargetSegment = followingHyp->GetCurrTargetWordsRange();
if (thisTargetSegment < followingTargetSegment ) {
//source and target order are the same
bool contiguous = (thisTargetSegment.GetEndPos() + 1 == followingTargetSegment.GetStartPos());
/*contiguous on the target side, so flipping would make this a swap;
would this be a valid reordering if we flipped?*/
float totalDistortion = 0;
Hypothesis *newLeftNextHypo, *newRightPrevHypo;
if (contiguous) {
newLeftNextHypo = hypothesis;
newRightPrevHypo = followingHyp;
}
else {
newLeftNextHypo = const_cast<Hypothesis*>(hypothesis->GetNextHypo());
newRightPrevHypo = const_cast<Hypothesis*>(followingHyp->GetPrevHypo());
}
bool isValidSwap = CheckValidReordering(followingHyp->GetCurrSourceWordsRange(), hypothesis->GetCurrSourceWordsRange(), hypothesis->GetPrevHypo(), newLeftNextHypo, newRightPrevHypo, followingHyp->GetNextHypo(), totalDistortion);
if (isValidSwap) {//yes
TargetGap leftGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), thisTargetSegment);
TargetGap rightGap(followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), followingTargetSegment);
TDeltaHandle delta(new FlipDelta(sample, &(followingHyp->GetTranslationOption()),
&(hypothesis->GetTranslationOption()),
leftGap, rightGap));
deltas.push_back(delta);
CheckValidReordering(hypothesis->GetCurrSourceWordsRange(), followingHyp->GetCurrSourceWordsRange(), hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), totalDistortion);
noChangeDelta.reset(new FlipDelta(sample, &(hypothesis->GetTranslationOption()),
&(followingHyp->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(noChangeDelta);
}
}
else {
//swapped on target side, flipping would make this monotone
bool contiguous = (thisTargetSegment.GetStartPos() == followingTargetSegment.GetEndPos() + 1);
float totalDistortion = 0;
Hypothesis *newLeftNextHypo, *newRightPrevHypo;
if (contiguous) {
newLeftNextHypo = followingHyp;
newRightPrevHypo = hypothesis;
}
else {
newLeftNextHypo = const_cast<Hypothesis*>(followingHyp->GetNextHypo());
newRightPrevHypo = const_cast<Hypothesis*>(hypothesis->GetPrevHypo());
}
bool isValidSwap = CheckValidReordering(hypothesis->GetCurrSourceWordsRange(), followingHyp->GetCurrSourceWordsRange(), followingHyp->GetPrevHypo(), newLeftNextHypo, newRightPrevHypo, hypothesis->GetNextHypo(), totalDistortion);
if (isValidSwap) {//yes
TargetGap leftGap(followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), followingTargetSegment);
TargetGap rightGap(hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), thisTargetSegment);
TDeltaHandle delta(new FlipDelta(sample, &(hypothesis->GetTranslationOption()),
&(followingHyp->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(delta);
CheckValidReordering(followingHyp->GetCurrSourceWordsRange(),hypothesis->GetCurrSourceWordsRange(), followingHyp->GetPrevHypo(), followingHyp->GetNextHypo(), hypothesis->GetPrevHypo(), hypothesis->GetNextHypo(), totalDistortion);
noChangeDelta.reset(new FlipDelta(sample,&(followingHyp->GetTranslationOption()),
&(hypothesis->GetTranslationOption()), leftGap, rightGap));
deltas.push_back(noChangeDelta);
}
}
}
bool CheckValidReordering(const WordsRange& leftSourceSegment, const WordsRange& rightSourceSegment, const Hypothesis* leftTgtPrevHypo, const Hypothesis* leftTgtNextHypo, const Hypothesis* rightTgtPrevHypo, const Hypothesis* rightTgtNextHypo, float & totalDistortion){
totalDistortion = 0;
//linear distortion
//const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
//Calculate distortion for leftmost target
//who is proposed new leftmost's predecessor?
// Hypothesis *leftPrevHypo = const_cast<Hypothesis*>(rightTgtHypo->GetPrevHypo());
float distortionScore = 0.0;
if (leftTgtPrevHypo) {
distortionScore = ComputeDistortionDistance(
leftTgtPrevHypo->GetCurrSourceWordsRange(),
leftSourceSegment
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
if (leftTgtNextHypo) {
//Calculate distortion from leftmost target to right target
distortionScore = ComputeDistortionDistance(
leftSourceSegment,
leftTgtNextHypo->GetCurrSourceWordsRange()
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
//Calculate distortion from rightmost target to its successor
//Hypothesis *rightNextHypo = const_cast<Hypothesis*> (leftTgtHypo->GetNextHypo());
if (rightTgtPrevHypo && rightTgtPrevHypo->GetCurrSourceWordsRange() != leftSourceSegment) {
distortionScore = ComputeDistortionDistance(
rightTgtPrevHypo->GetCurrSourceWordsRange(),
rightSourceSegment
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
if (rightTgtNextHypo) {
//Calculate distortion from leftmost target to right target
distortionScore = ComputeDistortionDistance(
rightSourceSegment,
rightTgtNextHypo->GetCurrSourceWordsRange()
);
if (abs(distortionScore) > StaticData::Instance().GetMaxDistortion()) {
return false;
}
totalDistortion += distortionScore;
}
return true;
}
void FlipOperator::CollectAllSplitPoints(Sample& sample) {
m_splitPoints.clear();
size_t sourceSize = sample.GetSourceSize();
for (size_t splitIndex = 0; splitIndex < sourceSize; ++splitIndex) {
Hypothesis* hypothesis = sample.GetHypAtSourceIndex(splitIndex);
if (hypothesis->GetCurrSourceWordsRange().GetEndPos() == splitIndex) {
m_splitPoints.push_back(splitIndex);
}
}
}
}//namespace
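
CheckValidReordering above vets a proposed flip by recomputing the distortion at each junction the swap disturbs, rejecting the move as soon as a single jump exceeds the decoder's distortion limit. A minimal self-contained sketch of that per-jump test; the distance convention (negative absolute jump length) is an assumption mirroring the usual Moses helper, which is defined elsewhere:

#include <cmath>

// Sketch only: assumed convention for the distance between a segment
// ending at prevEndPos and a segment starting at nextStartPos.
static float DistortionDistanceSketch(int prevEndPos, int nextStartPos) {
  return -std::fabs(static_cast<float>(nextStartPos - prevEndPos - 1));
}

// Mirrors the early-return pattern in CheckValidReordering.
static bool JumpWithinLimit(float distortionScore, float maxDistortion) {
  return std::fabs(distortionScore) <= maxDistortion;
}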

134
josiah/GibbsOperator.h Normal file
View File

@ -0,0 +1,134 @@
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include "FeatureVector.h"
#include "Gibbler.h"
#include "TranslationDelta.h"
#include "TypeDef.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class WordsRange;
}
using namespace Moses;
namespace Josiah {
/**
* Used to extract the top-n translation options.
**/
class PrunedTranslationOptionList {
public:
PrunedTranslationOptionList(
const Moses::TranslationOptionCollection& toc,
const Moses::WordsRange& segment,
size_t count);
TranslationOptionList::const_iterator begin() const;
TranslationOptionList::const_iterator end() const;
private:
const TranslationOptionList& m_options;
size_t m_count;
};
/** Abstract base class for Gibbs operators **/
class GibbsOperator {
public:
GibbsOperator(const std::string& name, float prob) : m_name(name), m_prob(prob) {}
/** Proposes a set of possible changes to the current sample, and a delta signifying 'noChange'. */
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta) = 0;
/** The name of this operator */
const std::string& name() const {return m_name;}
/** The weight given to this operator in random scanning */
float GetScanProb() const {return m_prob;}
virtual ~GibbsOperator();
protected:
std::string m_name;
float m_prob; // the probability of sampling this operator
};
/**
* Operator that keeps ordering constant: it visits each (internal) source word boundary,
* merges or splits the segment(s) at that boundary, and updates the translation.
**/
class MergeSplitOperator : public virtual GibbsOperator {
public:
MergeSplitOperator(float scanProb = 0.333,size_t toptionLimit=20) :
GibbsOperator("merge-split", scanProb),
m_toptionLimit(toptionLimit) {}
virtual ~MergeSplitOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
private:
size_t m_toptionLimit;
};
/**
* Operator which may update any translation option, but may not change segmentation or ordering.
**/
class TranslationSwapOperator : public virtual GibbsOperator {
public:
TranslationSwapOperator(float scanProb = 0.333, size_t toptionLimit = 0) :
GibbsOperator("translation-swap", scanProb),
m_toptionLimit(toptionLimit) {}
virtual ~TranslationSwapOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
private:
size_t m_toptionLimit;
};
/**
* Operator which performs local reordering, provided that both the source and target segments are contiguous
* and that the swap does not violate the model's reordering constraints.
**/
class FlipOperator : public virtual GibbsOperator {
public:
FlipOperator(float scanProb = 0.333) : GibbsOperator("flip", scanProb) {}
virtual ~FlipOperator() {}
virtual void propose(Sample& sample, const TranslationOptionCollection& toc,
TDeltaVector& deltas, TDeltaHandle& noChangeDelta);
const std::vector<size_t> & GetSplitPoints() {
return m_splitPoints;
}
private:
void CollectAllSplitPoints(Sample& sample);
std::vector<size_t> m_splitPoints;
};
bool CheckValidReordering(const WordsRange& leftSourceSegment, const WordsRange& rightSourceSegment, const Hypothesis* leftTgtPrevHypo, const Hypothesis* leftTgtNextHypo, const Hypothesis* rightTgtPrevHypo, const Hypothesis* rightTgtNextHypo, float & totalDistortion);
}
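
GetScanProb() above is the weight a random-scan sampler uses when choosing which operator to apply at each step. A hedged sketch of that roulette-wheel selection (the real loop lives in the Sampler, which is not part of this excerpt); it assumes the scan probabilities sum to one, which Josiah.cpp validates at startup:

#include <cstdlib>
#include <vector>

struct OpSketch { float scanProb; };  // stand-in for GibbsOperator*

// Roulette-wheel choice over operator scan probabilities (sketch).
size_t PickOperatorSketch(const std::vector<OpSketch>& ops) {
  float r = static_cast<float>(std::rand()) / RAND_MAX;
  float cum = 0.0f;
  for (size_t i = 0; i < ops.size(); ++i) {
    cum += ops[i].scanProb;
    if (r <= cum) return i;
  }
  return ops.size() - 1;  // guard against floating-point shortfall
}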

61
josiah/Hildreth.cpp Normal file
View File

@ -0,0 +1,61 @@
#include <iostream>
#include <sstream>
#include <vector>
#include <boost/program_options.hpp>
#include "FeatureVector.h"
#include "OnlineLearner.h"
namespace po = boost::program_options;
using namespace Josiah;
using namespace std;
int main(int argc, char** argv) {
vector<float> avec;
float b;
float C;
bool help;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("a", po::value<vector<float> >(&avec), "Constraint vector")
("b", po::value<float>(&b), "Constraint scalar")
("c", po::value<float>(&C)->default_value(0.0f), "slack");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << endl;
cout << desc << endl;
return 0;
}
FVector a;
for (size_t i = 0; i < avec.size(); ++i) {
ostringstream name;
name << i;
a[name.str()] = avec[i];
}
vector<FVector> as;
as.push_back(a);
vector<float> bs;
bs.push_back(b);
vector<float> alpha;
if (C) {
alpha = hildreth(as,bs,C);
} else {
alpha = hildreth(as,bs);
}
cout << alpha[0] << endl;
}
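
For the single constraint this driver exercises, Hildreth's procedure has a closed form: the dual variable is alpha = b / ||a||^2, clipped below at 0 and, when a slack cap C is given, above at C. A hedged sketch of that special case (an assumption about the behaviour of the general hildreth() declared in OnlineLearner.h, which handles multiple constraints iteratively):

#include <algorithm>
#include <vector>

// One-constraint special case only; C == 0 means no slack cap,
// matching the driver's default above.
float HildrethSingleSketch(const std::vector<float>& a, float b, float C) {
  float norm2 = 0.0f;
  for (size_t i = 0; i < a.size(); ++i) norm2 += a[i] * a[i];
  if (norm2 == 0.0f) return 0.0f;
  float alpha = std::max(0.0f, b / norm2);
  return C > 0.0f ? std::min(alpha, C) : alpha;
}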

55
josiah/InputSource.cpp Normal file
View File

@ -0,0 +1,55 @@
#include "InputSource.h"
using namespace std;
namespace Josiah {
InputSource::~InputSource() {}
StreamInputSource::StreamInputSource(std::istream& is) : in(is) {
}
bool StreamInputSource::HasMore() const {
return (in);
}
void StreamInputSource::GetSentence(std::string* sentence, int* lineno) {
(void) lineno;
std::getline(in, *sentence);
}
BatchedFileInputSource::BatchedFileInputSource(
const string& filename, int rank, int size): m_next(0) {
ifstream in(filename.c_str());
if (!in) {
throw runtime_error("Failed to open input file: " + filename);
}
vector<string> lines;
string line;
while(getline(in,line)) {
lines.push_back(line);
}
float batchSize = (float)lines.size()/size;
cerr << "Batch size: " << batchSize << endl;
size_t start = (size_t)(rank*batchSize+0.5);
size_t end = (size_t)((rank+1)*batchSize+0.5);
m_lines.resize(end-start);
copy(lines.begin()+start,lines.begin()+end,m_lines.begin());
cerr << "batch start: " << start << " batch end: " << end << endl;
}
bool BatchedFileInputSource::HasMore() const {
return m_next < m_lines.size();
}
void BatchedFileInputSource::GetSentence(string* sentence, int* lineno) {
*lineno = m_next;
*sentence = m_lines[m_next++];
}
}
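
BatchedFileInputSource rounds rank * batchSize to the nearest integer at both ends of the slice, so consecutive ranks tile the corpus without gaps or overlaps. For example, 10 lines over 3 ranks gives batchSize 3.33 and slices [0,3), [3,7), [7,10); a standalone check of that arithmetic:

#include <cstdio>

int main() {
  const int lines = 10, size = 3;
  const float batchSize = (float)lines / size;
  for (int rank = 0; rank < size; ++rank) {
    size_t start = (size_t)(rank * batchSize + 0.5);
    size_t end = (size_t)((rank + 1) * batchSize + 0.5);
    std::printf("rank %d: [%zu,%zu)\n", rank, start, end);  // [0,3) [3,7) [7,10)
  }
  return 0;
}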

42
josiah/InputSource.h Normal file
View File

@ -0,0 +1,42 @@
#pragma once
#include <stdexcept>
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
namespace Josiah {
struct InputSource {
virtual bool HasMore() const = 0;
virtual void GetSentence(std::string* sentence, int* lineno) = 0;
virtual ~InputSource();
};
struct StreamInputSource : public InputSource {
std::istream& in;
StreamInputSource(std::istream& is);
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
};
/**
* Splits a file into batches.
**/
class BatchedFileInputSource : public InputSource {
public:
BatchedFileInputSource(
const std::string& filename, int rank, int size);
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
private:
std::vector<std::string> m_lines;
size_t m_next;
};
}
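
A consumer drains any InputSource with the HasMore()/GetSentence() pair. Note the asymmetry above: StreamInputSource ignores the lineno out-parameter, while BatchedFileInputSource sets it to the index within the batch. A minimal usage sketch against this interface:

#include <iostream>
#include <string>
#include "InputSource.h"  // the header above

void DrainSketch(Josiah::InputSource& src) {
  std::string sentence;
  int lineno = 0;
  while (src.HasMore()) {
    src.GetSentence(&sentence, &lineno);  // lineno only set by the batched source
    std::cout << lineno << ": " << sentence << "\n";
  }
}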

566
josiah/Josiah.cpp Normal file
View File

@ -0,0 +1,566 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <iterator>
#include <memory>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/program_options.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string.hpp>
#include "AnnealingSchedule.h"
#include "Bleu.h"
#include "Decoder.h"
#include "Derivation.h"
#include "FeatureVector.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "TrainingSource.h"
#include "GibbsOperator.h"
#include "Gain.h"
#include "GibblerExpectedLossTraining.h"
#include "GibblerAnnealedExpectedLossTrainer.h"
#include "GibblerMaxTransDecoder.h"
#include "MpiDebug.h"
#include "Selector.h"
#include "StaticData.h"
#include "Optimizer.h"
#include "Utils.h"
#include "WeightManager.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::lexical_cast;
using boost::bad_lexical_cast;
using boost::split;
using boost::is_any_of;
namespace po = boost::program_options;
/**
* Main for Josiah - the Gibbs sampler for Moses.
**/
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
GibbsTimer timer;
size_t iterations;
unsigned int topn;
int debug;
int mpidebug;
string mpidebugfile;
string feature_file;
int burning_its;
int mbr_size, topNsize;
string inputfile;
string outputfile;
string mosesini;
bool decode;
bool translate;
bool translation_distro;
bool derivation_distro;
bool help;
bool expected_sbleu;
bool expected_sbleu_da;
bool output_expected_sbleu;
unsigned training_batch_size;
bool mbr_decoding;
bool do_timing;
int max_training_iterations;
uint32_t seed;
int lineno;
bool randomize;
FValue scalefactor;
FValue eta;
FValue mu;
string weightfile;
vector<string> ref_files;
int periodic_decode;
bool collect_dbyt;
bool output_max_change;
bool anneal;
unsigned int reheatings;
float max_temp;
FValue prior_variance;
FValue prior_mean;
string prev_gradient_file;
float start_temp_expda;
float stop_temp_expda;
float floor_temp_expda;
float anneal_ratio_da;
float gamma;
bool use_metanormalized_egd;
int optimizerFreq;
int weight_dump_freq;
string weight_dump_stem;
int init_iteration_number;
bool greedy, fixedTemp;
float fixed_temperature;
bool mapdecode;
vector<string> ngramorders;
bool raoBlackwell;
bool use_moses_kbesthyposet;
bool print_moseskbest;
bool randomScan;
size_t lag;
float flip_prob, merge_split_prob, retrans_prob;
bool calc_exact_posterior, filter_by_posterior;
float evidenceSetShrinkFactor;
bool randomShrink;
float log_base_factor;
bool checkFeatures;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug-level", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("timing,m", po::value(&do_timing)->zero_tokens()->default_value(false), "Display timing information.")
("iterations,s", po::value<size_t>(&iterations)->default_value(10),
"Number of sampler iterations")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("scale-factor,c", po::value<FValue>(&scalefactor)->default_value(1.0), "Scale factor for model weights.")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("output-file-prefix,o",po::value<string>(&outputfile),"Output file prefix for translations, MBR output, etc")
("nbest-drv,n",po::value<unsigned int>(&topn)->default_value(0),"Write the top n derivations to stdout")
("weights,w",po::value<string>(&weightfile),"Weight file")
("decode-derivation,d",po::value( &decode)->zero_tokens()->default_value(false),"Write the most likely derivation to stdout")
("decode-translation,t",po::value(&translate)->zero_tokens()->default_value(false),"Write the most likely translation to stdout")
("distro-derivation", po::value(&derivation_distro)->zero_tokens()->default_value(false), "Print derivation probability distribution")
("distro-translation", po::value(&translation_distro)->zero_tokens()->default_value(false), "Print translation probability distribution")
("periodic-derivation,p",po::value(&periodic_decode)->default_value(0), "Periodically write the max derivation to stderr")
("max-change", po::value(&output_max_change)->zero_tokens()->default_value(false), "Whenever the max deriv or max trans changes, write it to stderr")
("collect-dbyt",po::value(&collect_dbyt)->zero_tokens()->default_value(false), "Collect derivations per translation")
("line-number,L", po::value(&lineno)->default_value(0), "Starting reference/line number")
("randomize-batches,R", po::value(&randomize)->zero_tokens()->default_value(false), "Randomize training batches")
("gaussian-prior-variance", po::value<FValue>(&prior_variance)->default_value(0.0f), "Gaussian prior variance (0 for no prior)")
("gaussian-prior-mean,P", po::value<FValue>(&prior_mean)->default_value(0.0f), "Gaussian prior mean")
("expected-bleu-training,T", po::value(&expected_sbleu)->zero_tokens()->default_value(false), "Train to maximize expected sentence BLEU")
("output-expected-sbleu", po::value(&output_expected_sbleu)->zero_tokens()->default_value(false), "Output expected bleu and feature expectations at end of sampling")
("max-training-iterations,M", po::value(&max_training_iterations)->default_value(30), "Maximum training iterations")
("training-batch-size,S", po::value(&training_batch_size)->default_value(0), "Batch size to use during xpected bleu training, 0 = full corpus")
("reheatings", po::value<unsigned int>(&reheatings)->default_value(1), "Number of times to reheat the sampler")
("anneal,a", po::value(&anneal)->default_value(false)->zero_tokens(), "Use annealing during the burn in period")
("max-temp", po::value<float>(&max_temp)->default_value(4.0), "Annealing maximum temperature")
("eta", po::value<FValue>(&eta), "Default learning rate for SGD/EGD")
("prev-gradient", po::value<string>(&prev_gradient_file), "File containing previous gradient for restarting SGD/EGD")
("mu", po::value<float>(&mu)->default_value(1.0f), "Metalearning rate for EGD")
("gamma", po::value<float>(&gamma)->default_value(0.9f), "Smoothing parameter for Metanormalized EGD ")
("mbr-size", po::value<int>(&mbr_size)->default_value(200),"Number of samples to use for MBR decoding")
("mbr", po::value(&mbr_decoding)->zero_tokens()->default_value(false), "Minimum Bayes Risk Decoding")
("topn-size", po::value<int>(&topNsize)->default_value(0),"Number of samples to use for inner loop of MBR decoding")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("check-features", po::value<bool>(&checkFeatures)->zero_tokens()->default_value(false), "Check features for consistency after every update")
("use-metanormalized-egd,N", po::value(&use_metanormalized_egd)->zero_tokens()->default_value(false), "Use metanormalized EGD")
("expected-bleu-deterministic-annealing-training,D", po::value(&expected_sbleu_da)->zero_tokens()->default_value(false), "Train to maximize expected sentence BLEU using deterministic annealing")
("optimizer-freq", po::value<int>(&optimizerFreq)->default_value(1),"Number of optimization to perform at given temperature")
("initial-det-anneal-temp", po::value<float>(&start_temp_expda)->default_value(1000.0f), "Initial deterministic annealing entropy temperature")
("final-det-anneal-temp", po::value<float>(&stop_temp_expda)->default_value(0.001f), "Final deterministic annealing entropy temperature")
("floor-temp", po::value<float>(&floor_temp_expda)->default_value(0.0f), "Floor temperature for det annealing")
("det-annealing-ratio,A", po::value<float>(&anneal_ratio_da)->default_value(0.5f), "Deterministc annealing ratio")
("weight-dump-freq", po::value<int>(&weight_dump_freq)->default_value(0), "Frequency to dump weight files during training")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value("weights"), "Stem of filename to use for dumping weights")
("init-iteration-number", po::value<int>(&init_iteration_number)->default_value(0), "First training iteration will be one after this (useful for restarting)")
("greedy", po::value(&greedy)->zero_tokens()->default_value(false), "Greedy sample acceptor")
("fixed-temp-accept", po::value(&fixedTemp)->zero_tokens()->default_value(false), "Fixed temperature sample acceptor")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("rao-blackwell", po::value(&raoBlackwell)->zero_tokens()->default_value(false), "Do Rao-Blackwellisation (aka conditional estimation")
("mapdecode", po::value(&mapdecode)->zero_tokens()->default_value(false), "MAP decoding")
("mh.ngramorders", po::value< vector <string> >(&ngramorders), "Indicate LMs and ngram orders to be used during MH/Gibbs")
("use-moses-kbesthyposet", po::value(&use_moses_kbesthyposet)->zero_tokens()->default_value(false), "Use Moses to generate kbest hypothesis set")
("print-moseskbest", po::value(&print_moseskbest)->zero_tokens()->default_value(false), "Print Moses kbest")
("lag", po::value<size_t>(&lag)->default_value(10), "Lag between collecting samples")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("calc-exact-post", po::value(&calc_exact_posterior)->zero_tokens()->default_value(false), "Calculate exact posterior")
("filter-exact-post", po::value(&filter_by_posterior)->zero_tokens()->default_value(false), "Filter sample set using exact posterior")
("evidence-shrink", po::value<float>(&evidenceSetShrinkFactor)->default_value(0.9f), "Evidence set shrink factor for MBR decoding")
("random-shrink", po::value(&randomShrink)->zero_tokens()->default_value(false), "Shrink evidence set randomly, otherwise shrink by discarding low probability elements")
("log-base-factor", po::value<float>(&log_base_factor)->default_value(1.0f), "Scaling factor for log probabilities in translation and language models");
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (expected_sbleu && expected_sbleu_da) {
std::cerr << "Incorrect usage: Cannot do both expected bleu training and expected bleu deterministic annealing training" << std::endl;
return 0;
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (translation_distro) translate = true;
if (derivation_distro) decode = true;
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
cerr << "optimizer freq " << optimizerFreq << endl;
assert(optimizerFreq != 0);
if (do_timing) {
timer.on();
}
if (log_base_factor != 1.0) {
//cerr << "Setting log base factor to " << log_base_factor << endl;
cerr << "setting log base factor disabled" << endl;
exit(1);
//SetLogBaseFactor(log_base_factor);
}
//set up moses
initMoses(mosesini,debug);
FeatureVector features;
FVector coreWeights;
configure_features_from_file(feature_file, features, false, coreWeights);
std::cerr << "Using " << features.size() << " features" << std::endl;
//scale model weights
WeightManager::instance().scale(scalefactor);
VERBOSE(1,"Scaled weights by factor of " << scalefactor << endl);
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
auto_ptr<Gain> gain;
ostream* out = &cout;
if (!outputfile.empty()) {
ostringstream os;
os << setfill('0');
os << outputfile << '.' << setw(3) << rank << "_of_" << size;
VERBOSE(1, "Writing output to: " << os.str() << endl);
out = new ofstream(os.str().c_str());
}
auto_ptr<istream> in;
auto_ptr<InputSource> input;
auto_ptr<Optimizer> optimizer;
FVector etaVector(eta);
FVector prev_gradient;
if (!prev_gradient_file.empty()) {
prev_gradient.load(prev_gradient_file);
}
if (use_metanormalized_egd) {
optimizer.reset(new MetaNormalizedExponentiatedGradientDescent(
etaVector,
mu,
0.1, // minimal step scaling factor
gamma,
max_training_iterations,
prev_gradient));
} else {
optimizer.reset(new ExponentiatedGradientDescent(
etaVector,
mu,
0.1f, // minimal step scaling factor
max_training_iterations,
prev_gradient));
}
if (optimizer.get()) {
optimizer->SetIteration(init_iteration_number);
}
if (prior_variance != 0.0f) {
assert(prior_variance > 0);
std::cerr << "Using Gaussian prior: \\sigma^2=" << prior_variance << " \\mu=" << prior_mean << endl;
optimizer->SetUseGaussianPrior(prior_mean, prior_variance);
}
ExpectedBleuTrainer* trainer = NULL;
if (expected_sbleu || expected_sbleu_da) {
gain.reset(new Bleu());
gain->LoadReferences(ref_files,inputfile);
vector<string> input_lines;
ifstream infiles(inputfile.c_str());
assert (infiles);
while(infiles) {
string line;
getline(infiles, line);
if (line.empty() && infiles.eof()) break;
assert(!line.empty());
input_lines.push_back(line);
}
VERBOSE(1, "Loaded " << input_lines.size() << " lines in training mode" << endl);
if (!training_batch_size || training_batch_size > input_lines.size())
training_batch_size = input_lines.size();
VERBOSE(1, "Batch size: " << training_batch_size << endl);
trainer = new ExpectedBleuTrainer(rank, size, training_batch_size, &input_lines, seed, randomize, optimizer.get(),
weight_dump_freq, weight_dump_stem);
input.reset(trainer);
} else {
if (inputfile.size()) {
input.reset(new BatchedFileInputSource(inputfile,rank,size));
} else {
input.reset(new StreamInputSource(cin));
}
}
auto_ptr<SamplingSelector> selector(new SamplingSelector());
auto_ptr<AnnealingSchedule> annealingSchedule;
if (anneal) {
annealingSchedule.reset(new LinearAnnealingSchedule(burning_its, max_temp));
selector->SetAnnealingSchedule(annealingSchedule.get());
}
auto_ptr<AnnealingSchedule> detAnnealingSchedule;
if (expected_sbleu_da) {
detAnnealingSchedule.reset(new ExponentialAnnealingSchedule(start_temp_expda, stop_temp_expda, floor_temp_expda, anneal_ratio_da));
}
timer.check("Processing input file");
while (input->HasMore()) {
string line;
input->GetSentence(&line, &lineno);
cerr << "line : " << line << endl;
if (line.empty()) {
if (!input->HasMore()) continue;
assert(!"I don't like empty lines");
}
//configure the sampler
Sampler sampler;
sampler.SetSelector(selector.get());
sampler.SetCheckFeatures(checkFeatures);
VERBOSE(2,"Reheatings: " << reheatings << endl);
sampler.SetReheatings(reheatings);
sampler.SetLag(lag); //thinning factor for sample collection
auto_ptr<DerivationCollector> derivationCollector;
auto_ptr<ExpectedLossCollector> elCollector;
auto_ptr<GibblerMaxTransDecoder> transCollector;
if (expected_sbleu || output_expected_sbleu) {
elCollector.reset(new ExpectedLossCollector(gain->GetGainFunction(lineno)));
sampler.AddCollector(elCollector.get());
}
else if (expected_sbleu_da) {
elCollector.reset(new GibblerAnnealedExpectedLossCollector(gain->GetGainFunction(lineno), sampler));
sampler.AddCollector(elCollector.get());
//Set the annealing temperature
int it = optimizer->GetIteration() / optimizerFreq ;
float temp = detAnnealingSchedule->GetTemperatureAtTime(it);
GibblerAnnealedExpectedLossCollector* annealedELCollector = static_cast<GibblerAnnealedExpectedLossCollector*>(elCollector.get());
annealedELCollector->SetTemperature(temp);
cerr << "Annealing temperature " << annealedELCollector->GetTemperature() << endl;
}
if (mapdecode || decode || topn > 0 || periodic_decode > 0) {
DerivationCollector* collector = new DerivationCollector();
collector->setPeriodicDecode(periodic_decode);
collector->setCollectDerivationsByTranslation(collect_dbyt);
collector->setOutputMaxChange(output_max_change);
derivationCollector.reset(collector);
sampler.AddCollector(derivationCollector.get());
}
if (translate || mbr_decoding) {
transCollector.reset(new GibblerMaxTransDecoder());
transCollector->setOutputMaxChange(output_max_change);
sampler.AddCollector(transCollector.get() );
}
MergeSplitOperator mso(merge_split_prob);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
if (greedy || fixed_temperature == 0) {
assert(!"greedy not supported");
}
else if (fixedTemp){
assert(!"fixed temp not supported");
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
timer.check("Running decoder");
vector<TranslationHypothesis> translations;
translations.push_back(TranslationHypothesis(line));
timer.check("Running sampler");
sampler.Run(translations,features, raoBlackwell);
timer.check("Outputting results");
if (expected_sbleu || expected_sbleu_da) {
FVector gradient;
FValue exp_trans_len = 0;
FValue unreg_exp_gain = 0;
const float exp_gain = elCollector->UpdateGradient(&gradient, &exp_trans_len, &unreg_exp_gain);
(*out) << '(' << lineno << ") Expected sentence BLEU: " << exp_gain
<< " \tExpected length: " << exp_trans_len << endl;
if (trainer)
trainer->IncorporateGradient(
exp_trans_len,
gain->GetAverageReferenceLength(lineno),
exp_gain,
unreg_exp_gain,
gradient);
}
if (output_expected_sbleu) {
(*out) << "ESBLEU: " << lineno << " " << elCollector->getExpectedGain() << endl;
(*out) << "EFVs: " << lineno;
FVector scores = elCollector->getFeatureExpectations();
(*out) << scores << endl;
}
if (derivationCollector.get()) {
cerr << "DerivEntropy " << derivationCollector->getEntropy() << endl;
vector<pair<const Derivation*, float> > nbest;
derivationCollector->getNbest(nbest,max(topn,1u));
for (size_t i = 0; i < topn && i < nbest.size() ; ++i) {
//const Derivation d = *(nbest[i].first);
cerr << "NBEST: " << lineno << " ";
derivationCollector->outputDerivationProbability(nbest[i],derivationCollector->N(),cerr);
cerr << endl;
}
if (mapdecode) {
pair<const Derivation*, float> map_soln = derivationCollector->getMAP();
vector<string> sentence;
map_soln.first->getTargetSentence(sentence);
VERBOSE(1, "MAP Soln, model score [" << map_soln.second << "]" << endl)
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(*out," "));
(*out) << endl << flush;
}
if (decode) {
pair<const Derivation*, float> max = derivationCollector->getMax();
vector<string> sentence;
max.first->getTargetSentence(sentence);
VERBOSE(1, "sample Soln, model score [" << max.first->getScore() << "]" << endl)
copy(sentence.begin(),sentence.end(),ostream_iterator<string>(*out," "));
(*out) << endl << flush;
}
if (collect_dbyt) {
derivationCollector->outputDerivationsByTranslation(std::cerr);
}
if (derivation_distro) {
std::cout << "BEGIN: derivation probability distribution" << std::endl;
derivationCollector->printDistribution(std::cout);
std::cout << "END: derivation probability distribution" << std::endl;
}
}
if (translate) {
cerr << "TransEntropy " << transCollector->getEntropy() << endl;
pair<const Translation*,float> maxtrans = transCollector->getMax();
(*out) << *maxtrans.first;
(*out) << endl << flush;
if (translation_distro) {
std::cout << "BEGIN: translation probability distribution" << std::endl;
transCollector->printDistribution(std::cout);
std::cout << "END: translation probability distribution" << std::endl;
}
}
if (mbr_decoding) {
pair<const Translation*,float> maxtrans;
// use samples as hyp set
maxtrans = transCollector->getMbr(mbr_size, topNsize);
(*out) << *maxtrans.first;
(*out) << endl << flush;
}
++lineno;
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
(*out) << flush;
if (!outputfile.empty())
delete out;
return 0;
}
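
The --mbr branch above decodes by choosing, from the collected samples, the translation with the highest total gain against the rest of the sample set. A hedged sketch of that decision rule; the real implementation (GibblerMaxTransDecoder::getMbr) additionally supports the topn-size inner loop and the evidence-set shrinking configured above:

#include <cstddef>
#include <vector>

// gain is any pairwise similarity, e.g. sentence BLEU (sketch).
template <class Hyp, class GainFn>
std::size_t MbrIndexSketch(const std::vector<Hyp>& samples, GainFn gain) {
  std::size_t best = 0;
  double bestScore = -1e300;
  for (std::size_t i = 0; i < samples.size(); ++i) {
    double total = 0.0;
    for (std::size_t j = 0; j < samples.size(); ++j)
      if (i != j) total += gain(samples[i], samples[j]);
    if (total > bestScore) { bestScore = total; best = i; }
  }
  return best;
}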

476
josiah/LanguageModelFeature.cpp Normal file
View File

@ -0,0 +1,476 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "LanguageModelFeature.h"
#include <memory>
#include <vector>
#include "Gibbler.h"
using namespace Moses;
using namespace std;
namespace Josiah {
LanguageModelFeature::LanguageModelFeature(const Moses::LanguageModel* lmodel) :
m_lmodel(lmodel) {}
FeatureFunctionHandle LanguageModelFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new LanguageModelFeatureFunction(sample, m_lmodel));
}
LanguageModelFeatureFunction::LanguageModelFeatureFunction
(const Sample& sample, const LanguageModel* lmodel):
SingleValuedFeatureFunction(sample,lmodel->GetScoreProducerDescription()),
m_lmodel(lmodel) {}
/** Compute total score for sentence */
FValue LanguageModelFeatureFunction::computeScore() {
FValue score = 0;
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
const vector<Word>& target = getSample().GetTargetWords();
lmcontext.reserve(target.size() + 2*(order-1));
for (size_t i = 0; i < order-1; ++i) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
}
for (size_t i = 0; i < target.size(); ++i) {
lmcontext.push_back(&(target[i]));
}
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
vector<const Word*> ngram(order);
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
size_t ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
score += GetValue(ngram);
}
return score;
}
/** Score due to one segment */
FValue LanguageModelFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap) {
size_t order = m_lmodel->GetNGramOrder();
const TargetPhrase& targetPhrase = option->GetTargetPhrase();
vector<const Word*> lmcontext;
lmcontext.reserve(targetPhrase.GetSize() + 2*(order-1));
int start = gap.segment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
size_t startOption = lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
lmcontext.push_back(&(targetPhrase.GetWord(i)));
}
size_t endOption = lmcontext.size();
//fill in the postcontext
for (size_t i = 0; i < order-1; ++i) {
size_t targetPos = i + gap.segment.GetEndPos() + 1;
if (targetPos >= getSample().GetTargetWords().size()) {
if (targetPos == getSample().GetTargetWords().size()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
}
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[targetPos]));
}
}
//debug
IFVERBOSE(3) {
VERBOSE(3,"Segment: " << gap.segment << " phrase: " << option->GetTargetPhrase() << endl);
VERBOSE(3,"LM context ");
for (size_t j = 0; j < lmcontext.size(); ++j) {
VERBOSE(3,*(lmcontext[j]) << " ");
}
VERBOSE(3,endl);
}
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
bool useOptionCachedLMScore = false;
size_t ngramCtr;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= startOption && ngramstart + order - 1 < endOption) {
useOptionCachedLMScore = true;
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useOptionCachedLMScore) {
const ScoreComponentCollection& sc = option->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
/** Score due to two segments. The left and right refer to the target positions.**/
FValue LanguageModelFeatureFunction::getContiguousPairedUpdateScore(const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) {
//Create the whole segment
const WordsRange& targetSegment = gap.segment;
//create the phrase
size_t lsize = leftOption->GetTargetPhrase().GetSize();
size_t rsize = rightOption->GetTargetPhrase().GetSize();
vector<const Word*> targetPhrase(lsize+rsize);
size_t i = 0;
for (size_t j = 0; j < lsize; ++j, ++i) {
targetPhrase[i] = &(leftOption->GetTargetPhrase().GetWord(j));
}
for (size_t j = 0; j < rsize; ++j, ++i) {
targetPhrase[i] = &(rightOption->GetTargetPhrase().GetWord(j));
}
//set the indices for start and end positions
size_t leftStartPos(0);
size_t leftEndPos(leftOption->GetTargetPhrase().GetSize());
size_t rightStartPos(leftEndPos);
size_t rightEndPos(targetPhrase.size());
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
lmcontext.reserve(targetPhrase.size() + 2*(order-1));
int start = targetSegment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
//Offset the indices by pre-context size
leftStartPos += lmcontext.size();
leftEndPos += lmcontext.size();
rightStartPos += lmcontext.size();
rightEndPos += lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.size(); ++i) {
lmcontext.push_back(targetPhrase[i]);
}
//fill in the postcontext
for (size_t i = 0; i < order-1; ++i) {
size_t targetPos = i + targetSegment.GetEndPos() + 1;
if (targetPos >= getSample().GetTargetWords().size()) {
if (targetPos == getSample().GetTargetWords().size()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
}
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[targetPos]));
}
}
//debug
IFVERBOSE(3) {
VERBOSE(3,"Segment: " << targetSegment << /*" phrase: " << targetPhrase << */endl);
VERBOSE(3,"LM context ");
for (size_t j = 0; j < lmcontext.size(); ++j) {
VERBOSE(3,*(lmcontext[j]) << " ");
}
VERBOSE(3,endl);
}
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
bool useLeftOptionCacheLM(false), useRightOptionCacheLM(false) ;
size_t ngramCtr;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
}
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useLeftOptionCacheLM) {
const ScoreComponentCollection & sc = leftOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
if (useRightOptionCacheLM) {
const ScoreComponentCollection & sc = rightOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
FValue LanguageModelFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
const Phrase& leftTgtPhrase = leftOption->GetTargetPhrase();
const Phrase& rightTgtPhrase = rightOption->GetTargetPhrase();
size_t order = m_lmodel->GetNGramOrder();
vector<const Word*> lmcontext;
lmcontext.reserve(max(leftTgtPhrase.GetSize(), rightTgtPhrase.GetSize()) + 2*(order-1));
int start = leftGap.segment.GetStartPos() - (order-1);
//fill in the pre-context
for (size_t i = 0; i < order-1; ++i) {
if (start+(int)i < 0) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
} else {
lmcontext.push_back(&(getSample().GetTargetWords()[i+start]));
}
}
size_t leftStartPos(lmcontext.size()); // to track option's cached LM Score
//fill in the target phrase
for (size_t i = 0; i < leftTgtPhrase.GetSize(); ++i) {
lmcontext.push_back(&(leftTgtPhrase.GetWord(i)));
}
// to track option's cached LM Score
size_t leftEndPos(lmcontext.size());
size_t rightStartPos(0), rightEndPos(0);
//fill in the postcontext needed for leftmost phrase
//First get words from phrases in between, then from right phrase, then words past right phrase, then end of sentence
size_t gapSize = rightGap.segment.GetStartPos() - leftGap.segment.GetEndPos() - 1;
size_t leftSegmentEndPos = leftGap.segment.GetEndPos();
for (size_t i = 0; i < order - 1; i++) {
int rightOffset = i - gapSize;
if (rightOffset < 0) {
lmcontext.push_back(&(getSample().GetTargetWords()[leftSegmentEndPos + i + 1]));
}
else if (rightOffset < (int)rightTgtPhrase.GetSize() ) {
if (rightOffset == 0) {
rightStartPos = lmcontext.size();
}
lmcontext.push_back(&(rightTgtPhrase.GetWord(rightOffset)));
rightEndPos = lmcontext.size();
}
else if (rightOffset - rightTgtPhrase.GetSize() + rightGap.segment.GetEndPos() + 1 < getSample().GetTargetWords().size() ) {
lmcontext.push_back(&(getSample().GetTargetWords()[(rightOffset - rightTgtPhrase.GetSize() + rightGap.segment.GetEndPos() + 1)]));
}
else {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
break;
}
}
VERBOSE(3,"Left LM Context : ");
for (size_t i = 0; i < lmcontext.size(); i++) {
VERBOSE(3,*lmcontext[i] << " ");
}
VERBOSE(3, endl);
//score lm
FValue lmscore = 0;
vector<const Word*> ngram(order);
size_t ngramCtr;
bool useLeftOptionCacheLM(false), useRightOptionCacheLM(false) ;
for (size_t ngramstart = 0; ngramstart < lmcontext.size() - (order -1); ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
VERBOSE(3, "In flip, Left LM Context, Using cached option LM score for left Option: " << leftOption->GetTargetPhrase() << endl;)
}
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
VERBOSE(3, "In flip, Left LM Context, Using cached option LM score for right Option: " << rightOption->GetTargetPhrase() << endl;)
}
else {
ngramCtr =0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] =lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useLeftOptionCacheLM) {
const ScoreComponentCollection & sc = leftOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Left option Language model score: " << lmscore << endl);
//Now for the right target phrase
lmcontext.clear();
//Reset the indices
leftStartPos = 0;
leftEndPos = 0;
rightStartPos = 0;
rightEndPos = 0;
//Fill in the pre-context
size_t i = 0;
if (order <= gapSize) { //no risk of ngram overlaps with left phrase post context
i = order -1;
}
else {//how far back can we go
i = gapSize;
}
size_t leftOffset = gapSize + leftTgtPhrase.GetSize();
for ( ; i > 0 ; --i) {
if (i > leftOffset + leftGap.segment.GetStartPos()) {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceStartArray()));
}
else if (i > leftOffset) {
lmcontext.push_back(&(getSample().GetTargetWords()[leftOffset - i + leftGap.segment.GetStartPos() ]));
}
else if ( i > gapSize) {
if (i - gapSize == 1){
leftStartPos = lmcontext.size();
}
lmcontext.push_back(&(leftTgtPhrase.GetWord(leftOffset - i)));
leftEndPos = lmcontext.size();
}
else {
lmcontext.push_back(&(getSample().GetTargetWords()[leftGap.segment.GetEndPos() + gapSize - i + 1 ]));
}
}
//Fill in right target phrase
rightStartPos = lmcontext.size();
//fill in the target phrase
for (size_t i = 0; i < rightTgtPhrase.GetSize(); ++i) {
lmcontext.push_back(&(rightTgtPhrase.GetWord(i)));
}
rightEndPos = lmcontext.size();
//Fill in post context
for (size_t i = 0; i < order-1; ++i) {
if ( i + rightGap.segment.GetEndPos() + 1 < getSample().GetTargetWords().size() ) {
lmcontext.push_back(&(getSample().GetTargetWords()[i + rightGap.segment.GetEndPos() + 1]));
}
else {
lmcontext.push_back(&(m_lmodel->GetImplementation()->GetSentenceEndArray()));
break;
}
}
VERBOSE(3,"Right LM Context : ");
for (size_t i = 0; i < lmcontext.size(); i++) {
VERBOSE(3,*lmcontext[i] << " ");
}
VERBOSE(3, endl);
useRightOptionCacheLM = false;
if ((int) lmcontext.size() - (int) (order -1) < 0 ) {//The left LM context completely subsumes the right LM Context, we're done
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
size_t maxNgram = lmcontext.size() - (order -1);
for (size_t ngramstart = 0; ngramstart < maxNgram; ++ngramstart) {
if (ngramstart >= leftStartPos && ngramstart + order - 1 < leftEndPos) {
useLeftOptionCacheLM = true;
VERBOSE(3, "In flip, Right LM Context, Using cached option LM score for left Option: " << leftOption->GetTargetPhrase() << endl;)
}
//else-if: ngrams wholly inside the left option are covered by its cached score
else if (ngramstart >= rightStartPos && ngramstart + order - 1 < rightEndPos) {
useRightOptionCacheLM = true;
VERBOSE(3, "In flip, Right LM Context, Using cached option LM score for right Option: " << rightOption->GetTargetPhrase() << endl;)
}
else {
ngramCtr = 0;
for (size_t j = ngramstart; j < ngramstart+order; ++j) {
ngram[ngramCtr++] = lmcontext[j];
}
lmscore += GetValue(ngram);
}
}
if (useRightOptionCacheLM) {
const ScoreComponentCollection & sc = rightOption->GetScoreBreakdown();
lmscore += sc.GetScoreForProducer(m_lmodel);
}
VERBOSE(2,"Language model score: " << lmscore << endl);
return lmscore;
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
FValue LanguageModelFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
bool contiguous = (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) ;
if (contiguous) {
WordsRange segment(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos());
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, segment);
return getContiguousPairedUpdateScore(leftOption, rightOption, gap);
} else {
return getDiscontiguousPairedUpdateScore(leftOption, rightOption, leftGap, rightGap);
}
}
float LanguageModelFeatureFunction::GetValue(const std::vector<const Word*>& context) {
auto_ptr<FFState> state(m_lmodel->GetImplementation()->NewState());
return m_lmodel->GetImplementation()->GetValueForgotState(context,*state.get());
}
}
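
computeScore above pads the target with order-1 sentence-start markers and a single end marker, then scores every sliding window of length order. With order 3 and target "a b c", the scored windows are <s> <s> a, <s> a b, a b c and b c </s>. A sketch of the same enumeration with strings standing in for Moses Word pointers:

#include <cstddef>
#include <string>
#include <vector>

std::vector<std::vector<std::string> > NgramWindowsSketch(
    const std::vector<std::string>& target, std::size_t order) {
  std::vector<std::string> ctx;
  for (std::size_t i = 0; i + 1 < order; ++i) ctx.push_back("<s>");
  ctx.insert(ctx.end(), target.begin(), target.end());
  ctx.push_back("</s>");
  std::vector<std::vector<std::string> > windows;  // one entry per scored ngram
  for (std::size_t s = 0; s + order <= ctx.size(); ++s)
    windows.push_back(std::vector<std::string>(
        ctx.begin() + s, ctx.begin() + s + order));
  return windows;
}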

67
josiah/LanguageModelFeature.h Normal file
View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "LanguageModel.h"
#include "FeatureFunction.h"
namespace Josiah {
class LanguageModelFeature : public Feature {
public:
LanguageModelFeature(const Moses::LanguageModel* lmodel);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
const Moses::LanguageModel* m_lmodel;
};
class LanguageModelFeatureFunction : public SingleValuedFeatureFunction {
public:
LanguageModelFeatureFunction(const Sample& sample, const Moses::LanguageModel* lmodel);
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap);
virtual FValue getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
private:
float GetValue(const std::vector<const Word*>& context);
const Moses::LanguageModel* m_lmodel;
};
}

164
josiah/LexicalReorderingFeature.cpp Normal file
View File

@ -0,0 +1,164 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include <string>
#include "LexicalReorderingFeature.h"
#include "ScoreComponentCollection.h"
using namespace Moses;
using namespace std;
namespace Josiah {
LexicalReorderingFeature::LexicalReorderingFeature
(Moses::LexicalReordering* lexReorder,size_t index) :
m_mosesLexReorder(lexReorder),
m_index(index) {
size_t featureCount = m_mosesLexReorder->GetNumScoreComponents();
const string& root = "LexicalReordering";
for (size_t i = 1; i <= featureCount; ++i) {
ostringstream namestream;
if (index > 0) {
namestream << index << "-";
}
namestream << i;
m_featureNames.push_back(FName(root,namestream.str()));
}
}
FeatureFunctionHandle LexicalReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(
new LexicalReorderingFeatureFunction(sample,m_featureNames,m_mosesLexReorder));
}
LexicalReorderingFeatureFunction::LexicalReorderingFeatureFunction
(const Sample& sample, std::vector<FName> featureNames,
LexicalReordering* lexReorder):
FeatureFunction(sample),
m_featureNames(featureNames),
m_mosesLexReorder(lexReorder) {
}
/** Assign the total score of this feature on the current hypo */
void LexicalReorderingFeatureFunction::assignScore(FVector& scores) {
/*
const Hypothesis * currHypo = getSample().GetTargetTail();
const FFState* state = m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput());
ScoreComponentCollection accumulator;
cerr << *currHypo << endl;
while ((currHypo = (currHypo->GetNextHypo()))) {
state = m_mosesLexReorder->Evaluate(*currHypo,state,&accumulator);
cerr << "AS: " << accumulator << endl;
}*/
vector<float> mosesScores = m_accumulator.GetScoresForProducer(m_mosesLexReorder);
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = mosesScores[i];
}
}
/** Update the previous state map.*/
void LexicalReorderingFeatureFunction::updateTarget() {
m_prevStates.clear();
m_accumulator.ZeroAll();
const Hypothesis * currHypo = getSample().GetTargetTail();
LRStateHandle prevState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->EmptyHypothesisState(currHypo->GetInput())));
while ((currHypo = (currHypo->GetNextHypo()))) {
LRStateHandle currState(dynamic_cast<const LexicalReorderingState*>(m_mosesLexReorder->Evaluate(*currHypo,prevState.get(),&m_accumulator)));
for (size_t i = 0; i < currHypo->GetCurrTargetWordsRange().GetNumWordsCovered(); ++i) {
m_prevStates.push_back(prevState);
}
prevState = currState;
}
}
void LexicalReorderingFeatureFunction::addScore
(vector<float>& accumulator, FVector& scores) {
for (size_t i = 0; i < accumulator.size(); ++i) {
scores[m_featureNames[i]] += accumulator[i];
accumulator[i] = 0;
}
}
/** Score due to one segment */
void LexicalReorderingFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
vector<float> accumulator(m_mosesLexReorder->GetNumScoreComponents(),0);
//The previous state of the (new) current hypo.
LRStateHandle prevState = m_prevStates[gap.segment.GetStartPos()];
//Evaluate the score of inserting this hypo, and get the prev state
//for the next hypo.
prevState.reset(prevState->Expand(*option,accumulator));
addScore(accumulator,scores);
//if there's a hypo on the right, then evaluate it.
if (gap.rightHypo) {
prevState.reset(prevState->Expand(gap.rightHypo->GetTranslationOption(),accumulator));
addScore(accumulator,scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void LexicalReorderingFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
vector<float> accumulator(m_mosesLexReorder->GetNumScoreComponents(),0);
//The previous state of the (new) current hypo.
LRStateHandle prevState(m_prevStates[gap.segment.GetStartPos()]);
//Evaluate the hypos in the gap
prevState.reset(prevState->Expand(*leftOption,accumulator));
addScore(accumulator,scores);
prevState.reset(prevState->Expand(*rightOption,accumulator));
addScore(accumulator,scores);
//if there's a hypo on the right, then evaluate it.
if (gap.rightHypo) {
prevState.reset(prevState->Expand(gap.rightHypo->GetTranslationOption(),accumulator));
addScore(accumulator,scores);
}
}
void LexicalReorderingFeatureFunction::doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
doSingleUpdate(leftOption,leftGap, scores);
doSingleUpdate(rightOption,rightGap, scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void LexicalReorderingFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,
const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
if (leftGap.segment.GetEndPos() + 1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo,rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}
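
The m_prevStates bookkeeping in updateTarget above duplicates each hypothesis's previous reordering state once per target word that hypothesis covers, so doSingleUpdate can fetch a restart state directly via gap.segment.GetStartPos(). A sketch of that expansion, with an int standing in for LRStateHandle:

#include <cstddef>
#include <utility>
#include <vector>

typedef int StateSketch;  // stand-in for LRStateHandle

// hypos[i] = (state reached after hypothesis i, target words covered by i)
std::vector<StateSketch> BuildPrevStatesSketch(
    const std::vector<std::pair<StateSketch, std::size_t> >& hypos) {
  std::vector<StateSketch> prevStates;
  StateSketch prev = 0;  // assumption: 0 plays the empty-hypothesis state
  for (std::size_t i = 0; i < hypos.size(); ++i) {
    for (std::size_t w = 0; w < hypos[i].second; ++w)
      prevStates.push_back(prev);
    prev = hypos[i].first;
  }
  return prevStates;
}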

87
josiah/LexicalReorderingFeature.h Normal file
View File

@ -0,0 +1,87 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <boost/shared_ptr.hpp>
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "LexicalReordering.h"
namespace Josiah {
typedef boost::shared_ptr<const Moses::LexicalReorderingState> LRStateHandle;
/** Wraps Moses lexical reordering */
class LexicalReorderingFeature : public Feature {
public:
LexicalReorderingFeature(Moses::LexicalReordering* lexReorder,size_t index);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
Moses::LexicalReordering* m_mosesLexReorder;
size_t m_index;
std::vector<FName> m_featureNames;
size_t m_beginIndex;
};
class LexicalReorderingFeatureFunction : public FeatureFunction {
public:
LexicalReorderingFeatureFunction
(const Sample&, std::vector<FName> featureNames,
Moses::LexicalReordering* lexReorder);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
virtual void updateTarget();
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
void addScore(std::vector<float>& accumulator, FVector& scores);
std::vector<FName> m_featureNames;
Moses::LexicalReordering* m_mosesLexReorder;
//typedef std::map<Moses::WordsRange, const Moses::Hypothesis*> CurrentHypos_t;
//typedef std::map<Moses::WordsRange, const Moses::FFState*> PreviousStates_t;
//CurrentHypos_t m_currentHypos;
//PreviousStates_t m_previousStates;
//maps the word index to the previous state involved in score calculation
std::vector<LRStateHandle> m_prevStates;
ScoreComponentCollection m_accumulator;
};
}

78
josiah/Makefile.am Normal file

@@ -0,0 +1,78 @@
lib_LIBRARIES = libjosiah.a
bin_PROGRAMS = josiah samplerank unittest truncate
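# libjosiah.a bundles the sampler core; the four programs above link it
# together with libmoses and the Boost libraries listed below.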
libjosiah_a_SOURCES = \
AnnealingSchedule.cpp \
Bleu.cpp \
Derivation.cpp \
Decoder.cpp \
Dependency.cpp \
DiscriminativeLMFeature.cpp \
DistortionPenaltyFeature.cpp \
Gain.cpp \
Gibbler.cpp \
GibblerMaxDerivDecoder.cpp \
GibblerMaxTransDecoder.cpp \
GibbsOperator.cpp \
InputSource.cpp \
LanguageModelFeature.cpp \
LexicalReorderingFeature.cpp \
MetaFeature.cpp \
MpiDebug.cpp \
Optimizer.cpp \
ParenthesisFeature.cpp \
PhraseBoundaryFeature.cpp \
PhraseFeature.cpp \
PhrasePairFeature.cpp \
PosProjectionFeature.cpp \
Pos.cpp \
ReorderingFeature.cpp \
SampleCollector.cpp \
Sampler.cpp \
Selector.cpp \
SourceToTargetRatio.cpp \
StatelessFeature.cpp \
TrainingSource.cpp \
TranslationDelta.cpp \
Utils.cpp \
WeightManager.cpp \
WordPenaltyFeature.cpp
josiah_SOURCES = \
GibblerAnnealedExpectedLossTrainer.cpp \
GibblerExpectedLossTraining.cpp \
Josiah.cpp
samplerank_SOURCES = \
SampleRank.cpp \
SampleRankSelector.cpp \
OnlineLearner.cpp \
OnlineTrainingCorpus.cpp
unittest_SOURCES = \
OnlineTrainingCorpus.cpp \
Test.cpp \
TestBleu.cpp \
TestOnlineTrainingCorpus.cpp
truncate_SOURCES = \
Truncate.cpp
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
josiah_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS)
josiah_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
samplerank_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS)
samplerank_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
unittest_CPPFLAGS = $(AM_CPPFLAGS) -DBOOST_TEST_DYN_LINK
unittest_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) $(BOOST_MPI_LIBS) -lboost_unit_test_framework
unittest_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a
truncate_LDADD = -L$(top_srcdir)/josiah -ljosiah $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_SERIALIZATION_LIBS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
truncate_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/josiah/libjosiah.a

109
josiah/MetaFeature.cpp Normal file

@@ -0,0 +1,109 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "MetaFeature.h"
using namespace std;
namespace Josiah {
MetaFeature::MetaFeature(const FVector& weights, const FeatureVector& features) :
m_weights(weights),
m_features(features){}
FeatureFunctionHandle MetaFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new MetaFeatureFunction(sample,*this));
}
FeatureFunctionVector MetaFeature::getFeatureFunctions(const Sample& sample) const {
FeatureFunctionVector ffv;
for (FeatureVector::const_iterator i = m_features.begin(); i != m_features.end(); ++i) {
ffv.push_back((*i)->getFunction(sample));
}
return ffv;
}
const FVector& MetaFeature::getWeights() const {
return m_weights;
}
MetaFeatureFunction::MetaFeatureFunction(const Sample& sample, const MetaFeature& parent)
: SingleValuedFeatureFunction(sample,"core"),
m_parent(parent),
m_featureFunctions(parent.getFeatureFunctions(sample))
{}
FValue MetaFeatureFunction::computeScore() {
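//Gather each component feature's score into one vector, then collapse
//it to a single value with the meta-feature's fixed internal weights.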
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->assignScore(scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to one segment */
FValue MetaFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap) {
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doSingleUpdate(option,gap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to two segments. The left and right refer to the target positions.**/
FValue MetaFeatureFunction::getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
FValue MetaFeatureFunction::getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
FValue MetaFeatureFunction::getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
FVector scores;
for (FeatureFunctionVector::const_iterator i = m_featureFunctions.begin(); i != m_featureFunctions.end(); ++i) {
(*i)->doFlipUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
return scores.inner_product(m_parent.getWeights());
}
}

73
josiah/MetaFeature.h Normal file

@@ -0,0 +1,73 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
namespace Josiah {
/**
* A feature which consists of a collection of other features.
**/
class MetaFeature : public Feature {
public:
MetaFeature(const FVector& weights, const FeatureVector& features);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::string& getName() const;
FeatureFunctionVector getFeatureFunctions(const Sample& sample) const;
const FVector& getWeights() const;
private:
FVector m_weights;
FeatureVector m_features;
};
class MetaFeatureFunction : public SingleValuedFeatureFunction {
public:
MetaFeatureFunction(const Sample& sample, const MetaFeature& parent);
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual FValue getContiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& gap);
virtual FValue getDiscontiguousPairedUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual FValue getFlipUpdateScore(const Moses::TranslationOption* leftOption,const Moses::TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
private:
const MetaFeature& m_parent;
FeatureFunctionVector m_featureFunctions;
};
}

41
josiah/MpiDebug.cpp Normal file

@@ -0,0 +1,41 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "MpiDebug.h"
using namespace std;
namespace Josiah {
int MpiDebug::verbosity = 0;
ofstream MpiDebug::out;
void MpiDebug::init(const string& fstem, int rank) {
ostringstream fname;
fname << fstem;
fname << ".";
fname << rank;
out.open(fname.str().c_str());
if (!out.good()) {
std::cerr << "Warn: unable to open mpi debug file" << fname.str() << endl;
}
}
}

41
josiah/MpiDebug.h Normal file

@@ -0,0 +1,41 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <fstream>
#include <iostream>
#include <sstream>
namespace Josiah {
struct MpiDebug {
static int verbosity;
static std::ofstream out;
static void init(const std::string& fstem, int rank);
};
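//MPI_VERBOSE(level,str) writes str to this rank's debug stream whenever
//MpiDebug::verbosity >= level; in non-MPI builds it expands to nothing.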
#ifdef MPI_ENABLED
#define MPI_VERBOSE(level,str) {if (Josiah::MpiDebug::verbosity >= level) { Josiah::MpiDebug::out << str;} }
#else
#define MPI_VERBOSE(level,str)
#endif
}

86
josiah/Ngram.cpp Normal file

@@ -0,0 +1,86 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Ngram.h"
using namespace Josiah;
using namespace std;
Josiah::NgramCollector::NgramCollector(size_t order) : m_order(order) {
FactorCollection &factorCollection = FactorCollection::Instance();
m_start = factorCollection.AddFactor(Output, 0, BOS_);
m_end = factorCollection.AddFactor(Output, 1, EOS_);
}
string Josiah::NgramCollector::ToString(const vector<const Factor*>& ws) const {
ostringstream os;
for (vector<const Factor*>::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << " ";
return os.str();
}
void Josiah::NgramCollector::collect(Sample& sample) {
const Hypothesis* h = sample.GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
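//Slide a window of m_order words over the translation, padding with the
//start marker before position 0 and the end marker past the last word,
//and record every resulting ngram.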
for (int ngramstart = -((int)m_order-1); ngramstart < (int)trans.size(); ++ngramstart) {
vector<const Factor*> ngram(m_order);
for (int i = ngramstart; i < ngramstart+(int)m_order; ++i) {
if (i < 0) {
ngram[i-ngramstart] = m_start;
} else if (i >= (int)trans.size()) {
ngram[i-ngramstart] = m_end;
} else {
ngram[i-ngramstart] = trans[i];
}
}
m_counts.insert(ngram);
m_ngrams.insert(ngram);
}
}
void Josiah::NgramCollector::dump( std::ostream & out ) const {
//maps n-1-gram prefixes to suffix-count map
map<vector<const Factor*>, multimap<size_t, const Factor*, greater<size_t> > >sortedCounts;
for (set<vector<const Factor*> >::const_iterator i = m_ngrams.begin(); i != m_ngrams.end(); ++i) {
vector<const Factor*> prefix(m_order-1);
copy(i->begin(),i->end()-1,prefix.begin());
const Factor* suffix = i->at(i->size()-1);
cerr << "ngram: " << ToString(*i) << " prefix: " << ToString(prefix) << " suffix: " << suffix->GetString() << endl;
sortedCounts[prefix].insert(pair<size_t,const Factor* >(m_counts.count(*i),suffix));
}
for (map<vector<const Factor*>, multimap<size_t, const Factor*, greater<size_t> > >::const_iterator i = sortedCounts.begin();
i != sortedCounts.end(); ++i) {
for (multimap<size_t, const Factor*, greater<size_t> >::const_iterator j = i->second.begin(); j != i->second.end(); ++j) {
out << j->first << " ";
out << ToString(i->first);
out << j->second->GetString() << endl;
}
}
}

459
josiah/OnlineLearner.cpp Normal file

@@ -0,0 +1,459 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#endif
#include "OnlineLearner.h"
#include "Utils.h"
#ifdef MPI_ENABLED
namespace mpi = boost::mpi;
#endif
using namespace std;
namespace Josiah {
PerceptronLearner::PerceptronLearner() :
m_learningRate(1.0) {}
void PerceptronLearner::setLearningRate(float learningRate) {
m_learningRate = learningRate;
}
void PerceptronLearner::doUpdate(
const FVector& currentFV,
const FVector& targetFV,
const FVector&,
float currentGain,
float targetGain,
float,
FVector& weights)
{
//Do update if target gain is better than curr gain
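//(a standard structured-perceptron step: move the weights toward the
//target's feature vector and away from the current one, scaled by the
//learning rate)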
if (targetGain > currentGain) {
weights -= m_learningRate * currentFV;
weights += m_learningRate * targetFV;
}
}
MiraLearner::MiraLearner() :
m_slack(0),
m_marginScale(1),
m_fixMargin(false),
m_margin(1),
m_useSlackRescaling(false) {}
void MiraLearner::setSlack(float slack) {
m_slack = slack;
}
void MiraLearner::setMarginScale(float marginScale) {
m_marginScale = marginScale;
}
void MiraLearner::setFixMargin(bool fixMargin) {
m_fixMargin = fixMargin;
}
void MiraLearner::setMargin(float margin) {
m_margin = margin;
}
void MiraLearner::setUseSlackRescaling(bool useSlackRescaling) {
m_useSlackRescaling = useSlackRescaling;
}
void MiraLearner::setScaleLossByTargetGain(bool scaleLossByTargetGain) {
m_scaleLossByTargetGain = scaleLossByTargetGain;
}
void MiraLearner::doUpdate(
const FVector& currFV,
const FVector& targetFV,
const FVector& ,
float currGain,
float targetGain,
float ,
FVector& weights)
{
FValue currScore = currFV.inner_product(weights);
FValue targetScore = targetFV.inner_product(weights);
VERBOSE(1,"currGain: " << currGain << " targetGain " << targetGain << endl);
IFVERBOSE(1) {
cerr << "target deriv has (scaled) gain " << m_marginScale * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScale * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
float loss = m_marginScale * (targetGain - currGain);
if (m_scaleLossByTargetGain) {
loss = loss*targetGain;
}
float margin;
if (m_fixMargin || m_useSlackRescaling) {
margin = m_margin;
} else {
margin = loss;
}
//constraint is a.x >= b, where x is the change in weight vector
float b = margin - (targetScore - currScore);
if (b <= 0) {
VERBOSE(1, "MiraLearner: alpha = " << 0 << endl);
return;
}
FVector a = targetFV - currFV;
float norma = inner_product(a,a);
if (norma == 0) {
VERBOSE(1, "MiraLearner: alpha = " << 0 << endl);
return;
}
//Update is min(C , b / ||a||^2) a
//where C is slack
//See Crammer et al.'s passive-aggressive paper for the solution of a similar problem.
float alpha = b / norma;
float slack = m_slack;
if (m_useSlackRescaling) {
slack = slack * loss;
}
VERBOSE(1, "MiraLearner: b = " << b << " norma = " << norma <<
" unclipped alpha = " << alpha << endl);
if (slack && alpha > slack) {
alpha = slack;
}
VERBOSE(1, "MiraLearner: alpha = " << alpha << endl);
weights += alpha * a;
}
void MiraPlusLearner::doUpdate(
const FVector& currFV,
const FVector& targetFV,
const FVector& optimalFV,
float currGain,
float targetGain,
float optimalGain,
FVector& weights)
{
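//Mira+ enforces up to three margin constraints: target vs. current,
//and (when the optimal derivation's gain beats the target's) optimal
//vs. target and optimal vs. current; all violated constraints are
//passed to hildreth() as one QP.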
FValue currScore = currFV.inner_product(weights);
FValue targetScore = targetFV.inner_product(weights);
FValue optimalGainScore = optimalFV.inner_product(weights);
VERBOSE(1,"currGain: " << currGain << " targetGain " << targetGain << " optimalGain " << optimalGain << endl);
cerr << "currGain: " << currGain << " targetGain " << targetGain << " optimalGain " << optimalGain << endl;
cerr << "currScore: " << currScore << " targetScore " << targetScore << " optimalScore " << optimalGainScore << endl;
IFVERBOSE(1) {
cerr << "Optimal deriv has (scaled) gain " << m_marginScale * optimalGain << " , fv : " << optimalFV << " [ " << optimalGainScore << " ]" << endl;
cerr << "target deriv has (scaled) gain " << m_marginScale * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScale * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
vector<float> b;
vector<FVector> distance;
float tgtcurrmargin = m_marginScale * (targetGain - currGain);
float opttgtmargin = m_marginScale * (optimalGain - targetGain);
float optcurrmargin = m_marginScale * (optimalGain - currGain);
if (m_fixMargin) {
tgtcurrmargin = m_margin;
opttgtmargin = m_margin;
optcurrmargin = m_margin;
}
//Score of target - Score of curr >= 1
b.push_back(tgtcurrmargin - (targetScore - currScore));
distance.push_back(targetFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[0] = " << b[0] << " distance[0] = " << distance[0] << endl);
if (optimalGain > targetGain) {
//Score of optimal - Score of Target > 1
b.push_back(opttgtmargin - (optimalGainScore - targetScore));
distance.push_back(optimalFV);
distance.back() -= targetFV;
VERBOSE(2, cerr << "b[1] = " << b[1] << " distance[1] = " << distance[1] << endl);
//Score of optimal - Score of curr > 1
b.push_back(optcurrmargin - (optimalGainScore - currScore));
distance.push_back(optimalFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[2] = " << b[2] << " distance[2] = " << distance[2] << endl);
}
/*
cerr << "b " << b[0];
if (b.size() > 1) {
cerr << " " << b[1] << " " << b[2];
}
cerr << endl;
*/
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
cerr << "dist " << distance[k] << endl;
}
distance[k] *= alpha[k];
update += distance[k];
}
cerr << "alpha " << alpha[0];
if (alpha.size() > 1) {
cerr << " " << alpha[1] << " " << alpha[2];
}
cerr << endl;
cerr << update << endl;
weights += update;
IFVERBOSE(1) {
cerr << "Mira++ updated weights to " << weights << endl;
}
IFVERBOSE(1) {
//Sanity check
currScore = currFV.inner_product(weights);
targetScore = targetFV.inner_product(weights);
optimalGainScore = optimalFV.inner_product(weights);
cerr << "Updated Current Weights : " << weights << endl;
cerr << "Target score - curr score " << targetScore - currScore << endl;
cerr << "margin * (Target gain - curr gain) " << m_marginScale * (targetGain - currGain) << endl;
cerr << "Optimal score - target score " << optimalGainScore - targetScore << endl;
cerr << "margin * (Optimal gain - target gain) " << m_marginScale * (optimalGain - targetGain) << endl;
cerr << "Optimal score - curr score " << optimalGainScore - currScore << endl;
cerr << "margin * (Optimal gain - curr gain) " << m_marginScale * (optimalGain - currGain) << endl;
}
}
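//Hildreth's algorithm: solves the QP
//  min 0.5*||x||^2   s.t.  a[i].x >= b[i] for all i
//by coordinate ascent on the dual, repeatedly correcting the most
//violated KKT condition; the solution is x = sum_i alpha[i]*a[i].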
vector<FValue> hildreth (const vector<FVector>& a, const vector<FValue>& b) {
size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
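//As above, but with the box constraint 0 <= alpha[i] <= C, giving the
//slack-penalized (soft-margin) variant used when a slack value is set.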
vector<FValue> hildreth (const vector<FVector>& a, const vector<FValue>& b, FValue C) {
size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
}

118
josiah/OnlineLearner.h Normal file

@@ -0,0 +1,118 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <vector>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include <boost/shared_ptr.hpp>
#include "FeatureVector.h"
namespace Josiah {
std::vector<Moses::FValue> hildreth ( const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b );
std::vector<Moses::FValue> hildreth ( const std::vector<Moses::FVector>& a, const std::vector<Moses::FValue>& b, Moses::FValue );
class OnlineLearner {
public:
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights) = 0;
virtual bool usesOptimalSolution() {return false;}
virtual ~OnlineLearner() {}
protected:
};
class PerceptronLearner : public OnlineLearner {
public:
PerceptronLearner();
void setLearningRate(float learningRate);
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
private:
float m_learningRate;
};
class MiraLearner : public OnlineLearner {
public:
MiraLearner();
void setSlack(float slack);
void setMarginScale(float marginScale);
void setFixMargin(bool fixMargin);
void setMargin(float margin);
/** For incorporating gain, an alternative to margin rescaling */
void setUseSlackRescaling(bool useSlackRescaling);
void setScaleLossByTargetGain(bool scaleLossByTargetGain);
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
protected:
float m_slack;
float m_marginScale;
bool m_fixMargin;
float m_margin;
bool m_useSlackRescaling;
bool m_scaleLossByTargetGain;
};
class MiraPlusLearner : public MiraLearner {
public:
void virtual doUpdate(
const Moses::FVector& currentFV,
const Moses::FVector& targetFV,
const Moses::FVector& optimalFV,
float currentGain,
float targetGain,
float optimalGain,
Moses::FVector& weights);
virtual bool usesOptimalSolution() {return true;}
};
typedef boost::shared_ptr<OnlineLearner> OnlineLearnerHandle;
}

493
josiah/OnlineLearnerOld.cpp Normal file

@@ -0,0 +1,493 @@
/*
* OnlineLearner.cpp
* josiah
*
* Created by Abhishek Arun on 26/06/2009.
* Copyright 2009 __MyCompanyName__. All rights reserved.
*
*/
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include "MpiDebug.h"
#endif
#include "OnlineLearner.h"
#include "TranslationDelta.h"
#include "StaticData.h"
#include "Sampler.h"
#include "Gibbler.h"
#include "WeightManager.h"
#ifdef MPI_ENABLED
namespace mpi = boost::mpi;
#endif
namespace Josiah {
FVector OnlineLearner::GetAveragedWeights() {
return m_cumulWeights / m_iteration;
}
void OnlineLearner::UpdateCumul() {
m_cumulWeights += GetCurrWeights();
m_iteration++;
}
vector<FValue> OnlineLearner::hildreth (const vector<FVector>& a, const vector<FValue>& b) {
size_t i;
int max_iter = 10000;
float eps = 0.00000001;
float zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if ( alpha[i] > zero )
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
vector<float> OnlineLearner::hildreth (const vector<FVector>& a, const vector<FValue>& b, FValue C) {
size_t i;
int max_iter = 10000;
FValue eps = 0.00000001;
FValue zero = 0.000000000001;
vector<FValue> alpha ( b.size() );
vector<FValue> F ( b.size() );
vector<FValue> kkt ( b.size() );
float max_kkt = -1e100;
size_t K = b.size();
float A[K][K];
bool is_computed[K];
for ( i = 0; i < K; i++ )
{
A[i][i] = a[i].inner_product( a[i]);
is_computed[i] = false;
}
int max_kkt_i = -1;
for ( i = 0; i < b.size(); i++ )
{
F[i] = b[i];
kkt[i] = F[i];
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
}
int iter = 0;
FValue diff_alpha;
FValue try_alpha;
FValue add_alpha;
while ( max_kkt >= eps && iter < max_iter )
{
diff_alpha = A[max_kkt_i][max_kkt_i] <= zero ? 0.0 : F[max_kkt_i]/A[max_kkt_i][max_kkt_i];
try_alpha = alpha[max_kkt_i] + diff_alpha;
add_alpha = 0.0;
if ( try_alpha < 0.0 )
add_alpha = -1.0 * alpha[max_kkt_i];
else if (try_alpha > C)
add_alpha = C - alpha[max_kkt_i];
else
add_alpha = diff_alpha;
alpha[max_kkt_i] = alpha[max_kkt_i] + add_alpha;
if ( !is_computed[max_kkt_i] )
{
for ( i = 0; i < K; i++ )
{
A[i][max_kkt_i] = a[i].inner_product(a[max_kkt_i] ); // for version 1
//A[i][max_kkt_i] = 0; // for version 1
is_computed[max_kkt_i] = true;
}
}
for ( i = 0; i < F.size(); i++ )
{
F[i] -= add_alpha * A[i][max_kkt_i];
kkt[i] = F[i];
if (alpha[i] > C - zero)
kkt[i]=-kkt[i];
else if (alpha[i] > zero)
kkt[i] = std::fabs(F[i]);
}
max_kkt = -1e100;
max_kkt_i = -1;
for ( i = 0; i < F.size(); i++ )
if ( kkt[i] > max_kkt )
{
max_kkt = kkt[i];
max_kkt_i = i;
}
iter++;
}
return alpha;
}
void PerceptronLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
//Do update if target gain is better than curr gain
if (target->GetGain(gf) > curr->GetGain(gf)) {
GetCurrWeights() -= curr->getScores();
GetCurrWeights() += target->getScores();
m_numUpdates++;
}
UpdateCumul();
}
void CWLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
//we consider the following binary classification task: does the target jump have a higher gain than the curr jump?
//the score for the input features (could also be calculated by m_features * current weights)
float scoreDiff = target->getScore() - curr->getScore();
VERBOSE(1, "ScoreDiff: " << scoreDiff << endl)
//what is the actual gain of target vs current (the gold gain)
float gainDiff = target->GetGain(gf) - curr->GetGain(gf);
VERBOSE(1, "GainDiff: " << gainDiff << endl)
//the gold 1/-1 label
float y = gainDiff > 0 ? 1.0 : -1.0;
VERBOSE(1, "Label: " << y << endl)
//the mean of margin for this task is y * score
float marginMean = y * scoreDiff;
VERBOSE(1, "marginMean: " << marginMean << endl)
//only update at error
if (marginMean < 0) {
//the input feature vector to this task is (f(target) - f(curr))
m_features.clear();
m_features += target->getScores();
m_features -= curr->getScores();
VERBOSE(1, "feature delta: " << m_features << endl)
//the variance is based on the input features
float marginVariance = calculateMarginVariance(m_features);
VERBOSE(1, "marginVariance: " << marginVariance << endl)
//get the kkt multiplier
float alpha = kkt(marginMean,marginVariance);
VERBOSE(1, "alpha: " << alpha << endl)
//update the mean parameters
updateMean(alpha, y);
VERBOSE(1, "new weights: " << GetCurrWeights() << endl)
//update the variance parameters
updateVariance(alpha);
VERBOSE(1, "new variance: " << m_currSigmaDiag << endl)
//remember that we made an update
m_numUpdates++;
}
UpdateCumul();
}
void MiraLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
vector<float> b;
float scoreDiff = target->getScore() - curr->getScore();
float gainDiff = target->GetGain(gf) - curr->GetGain(gf); //%BLEU
//Scale the margin
gainDiff *= m_marginScaleFactor;
//Or set it to a fixed value
if (m_fixMargin) {
gainDiff = m_margin;
}
bool doMira = false;
if (scoreDiff < gainDiff) { //MIRA Constraints not satisfied, run MIRA
doMira = true;
b.push_back( gainDiff - scoreDiff);
vector<FVector> distance;
FVector dist(target->getScores());
dist -= curr->getScores();
distance.push_back(dist);
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
}
FVector dist = distance[k];
dist *= alpha[k];
update += dist;
m_numUpdates++;
}
//Normalize
VERBOSE(1, "Update wv " << update << endl);
if (m_normalizer)
m_normalizer->Normalize(update);
VERBOSE(1,"After norm, Update wv " << update << endl);
}
else {
IFVERBOSE(1) {
cerr << "Not doing updates cos constraints already satisified" << endl;
cerr << "Target score" << target->getScore() << ", curr score " << curr->getScore() << endl;
cerr << "Target (scaled) gain" << target->GetGain(gf) * m_marginScaleFactor << ", curr gain " << curr->GetGain(gf) * m_marginScaleFactor << endl;
}
}
UpdateCumul();
IFVERBOSE(1) {
if (doMira) {
//Sanity check
curr->updateWeightedScore();
target->updateWeightedScore();
cerr << "Target score - curr score " << target->getScore() - curr->getScore() << endl;
cerr << "Target scaled gain - curr scaled gain " << ((target->GetGain(gf) - curr->GetGain(gf)) * m_marginScaleFactor ) << endl;
}
}
}
/*This Update enforces 3 constraints :
1. Score of target - Score of curr >= 1
2. Score of optimal - Score of Target > 1
3. Score of optimal - Score of curr > 1
*/
void MiraPlusLearner::doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) {
FVector currFV = curr->getScores();
currFV -= noChangeDelta->getScores();
currFV += curr->getSample().GetFeatureValues();
FValue currScore = currFV.inner_product(WeightManager::instance().get());
FVector targetFV = target->getScores();
targetFV -= noChangeDelta->getScores();
targetFV += target->getSample().GetFeatureValues();
FValue targetScore = targetFV.inner_product(WeightManager::instance().get());
FValue optimalGainScore = optimalFV.inner_product(WeightManager::instance().get());
float targetGain = target->GetGain(gf);
float currGain = curr->GetGain(gf);
IFVERBOSE(1) {
cerr << "Optimal deriv has (scaled) gain " << m_marginScaleFactor * optimalGain << " , fv : " << optimalFV << " [ " << optimalGainScore << " ]" << endl;
cerr << "target deriv has (scaled) gain " << m_marginScaleFactor * targetGain << " , fv : " << targetFV << " [ " << targetScore << " ]" << endl;
cerr << "curr deriv has (scaled) gain " << m_marginScaleFactor * currGain << " , fv : " << currFV << " [ " << currScore << " ]" << endl;
}
vector<float> b;
vector<FVector> distance;
float tgtcurrmargin = m_marginScaleFactor * (targetGain - currGain);
float opttgtmargin = m_marginScaleFactor * (optimalGain - targetGain);
float optcurrmargin = m_marginScaleFactor * (optimalGain - currGain);
if (m_fixMargin) {
tgtcurrmargin = m_margin;
opttgtmargin = m_margin;
optcurrmargin = m_margin;
}
//Score of target - Score of curr >= 1
b.push_back(tgtcurrmargin - (targetScore - currScore));
distance.push_back(targetFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[0] = " << b[0] << " distance[0] = " << distance[0] << endl);
if (optimalGain > targetGain) {
//Score of optimal - Score of Target > 1
b.push_back(opttgtmargin - (optimalGainScore - targetScore));
distance.push_back(optimalFV);
distance.back() -= targetFV;
VERBOSE(2, cerr << "b[1] = " << b[1] << " distance[1] = " << distance[1] << endl);
//Score of optimal - Score of curr > 1
b.push_back(optcurrmargin - (optimalGainScore - currScore));
distance.push_back(optimalFV);
distance.back() -= currFV;
VERBOSE(2, cerr << "b[2] = " << b[2] << " distance[2] = " << distance[2] << endl);
}
vector<float> alpha;
if (m_slack == -1)
alpha = hildreth(distance,b);
else
alpha = hildreth(distance,b, m_slack);
FVector update;
for (size_t k = 0; k < alpha.size(); k++) {
IFVERBOSE(1) {
cerr << "alpha " << alpha[k] << endl;
cerr << "dist " << distance[k] << endl;
}
distance[k] *= alpha[k];
update += distance[k];
}
//Normalize
VERBOSE(2, "Before normalise: " << update << endl);
if (m_normalizer)
m_normalizer->Normalize(update);
VERBOSE(2, "After normalise: " << update << endl);
GetCurrWeights() += update;
IFVERBOSE(1) {
cerr << "Mira++ updated weights to " << GetCurrWeights() << endl;
}
m_numUpdates++;
UpdateCumul();
IFVERBOSE(1) {
//Sanity check
currScore = currFV.inner_product(WeightManager::instance().get());
targetScore = targetFV.inner_product(WeightManager::instance().get());
optimalGainScore = optimalFV.inner_product(WeightManager::instance().get());
cerr << "Updated Current Weights : " << GetCurrWeights() << endl;
cerr << "Target score - curr score " << targetScore - currScore << endl;
cerr << "margin * (Target gain - curr gain) " << m_marginScaleFactor * (targetGain - currGain) << endl;
cerr << "Optimal score - target score " << optimalGainScore - targetScore << endl;
cerr << "margin * (Optimal gain - target gain) " << m_marginScaleFactor * (optimalGain - targetGain) << endl;
cerr << "Optimal score - curr score " << optimalGainScore - currScore << endl;
cerr << "margin * (Optimal gain - curr gain) " << m_marginScaleFactor * (optimalGain - currGain) << endl;
}
}
}

174
josiah/OnlineLearnerOld.h Normal file

@@ -0,0 +1,174 @@
#pragma once
#include <cmath>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#ifdef MPI_ENABLED
#include <mpi.h>
#endif
#include "FeatureVector.h"
#include "Gain.h"
#include "TranslationDelta.h"
#include "WeightManager.h"
namespace Josiah {
class Sampler;
class WeightNormalizer;
class OnlineLearner {
public :
OnlineLearner(const FVector& initWeights, const std::string& name) :
m_name(name), m_iteration(0) {} //, m_averaging(true)
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf) = 0;
void UpdateCumul() ;
FVector& GetCurrWeights() {return WeightManager::instance().get();}
FVector GetAveragedWeights() ;
virtual ~OnlineLearner() {}
virtual void reset() {}
virtual size_t GetNumUpdates() = 0;
const std::string & GetName() {return m_name;}
protected:
//bool m_averaging;
FVector m_cumulWeights;
std::string m_name;
size_t m_iteration;
std::vector<FValue> hildreth ( const std::vector<FVector>& a, const std::vector<FValue>& b );
std::vector<FValue> hildreth ( const std::vector<FVector>& a, const std::vector<FValue>& b, FValue );
};
class PerceptronLearner : public OnlineLearner {
public :
PerceptronLearner(const FVector& initWeights, const std::string& name, FValue learning_rate = 1.0) : OnlineLearner(initWeights, name), m_learning_rate(learning_rate), m_numUpdates() {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~PerceptronLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
private:
FValue m_learning_rate;
size_t m_numUpdates;
};
class CWLearner : public OnlineLearner {
public :
CWLearner(const FVector& initWeights, const std::string& name, FValue confidence = 1.644854f, FValue initialVariance = 1.0f) :
OnlineLearner(initWeights, name), m_features(initWeights),m_confidence(confidence), m_epsilon(0.0000001f),m_numUpdates(){
m_currSigmaDiag += initialVariance;
}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~CWLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
private:
FVector m_currSigmaDiag;
FVector m_features;
FValue m_confidence;
FValue m_learning_rate;
FValue m_epsilon;
size_t m_numUpdates;
bool sign(FValue value) { return value > 0.0; }
FValue kkt(FValue marginMean, FValue marginVariance) {
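//Closed-form Lagrange multiplier of the confidence-weighted update
//(cf. Dredze et al. 2008): zero when the margin mean already exceeds
//confidence * variance, otherwise the positive root of the CW quadratic.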
if (marginMean >= m_confidence * marginVariance) return 0.0;
//margin variance approximately == 0 ?
if (marginVariance < 0.0 + m_epsilon && marginVariance > 0.0 - m_epsilon) return 0.0;
FValue v = 1.0 + 2.0 * m_confidence * marginMean;
FValue lambda = (-v + sqrt(v * v - 8.0 * m_confidence * (marginMean - m_confidence * marginVariance))) / (4.0 * m_confidence * marginVariance);
return lambda > 0.0 ? lambda : 0.0;
}
FValue calculateMarginVariance(const FVector& features) {
return (features*features*m_currSigmaDiag).sum();
}
void updateMean(FValue alpha, FValue y) {
WeightManager::instance().get() += alpha*y*m_currSigmaDiag*m_features;
}
void updateVariance(FValue alpha) {
m_currSigmaDiag = 1.0 / (1.0 / m_currSigmaDiag + (2.0 * alpha * m_confidence * m_features * m_features));
}
};
class MiraLearner : public OnlineLearner {
public :
MiraLearner(const FVector& initWeights, const std::string& name, bool fixMargin, FValue margin, FValue slack, FValue scale_margin = 1.0, WeightNormalizer* wn = NULL) : OnlineLearner(initWeights, name), m_numUpdates(), m_fixMargin(fixMargin), m_margin(margin), m_slack(slack), m_marginScaleFactor(scale_margin), m_normalizer(wn) {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~MiraLearner() {}
virtual void reset() {m_numUpdates = 0;}
virtual size_t GetNumUpdates() { return m_numUpdates;}
void SetNormalizer(WeightNormalizer* normalizer) {m_normalizer = normalizer;}
protected:
size_t m_numUpdates;
bool m_fixMargin;
FValue m_margin;
FValue m_slack;
FValue m_marginScaleFactor;
WeightNormalizer* m_normalizer;
};
class MiraPlusLearner : public MiraLearner {
public :
MiraPlusLearner(const FVector& initWeights, const std::string& name, bool fixMargin, FValue margin, FValue slack, FValue scale_margin = 1.0, WeightNormalizer* wn = NULL) : MiraLearner(initWeights, name, fixMargin, margin, slack, scale_margin, wn) {}
virtual void doUpdate(const TDeltaHandle& curr,
const TDeltaHandle& target,
const TDeltaHandle& noChangeDelta,
const FVector& optimalFV,
const FValue optimalGain,
const GainFunctionHandle& gf);
virtual ~MiraPlusLearner() {}
};
class WeightNormalizer {
public :
WeightNormalizer(FValue norm) {m_norm = norm;}
virtual ~WeightNormalizer() {}
virtual void Normalize(FVector& ) = 0;
protected :
FValue m_norm;
};
class L1Normalizer : public WeightNormalizer {
public:
L1Normalizer (FValue norm) : WeightNormalizer(norm) {}
virtual ~L1Normalizer() {}
virtual void Normalize(FVector& weights) {
weights *= (m_norm / weights.l1norm());
}
};
class L2Normalizer : public WeightNormalizer {
public:
L2Normalizer (FValue norm) : WeightNormalizer(norm) {}
virtual ~L2Normalizer() {}
virtual void Normalize(FVector& weights) {
weights *= (m_norm / weights.l2norm());
}
};
}


@@ -0,0 +1,158 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <fstream>
#include <stdexcept>
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include "OnlineTrainingCorpus.h"
#include "Selector.h"
#include "Utils.h"
using namespace Moses;
using namespace std;
namespace Josiah {
OnlineTrainingCorpus::OnlineTrainingCorpus (
const std::string& sourceFile,
size_t batchLines,
size_t epochLines,
size_t maxLines,
int mpiSize,
int mpiRank):
m_batchLines(batchLines),
m_epochLines(epochLines),
m_maxLines(maxLines),
m_mpiSize(mpiSize),
m_mpiRank(mpiRank),
m_totalLines(0)
{
if (batchLines && epochLines % batchLines != 0) {
throw runtime_error("Size of epoch should be divisible by batch size");
}
if (maxLines % epochLines != 0) {
throw runtime_error("Maximum lines should be divisible by epoch size");
}
if (batchLines > 1) {
if (epochLines % mpiSize != 0) {
throw runtime_error("When using batching, shards should be of equal size");
}
size_t shardLines = epochLines / mpiSize;
if (shardLines % batchLines != 0) {
throw runtime_error("Shard size should be divisible by batch size");
}
}
//Load the source file
ifstream in(sourceFile.c_str());
if (!in) {
throw runtime_error("Unable to open: " + sourceFile);
}
string line;
while (getline(in,line)) {
m_lines.push_back(line);
}
//Line ids
for (size_t i = 0; i < m_lines.size(); ++i) {
m_lineIds.push_back(i);
}
m_lineIdIndex = 0;
RandomIndex rand;
random_shuffle(m_lineIds.begin(),m_lineIds.end(),rand);
InitEpoch();
}
void OnlineTrainingCorpus::InitEpoch() {
//sentence ids in this epoch
vector<size_t> epoch;
if (m_mpiRank == 0) {
while (epoch.size() < m_epochLines) {
epoch.push_back(m_lineIds[m_lineIdIndex]);
++m_lineIdIndex;
if (m_lineIdIndex >= m_lineIds.size()) m_lineIdIndex = 0;
}
}
//split into shards
m_shard.clear();
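//Rank 0 drew this epoch's line ids above; broadcast them to all ranks,
//then take this rank's contiguous slice as its shard (with batching
//disabled, every rank keeps the whole epoch).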
#ifdef MPI_ENABLED
mpi::communicator world;
mpi::broadcast(world,epoch,0);
#endif
if (m_batchLines) {
float shard_size = m_epochLines / (float)m_mpiSize;
VERBOSE(1, "Shard size: " << shard_size << endl);
size_t shard_start = (size_t)(shard_size *m_mpiRank);
size_t shard_end = (size_t)(shard_size * (m_mpiRank+1));
if (m_mpiRank == m_mpiSize-1) shard_end = m_epochLines;
VERBOSE(1, "Rank: " << m_mpiRank << " Shard start: " << shard_start << " Shard end: " << shard_end << endl);
for (size_t i = shard_start; i < shard_end; ++i) {
m_shard.push_back(epoch[i]);
}
} else {
//each core gets whole epoch as a shard
m_shard.insert(m_shard.begin(),epoch.begin(),epoch.end());
VERBOSE(1,"Shard contains whole epoch" << endl);
}
}
/** Next batch of sentences. Flags indicate whether we should mix or dump at the end of this batch*/
void OnlineTrainingCorpus::GetNextBatch(
std::vector<std::string>* lines,
std::vector<std::size_t>* lineNumbers,
bool* shouldMix)
{
lines->clear();
lineNumbers->clear();
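//Pop line ids off the back of the shard until the batch is full (with
//batching disabled, a batch is a whole epoch); an exhausted shard
//triggers a mix and the start of a new epoch.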
while (lines->size() < m_batchLines ||
(m_batchLines == 0 && lines->size() < m_epochLines)) {
lineNumbers->push_back(m_shard.back());
lines->push_back(m_lines[m_shard.back()]);
m_shard.pop_back();
VERBOSE(1,"Add to batch: " << lineNumbers->back() << " rank: " << m_mpiRank << endl);
}
if (m_shard.empty()) {
*shouldMix = true;
InitEpoch();
} else {
*shouldMix = false;
}
VERBOSE(1, "Mix?: " << *shouldMix << " rank: " << m_mpiRank << endl);
if (m_batchLines) {
m_totalLines += (m_batchLines*m_mpiSize);
} else {
m_totalLines += m_epochLines;
}
VERBOSE(1,"Total lines: " << m_totalLines << " rank: " << m_mpiRank << endl);
}
bool OnlineTrainingCorpus::HasMore() const {
return m_totalLines < m_maxLines;
}
}


@@ -0,0 +1,69 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <vector>
namespace Josiah {
/**
* Used to manage the training corpus - batching, sharding, etc.
**/
class OnlineTrainingCorpus {
public:
OnlineTrainingCorpus(
const std::string& sourceFile,
size_t batchLines, //Size of batches
size_t epochLines, //Total lines in each epoch. These will be split into shards
size_t maxLines, //Total lines to be processed
int mpiSize,
int mpiRank);
//NB: maxLines must be divisible by epochLines
// epochLines must be divisible by batchLines.
/** Next batch of sentences. Flags indicate whether we should mix or dump at the end of this batch*/
void GetNextBatch(std::vector<std::string>* lines,
std::vector<std::size_t>* lineNumbers,
bool* shouldMix);
bool HasMore() const;
private:
void InitEpoch();
size_t m_batchLines;
size_t m_epochLines;
size_t m_maxLines;
int m_mpiSize;
int m_mpiRank;
size_t m_totalLines;
std::vector<std::string> m_lines;
std::vector<size_t> m_lineIds;
std::vector<size_t> m_shard;
size_t m_lineIdIndex;
};
}

101
josiah/Optimizer.cpp Normal file

@@ -0,0 +1,101 @@
#include "Optimizer.h"
#include <iostream>
using namespace Moses;
using namespace std;
namespace Josiah {
Optimizer::~Optimizer() {}
void Optimizer::Optimize(
FValue f,
const FVector x,
const FVector& gr,
FVector* new_x
) {
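//Bookkeeping wrapper: counts the iteration, optionally adjusts the
//gradient by the Gaussian prior, delegates to OptimizeImpl, and flags
//convergence once max_iterations_ is reached.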
++iteration_;
FVector gradient = gr;
if (use_gaussian_prior_) {
gradient -= mean_;
gradient /= variance_;
}
cerr << "OPTIMIZER ITERATION #" << iteration_ << endl;
cerr << " CURR VALUES: " << x << endl;
cerr << " GRADIENT: " << gr << endl;
if (use_gaussian_prior_)
cerr << "P-GRADIENT: " << gradient << endl;
OptimizeImpl(f, x, gradient, new_x);
cerr << "NEW VALUES: " << *new_x << endl;
if (HasConverged()) {
cerr << "OPTIMIZER CONVERGED IN " << iteration_ << " ITERATIONS.\n";
} else if (GetIteration() >= max_iterations_) {
cerr << "OPTIMIZER REACHED MAX ITERATIONS. STOPPING.\n";
SetHasConverged();
}
}
void DumbStochasticGradientDescent::OptimizeImpl(
FValue f,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
FVector g = gradient;
g *= eta_;
*new_x = x;
*new_x += g;
}
void ExponentiatedGradientDescent::OptimizeImpl(
FValue,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
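//Gain adaptation: each eta_i grows when successive gradients agree in
//sign (g_i * prev_g_i > 0) and shrinks otherwise, floored by
//min_multiplier_; the step is then new_x = x + eta .* gradient.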
//for (unsigned i = 0; i < eta_.size(); ++i) {
//eta_[i] = eta_[i] * max(min_multiplier_, 1.0f + mu_ * gradient[i] * (eta_[i] * prev_g_[i]));
eta_ *= fvmax(min_multiplier_, 1.0 + mu_ * gradient * eta_ * prev_g_);
//}
cerr << "ETA: " << eta_ << endl;
*new_x = gradient;
*new_x *= eta_;
*new_x += x;
cerr << "New x: " << *new_x << endl;
prev_g_ = gradient;
}
void MetaNormalizedExponentiatedGradientDescent::OptimizeImpl(
FValue,
const FVector& x,
const FVector& gradient,
FVector* new_x) {
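//Meta-normalization: keep an exponentially decayed average v of the
//squared gradient and divide the gain-adaptation term by it, making the
//eta update insensitive to the gradient's overall scale.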
cerr << "Curr x: " << x << endl;
//for (unsigned i = 0; i < v_.size(); ++i) {
//v_[i] = gamma_ * v_[i] + ((1 - gamma_) * gradient[i] * gradient[i]);
v_ = gamma_ * v_ + ((1-gamma_) * gradient * gradient);
//}
//for (unsigned i = 0; i < eta_.size(); ++i) {
//eta_[i] = eta_[i] * max(min_multiplier_, 1.0f + ((mu_ * gradient[i] * prev_g_[i])/ v_[i]));
eta_ = eta_ * fvmax(min_multiplier_, 1 + ((mu_ * gradient * prev_g_) / v_));
//}
cerr << "ETA: " << eta_ << endl;
*new_x = gradient;
*new_x *= eta_;
cerr << "Gradient * ETA: " << *new_x << endl;
*new_x += x;
cerr << "New x: " << *new_x << endl;
prev_g_ = gradient;
}
}

135
josiah/Optimizer.h Normal file

@@ -0,0 +1,135 @@
#pragma once
#include <vector>
#include "FeatureVector.h"
namespace Josiah {
struct Optimizer {
Optimizer(int max_iterations)
: iteration_(0),
converged_(false),
max_iterations_(max_iterations),
use_gaussian_prior_(false) {}
virtual ~Optimizer();
void SetUseGaussianPrior(const Moses::FValue mean,
const Moses::FValue variance) {
use_gaussian_prior_ = true;
mean_ = mean;
variance_ = variance;
}
void Optimize(
Moses::FValue f, // if known
const Moses::FVector x, // not ref! don't change!
const Moses::FVector& gradient,
Moses::FVector* new_x);
bool HasConverged() const {
return converged_;
}
int GetIteration() const {
return iteration_;
}
void SetIteration(int iteration) {
iteration_ = iteration;
}
protected:
virtual void OptimizeImpl(
Moses::FValue f, // if known
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x) = 0;
void SetHasConverged(bool converged = true) {
converged_ = converged;
}
private:
int iteration_;
bool converged_;
int max_iterations_;
bool use_gaussian_prior_;
Moses::FValue mean_; // for gaussian prior
Moses::FValue variance_; // for gaussian prior
};
class DumbStochasticGradientDescent : public Optimizer {
public:
DumbStochasticGradientDescent(Moses::FValue eta, int max_iters) :
Optimizer(max_iters), eta_(eta) {}
virtual void OptimizeImpl(
Moses::FValue f,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
private:
Moses::FValue eta_;
};
// see N. Schraudolph (1999) Local Gain Adaptation in Stochastic Gradient
// Descent, Technical Report IDSIA-09-99, p. 2.
// No, this isn't stochastic metadescent, but EGD is described there too
class ExponentiatedGradientDescent : public Optimizer {
public:
ExponentiatedGradientDescent(const Moses::FVector& eta,
Moses::FValue mu, Moses::FValue min_multiplier, int max_iters, const Moses::FVector& prev_gradient) :
Optimizer(max_iters), eta_(eta), mu_(mu), min_multiplier_(min_multiplier), prev_g_(prev_gradient) {
//std::cerr << "Eta : " << eta_ << std::endl;
//std::cerr << "Prev gradient : " << prev_g_ << std::endl;
}
void SetPreviousGradient(const Moses::FVector& prev_g) { prev_g_ = prev_g;}
void SetEta(const Moses::FVector& eta) { eta_ = eta;}
virtual void OptimizeImpl(
Moses::FValue,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
protected:
Moses::FVector eta_;
const Moses::FValue mu_;
const Moses::FValue min_multiplier_;
Moses::FVector prev_g_;
};
class MetaNormalizedExponentiatedGradientDescent : public ExponentiatedGradientDescent {
public:
MetaNormalizedExponentiatedGradientDescent(const Moses::FVector& eta,
Moses::FValue mu, Moses::FValue min_multiplier, Moses::FValue gamma, int max_iters, const Moses::FVector& prev_gradient) :
ExponentiatedGradientDescent(eta, mu, min_multiplier, max_iters, prev_gradient), v_(eta), gamma_(gamma) {
std::cerr << " MetaNormalizedExponentiatedGradientDescent, gamma : " << gamma << std::endl;
}
virtual void OptimizeImpl(
Moses::FValue f,
const Moses::FVector& x,
const Moses::FVector& gradient,
Moses::FVector* new_x);
private:
Moses::FVector v_;
Moses::FValue gamma_;
};
}
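
In scalar form, the Gaussian prior used by Optimize() regularises the ascent direction towards the prior mean: the gradient of log N(x | mean, variance) with respect to x is -(x - mean)/variance. A one-weight sketch of that adjustment (assuming ascent on log-likelihood plus log-prior):

#include <cassert>

//Sketch: regularised ascent direction for one weight under a Gaussian prior.
double regularisedGradient(double g, double x, double mean, double variance) {
  assert(variance > 0.0);
  return g - (x - mean) / variance;
}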

390
josiah/ParenthesisFeature.cpp Normal file
View File

@ -0,0 +1,390 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <cassert>
#include <iterator>
#include "ParenthesisFeature.h"
using namespace std;
namespace Josiah {
ParenthesisFeature::ParenthesisFeature(const std::string lefts, const std::string rights) :
m_lefts(lefts), m_rights(rights) {}
FeatureFunctionHandle ParenthesisFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ParenthesisFeatureFunction(sample, m_lefts, m_rights));
}
void ParenthesisCounts::count(vector<Word>::const_iterator begin, vector<Word>::const_iterator end,
const string& lefts, const string& rights)
{
size_t segmentSize = end - begin;
for (size_t i = 0; i < m_ll.size(); ++i) {
m_ll[i].resize(segmentSize);
m_lr[i].resize(segmentSize);
m_rl[i].resize(segmentSize);
m_rr[i].resize(segmentSize);
m_leftPositions[i].clear();
m_rightPositions[i].clear();
}
//iterate right to left
reverse_iterator<vector<Word>::const_iterator> ri(end);
reverse_iterator<vector<Word>::const_iterator> rend(begin);
size_t wpos = segmentSize;
for ( ;ri != rend; ++ri) {
--wpos;
const string& text = ri->operator[](0)->GetString();
size_t lid = string::npos;
size_t rid = string::npos;
if (text.size() == 1) {
lid = lefts.find(text);
rid = rights.find(text);
}
for (size_t pid = 0; pid < m_ll.size(); ++pid) {
size_t curr_lr = m_lr[pid].size() > wpos+1 ? m_lr[pid][wpos+1] : 0;
size_t curr_rr = m_rr[pid].size() > wpos+1 ? m_rr[pid][wpos+1] : 0;
if (pid == lid) {
//found a left parenthesis
++curr_lr;
m_leftPositions[pid].push_back(wpos);
}
if (pid == rid) {
//found a right parenthesis
++curr_rr;
m_rightPositions[pid].push_back(wpos);
}
m_lr[pid][wpos] = curr_lr;
m_rr[pid][wpos] = curr_rr;
}
}
assert(wpos == 0);
//iterate left to right
vector<Word>::const_iterator i = begin;
for (; i != end; ++i) {
const string& text = i->operator[](0)->GetString();
size_t lid = string::npos;
size_t rid = string::npos;
if (text.size() == 1) {
lid = lefts.find(text);
rid = rights.find(text);
}
for (size_t pid = 0; pid < m_ll.size(); ++pid) {
size_t curr_ll = wpos > 0 ? m_ll[pid][wpos-1] : 0;
size_t curr_rl = wpos > 0 ? m_rl[pid][wpos-1] : 0;
if (pid == lid) ++curr_ll;
if (pid == rid) ++curr_rl;
m_ll[pid][wpos] = curr_ll;
m_rl[pid][wpos] = curr_rl;
}
++wpos;
}
}
void ParenthesisFeatureFunction::updateTarget() {
const vector<Word>& words = getSample().GetTargetWords();
m_counts.count(words.begin(),words.end(),m_lefts,m_rights);
}
void ParenthesisFeatureFunction::assignScore(FVector& scores) {
//count number of mismatches of each type
getViolations(m_counts,scores);
}
struct WordsRangeCovers {
WordsRangeCovers(const WordsRange& range) : m_range(range) {}
bool operator() (size_t pos) {return m_range.covers(pos);}
const WordsRange& m_range;
};
void ParenthesisFeatureFunction::getViolations(const ParenthesisCounts& counts, FVector& violations,
const ParenthesisCounts* outsideCounts, const WordsRange* segment)
{
for (size_t pid = 0; pid < m_numValues; ++pid) {
//left violations to left of segment
if (outsideCounts) {
size_t leftsInSegment = count_if(outsideCounts->leftPositions()[pid].begin(),
outsideCounts->leftPositions()[pid].end(),
WordsRangeCovers(*segment));
size_t rightsInSegment = count_if(outsideCounts->rightPositions()[pid].begin(),
outsideCounts->rightPositions()[pid].end(),
WordsRangeCovers(*segment));
for (size_t i = 0; i < outsideCounts->leftPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts->leftPositions()[pid][i];
if (ppos >= segment->GetStartPos()) continue;
size_t lr = outsideCounts->lr(pid,ppos);
size_t rr = outsideCounts->rr(pid,ppos);
//cerr << "lr: " << lr << " rr: " << rr << " ";
//account for contents of segment
lr += counts.leftPositions()[pid].size() - leftsInSegment;
rr += counts.rightPositions()[pid].size() - rightsInSegment;
//cerr << "lr: " << lr << " rr: " << rr << " rpos " << counts.rightPositions().size() << " ris: " << rightsInSegment << endl;
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
//right violations to left of segment
//Ignore since the new segment cannot change these
//left violations inside the segment
for (size_t i = 0; i < counts.leftPositions()[pid].size(); ++i) {
size_t ppos = counts.leftPositions()[pid][i];
size_t lr = counts.lr(pid,ppos);
size_t rr = counts.rr(pid,ppos);
if (outsideCounts) {
if (segment->GetEndPos()+1 < outsideCounts->segmentLength()) {
lr += outsideCounts->lr(pid,segment->GetEndPos()+1);
rr += outsideCounts->rr(pid,segment->GetEndPos()+1);
}
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//right violations in the segment
for (size_t i = 0; i < counts.rightPositions()[pid].size(); ++i) {
size_t ppos = counts.rightPositions()[pid][i];
size_t rl = counts.rl(pid,ppos);
size_t ll = counts.ll(pid,ppos);
if (outsideCounts) {
if (segment->GetStartPos() > 0) {
rl += outsideCounts->rl(pid,segment->GetStartPos()-1);
ll += outsideCounts->ll(pid,segment->GetStartPos()-1);
}
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//left violations to right of segment
//Ignore since the new segment cannot change these
//right violations to right of segment
if (outsideCounts) {
size_t leftsInSegment = count_if(outsideCounts->leftPositions()[pid].begin(),
outsideCounts->leftPositions()[pid].end(),
WordsRangeCovers(*segment));
size_t rightsInSegment = count_if(outsideCounts->rightPositions()[pid].begin(),
outsideCounts->rightPositions()[pid].end(),
WordsRangeCovers(*segment));
for (size_t i = 0; i < outsideCounts->rightPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts->rightPositions()[pid][i];
if (ppos <= segment->GetEndPos()) continue;
size_t rl = outsideCounts->rl(pid,ppos);
size_t ll = outsideCounts->ll(pid,ppos);
//account for segment
rl += counts.rightPositions()[pid].size() - rightsInSegment;
ll += counts.leftPositions()[pid].size() - leftsInSegment;
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
}
}
void ParenthesisFeatureFunction::getViolations(const ParenthesisCounts& leftSegmentCounts, const ParenthesisCounts& rightSegmentCounts,
const WordsRange& leftSegment, const WordsRange& rightSegment,
const ParenthesisCounts& outsideCounts, FVector& violations)
{
for (size_t pid = 0; pid < m_numValues; ++pid) {
//count the existing parentheses in the left and right segments
size_t leftsInLeftSegment = count_if(outsideCounts.leftPositions()[pid].begin(),
outsideCounts.leftPositions()[pid].end(),
WordsRangeCovers(leftSegment));
size_t leftsInRightSegment = count_if(outsideCounts.leftPositions()[pid].begin(),
outsideCounts.leftPositions()[pid].end(),
WordsRangeCovers(rightSegment));
size_t rightsInLeftSegment = count_if(outsideCounts.rightPositions()[pid].begin(),
outsideCounts.rightPositions()[pid].end(),
WordsRangeCovers(leftSegment));
size_t rightsInRightSegment = count_if(outsideCounts.rightPositions()[pid].begin(),
outsideCounts.rightPositions()[pid].end(),
WordsRangeCovers(rightSegment));
//check left parentheses in left segment
for (size_t i = 0; i < leftSegmentCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = leftSegmentCounts.leftPositions()[pid][i];
size_t lr = leftSegmentCounts.lr(pid,ppos);
size_t rr = leftSegmentCounts.rr(pid,ppos);
if (leftSegment.GetEndPos()+1 < outsideCounts.segmentLength()) {
lr += outsideCounts.lr(pid,leftSegment.GetEndPos()+1);
rr += outsideCounts.rr(pid,leftSegment.GetEndPos()+1);
}
//account for right segment
lr += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
rr += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check right parentheses in left segment
for (size_t i = 0; i < leftSegmentCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = leftSegmentCounts.rightPositions()[pid][i];
size_t rl = leftSegmentCounts.rl(pid,ppos);
size_t ll = leftSegmentCounts.ll(pid,ppos);
if (leftSegment.GetStartPos() > 0) {
rl += outsideCounts.rl(pid, leftSegment.GetStartPos()-1);
ll += outsideCounts.ll(pid, leftSegment.GetStartPos()-1);
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check left parentheses in right segment
for (size_t i = 0; i < rightSegmentCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = rightSegmentCounts.leftPositions()[pid][i];
size_t lr = rightSegmentCounts.lr(pid,ppos);
size_t rr = rightSegmentCounts.rr(pid,ppos);
if (rightSegment.GetEndPos()+1 < outsideCounts.segmentLength()) {
lr += outsideCounts.lr(pid, rightSegment.GetEndPos()+1);
rr += outsideCounts.rr(pid, rightSegment.GetEndPos()+1);
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for right parentheses in right segment
for (size_t i = 0; i < rightSegmentCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = rightSegmentCounts.rightPositions()[pid][i];
size_t rl = rightSegmentCounts.rl(pid,ppos);
size_t ll = rightSegmentCounts.ll(pid,ppos);
if (rightSegment.GetStartPos() > 0) {
rl += outsideCounts.rl(pid,rightSegment.GetStartPos()-1);
ll += outsideCounts.ll(pid, rightSegment.GetStartPos()-1);
}
//account for left segment
rl += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
ll += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for left parentheses outside of both segments
for (size_t i = 0; i < outsideCounts.leftPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts.leftPositions()[pid][i];
//ignore if parenthesis is in the left segment, right segment, or to the right of the right segment
if (ppos >= rightSegment.GetStartPos() || leftSegment.covers(ppos)) continue;
size_t lr = outsideCounts.lr(pid,ppos);
size_t rr = outsideCounts.rr(pid,ppos);
if (ppos < rightSegment.GetStartPos()) {
//account for right segment
lr += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
rr += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
}
if (ppos < leftSegment.GetStartPos()) {
//account for left segment
lr += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
rr += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
}
if (lr > rr) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
//check for right parentheses outside of both segments
for (size_t i = 0; i < outsideCounts.rightPositions()[pid].size(); ++i) {
size_t ppos = outsideCounts.rightPositions()[pid][i];
//ignore if parenthesis is in the right segment, left segment, or to the left of left segment
if (ppos <= leftSegment.GetEndPos() || rightSegment.covers(ppos)) continue;
size_t rl = outsideCounts.rl(pid,ppos);
size_t ll = outsideCounts.ll(pid,ppos);
if (ppos > leftSegment.GetEndPos()) {
//account for the left segment
rl += (leftSegmentCounts.rightPositions()[pid].size() - rightsInLeftSegment);
ll += (leftSegmentCounts.leftPositions()[pid].size() - leftsInLeftSegment);
}
if (ppos > rightSegment.GetEndPos()) {
//account for right segment
rl += (rightSegmentCounts.rightPositions()[pid].size() - rightsInRightSegment);
ll += (rightSegmentCounts.leftPositions()[pid].size() - leftsInRightSegment);
}
if (rl > ll) {
violations[m_names[pid]] = violations[m_names[pid]] + 1;
}
}
}
}
void ParenthesisFeatureFunction::scoreUpdate(const Phrase& phrase, const WordsRange& segment, FVector& scores) {
m_leftSegmentCounts.count(phrase.begin(), phrase.end(), m_lefts, m_rights);
getViolations(m_leftSegmentCounts,scores,&m_counts,&segment);
}
void ParenthesisFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
scoreUpdate(option->GetTargetPhrase(),gap.segment,scores);
}
void ParenthesisFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
Phrase phrase(leftOption->GetTargetPhrase());
phrase.Append(rightOption->GetTargetPhrase());
scoreUpdate(phrase,gap.segment,scores);
}
void ParenthesisFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
const Phrase& leftPhrase = leftOption->GetTargetPhrase();
m_leftSegmentCounts.count(leftPhrase.begin(),leftPhrase.end(),m_lefts,m_rights);
const Phrase& rightPhrase = rightOption->GetTargetPhrase();
m_rightSegmentCounts.count(rightPhrase.begin(),rightPhrase.end(),m_lefts,m_rights);
getViolations(m_leftSegmentCounts,m_rightSegmentCounts,leftGap.segment,rightGap.segment,m_counts,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ParenthesisFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,WordsRange(leftGap.segment.GetStartPos(), rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

139
josiah/ParenthesisFeature.h Normal file
View File

@ -0,0 +1,139 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
/**
* Store the various counts at each position.
**/
class ParenthesisCounts {
public:
ParenthesisCounts(size_t numValues):
m_ll(numValues), m_rl(numValues), m_lr(numValues), m_rr(numValues)
,m_leftPositions(numValues),m_rightPositions(numValues) {}
//getters
size_t numValues() const {return m_ll.size();}
size_t segmentLength() const {return m_ll[0].size();}
size_t ll(size_t pid, size_t position) const {return m_ll[pid][position];}
size_t rl(size_t pid, size_t position) const {return m_rl[pid][position];}
size_t lr(size_t pid, size_t position) const {return m_lr[pid][position];}
size_t rr(size_t pid, size_t position) const {return m_rr[pid][position];}
const std::vector<std::vector<size_t> >& leftPositions() const {return m_leftPositions;}
const std::vector<std::vector<size_t> >& rightPositions() const {return m_rightPositions;}
//Initialise counts
void count(std::vector<Word>::const_iterator begin, std::vector<Word>::const_iterator end,
const std::string& lefts, const std::string& rights);
private:
std::vector<std::vector<size_t> > m_ll; //left brackets to left
std::vector<std::vector<size_t> > m_rl; //right brackets to left
std::vector<std::vector<size_t> > m_lr; //left brackets to right
std::vector<std::vector<size_t> > m_rr; //right brackets to right
std::vector<std::vector<size_t> > m_leftPositions; //positions of left parentheses
std::vector<std::vector<size_t> > m_rightPositions; //positions of right parentheses
};
/**
 * Feature that checks for matching of brackets and similar constructions.
**/
class ParenthesisFeature : public Feature {
public:
ParenthesisFeature(const std::string lefts, const std::string rights);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
private:
std::string m_lefts,m_rights;
};
class ParenthesisFeatureFunction: public FeatureFunction {
public:
ParenthesisFeatureFunction(const Sample& sample,const std::string lefts, const std::string rights) :
FeatureFunction(sample),
m_numValues(lefts.size()), m_lefts(lefts), m_rights(rights),
m_counts(m_numValues), m_leftSegmentCounts(m_numValues), m_rightSegmentCounts(m_numValues) {
for (size_t i = 0; i < lefts.size(); ++i) {
m_names.push_back(FName("par",lefts.substr(i,1)));
}
}
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
virtual ~ParenthesisFeatureFunction() {}
private:
/** Violations from a segment, with optional outside counts. If outside counts and segment are missing, then
the counts are assumed to cover the entire target sentence. */
void getViolations(const ParenthesisCounts& counts, FVector& violations,
const ParenthesisCounts* outsideCounts=NULL, const WordsRange* segment=NULL);
/** Violations from a pair of segments, with outside counts */
void getViolations(const ParenthesisCounts& leftSegmentCounts, const ParenthesisCounts& rightSegmentCounts,
const WordsRange& leftSegment, const WordsRange& rightSegment,
const ParenthesisCounts& outsideCounts, FVector& scores);
void scoreUpdate(const Moses::Phrase& phrase, const Moses::WordsRange& segment, FVector& scores);
size_t m_numValues;
//left and right parenthesis characters
std::string m_lefts;
std::string m_rights;
//Counts for current target
ParenthesisCounts m_counts;
//counts for current segments
ParenthesisCounts m_leftSegmentCounts;
ParenthesisCounts m_rightSegmentCounts;
std::vector<FName> m_names;
};
}
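
The running counts above let each bracket be tested in constant time: a left bracket is a violation if more left than right brackets occur from its position rightwards, and symmetrically for right brackets. A self-contained sketch on a plain string with a single bracket pair (no Moses types):

#include <cstddef>
#include <string>
#include <vector>

//Count unmatched '(' and ')' using precomputed running counts,
//mirroring the ll/rl/lr/rr arrays of ParenthesisCounts.
std::size_t countViolations(const std::string& s) {
  const std::size_t n = s.size();
  std::vector<std::size_t> lr(n + 1, 0), rr(n + 1, 0); //counts from i to the end
  for (std::size_t i = n; i-- > 0; ) {
    lr[i] = lr[i + 1] + (s[i] == '(' ? 1 : 0);
    rr[i] = rr[i + 1] + (s[i] == ')' ? 1 : 0);
  }
  std::vector<std::size_t> ll(n + 1, 0), rl(n + 1, 0); //counts from the start up to i-1
  for (std::size_t i = 0; i < n; ++i) {
    ll[i + 1] = ll[i] + (s[i] == '(' ? 1 : 0);
    rl[i + 1] = rl[i] + (s[i] == ')' ? 1 : 0);
  }
  std::size_t violations = 0;
  for (std::size_t p = 0; p < n; ++p) {
    if (s[p] == '(' && lr[p] > rr[p]) ++violations; //unmatched left
    if (s[p] == ')' && rl[p + 1] > ll[p + 1]) ++violations; //unmatched right
  }
  return violations;
}

For example, countViolations("(()") returns 1 (one unmatched left bracket) and countViolations("()") returns 0.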

235
josiah/PhraseBoundaryFeature.cpp Normal file
View File

@ -0,0 +1,235 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Gibbler.h"
#include "PhraseBoundaryFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const string PhraseBoundaryFeature::STEM = "pb";
const string PhraseBoundaryFeature::SEP = ":";
const string PhraseBoundaryFeature::SOURCE = "src";
const string PhraseBoundaryFeature::TARGET = "tgt";
const string PhraseBoundaryFeature::BOS = "<s>";
const string PhraseBoundaryFeature::EOS = "</s>";
PhraseBoundaryFeature::PhraseBoundaryFeature(
const FactorList& sourceFactors,
const FactorList& targetFactors,
const vector<string>& sourceVocabFiles,
const vector<string>& targetVocabFiles) :
m_sourceFactors(sourceFactors), m_targetFactors(targetFactors)
{
assert(sourceFactors.size() == sourceVocabFiles.size());
assert(targetFactors.size() == targetVocabFiles.size());
m_sourceVocabs.resize(sourceVocabFiles.size());
for (size_t i = 0; i < sourceVocabFiles.size(); ++i) {
loadVocab(sourceVocabFiles[i],m_sourceVocabs[i]);
}
m_targetVocabs.resize(targetVocabFiles.size());
for (size_t i = 0; i < targetVocabFiles.size(); ++i) {
loadVocab(targetVocabFiles[i],m_targetVocabs[i]);
}
}
FeatureFunctionHandle PhraseBoundaryFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new PhraseBoundaryFeatureFunction(sample,*this));
}
void PhraseBoundaryFeature::addSourceFeatures(
const Word* leftWord, const Word* rightWord, FVector& scores) const {
addFeatures(leftWord,rightWord,m_sourceFactors,SOURCE, m_sourceVocabs,scores);
}
void PhraseBoundaryFeature::addTargetFeatures(
const Word* leftWord, const Word* rightWord, FVector& scores) const {
addFeatures(leftWord,rightWord,m_targetFactors,TARGET, m_targetVocabs,scores);
}
void PhraseBoundaryFeature::addFeatures(
const Word* leftWord, const Word* rightWord,
const FactorList& factors, const string& side,
const vector<set<string> >& vocabs, FVector& scores) const
{
for (size_t i = 0; i < factors.size(); ++i) {
ostringstream name;
name << side << SEP;
name << factors[i];
name << SEP;
if (leftWord) {
const string& leftWordText = leftWord->GetFactor(factors[i])->GetString();
if (vocabs[i].size() != 0 &&
vocabs[i].find(leftWordText) == vocabs[i].end()) {
continue;
}
name << leftWordText;
} else {
name << BOS;
}
name << SEP;
if (rightWord) {
const string& rightWordText = rightWord->GetFactor(factors[i])->GetString();
if (vocabs[i].size() != 0 &&
vocabs[i].find(rightWordText) == vocabs[i].end()) {
continue;
}
name << rightWordText;
} else {
name << EOS;
}
FName fName(STEM,name.str());
++scores[fName];
}
}
PhraseBoundaryFeatureFunction::PhraseBoundaryFeatureFunction
(const Sample& sample, const PhraseBoundaryFeature& parent) :
FeatureFunction(sample), m_parent(parent) {}
/** Update the target words.*/
void PhraseBoundaryFeatureFunction::updateTarget() {}
/** Assign the total score of this feature on the current hypo */
void PhraseBoundaryFeatureFunction::assignScore(FVector& scores) {
const Hypothesis* currHypo = getSample().GetTargetTail();
scoreOptions(NULL,&(currHypo->GetNextHypo()->GetTranslationOption()),scores);
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* leftOption =
&(currHypo->GetTranslationOption());
const TranslationOption* rightOption = NULL;
if (currHypo->GetNextHypo()) {
rightOption = &(currHypo->GetNextHypo()->GetTranslationOption());
}
scoreOptions(leftOption,rightOption,scores);
}
}
/** Score due to one segment */
void PhraseBoundaryFeatureFunction::doSingleUpdate(
const TranslationOption* option, const TargetGap& gap, FVector& scores) {
const TranslationOption* leftOption = NULL;
if (gap.leftHypo->GetPrevHypo()) {
leftOption = &(gap.leftHypo->GetTranslationOption());
}
const TranslationOption* rightOption = option;
scoreOptions(leftOption,rightOption,scores);
leftOption = option;
if (gap.rightHypo) {
rightOption = &(gap.rightHypo->GetTranslationOption());
} else {
rightOption = NULL;
}
scoreOptions(leftOption,rightOption,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PhraseBoundaryFeatureFunction::doContiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
const TranslationOption* prevOption = NULL;
if (gap.leftHypo->GetPrevHypo()) {
prevOption = &(gap.leftHypo->GetTranslationOption());
}
const TranslationOption* nextOption = NULL;
if (gap.rightHypo) {
nextOption = &(gap.rightHypo->GetTranslationOption());
}
scoreOptions(prevOption,leftOption,scores);
scoreOptions(leftOption,rightOption,scores);
scoreOptions(rightOption,nextOption,scores);
}
void PhraseBoundaryFeatureFunction::doDiscontiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
doSingleUpdate(leftOption,leftGap,scores);
doSingleUpdate(rightOption,rightGap,scores);
}
/** Score due to flip. Again, left and right refer to order on the
<emph>target</emph> side. */
void PhraseBoundaryFeatureFunction::doFlipUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
//contiguous
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo,
WordsRange(leftGap.segment.GetStartPos(),
rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
//discontiguous
doDiscontiguousPairedUpdate(leftOption,rightOption,
leftGap,rightGap,scores);
}
}
static const Word* getBeginWord(const Phrase& phrase) {
return &(*phrase.begin());
}
static const Word* getEndWord(const Phrase& phrase) {
return &(phrase.GetWord(phrase.GetSize()-1));
}
void PhraseBoundaryFeatureFunction::scoreOptions(
const TranslationOption* leftOption, const TranslationOption* rightOption,
FVector& scores)
{
//source
const Word* leftSourceWord = NULL;
const Word* rightSourceWord = NULL;
if (leftOption) {
leftSourceWord = getEndWord(*(leftOption->GetSourcePhrase()));
}
if (rightOption) {
rightSourceWord = getBeginWord(*(rightOption->GetSourcePhrase()));
}
m_parent.addSourceFeatures(leftSourceWord,rightSourceWord,scores);
//target
const Word* leftTargetWord = NULL;
const Word* rightTargetWord = NULL;
if (leftOption) {
leftTargetWord = getEndWord(leftOption->GetTargetPhrase());
}
if (rightOption) {
rightTargetWord = getBeginWord(rightOption->GetTargetPhrase());
}
m_parent.addTargetFeatures(leftTargetWord,rightTargetWord,scores);
}
void PhraseBoundaryFeature::loadVocab(
const std::string& filename, std::set<std::string>& vocab) {
if (filename.empty()) return;
ifstream in(filename.c_str());
assert(in);
string line;
while(getline(in,line)) {
vocab.insert(line);
}
}
}
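
The vocabulary files act as a gate in addFeatures(): an empty vocabulary means no restriction, otherwise a boundary feature only fires when the word is listed. The same test in isolation (a sketch, not the class API):

#include <set>
#include <string>

//Sketch of the vocabulary gate: an empty set means unrestricted.
bool inVocab(const std::set<std::string>& vocab, const std::string& word) {
  return vocab.empty() || vocab.count(word) > 0;
}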

113
josiah/PhraseBoundaryFeature.h Normal file
View File

@ -0,0 +1,113 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <set>
#include <vector>
#include "FeatureFunction.h"
namespace Josiah {
/**
* Bigrams at phrase boundaries.
**/
class PhraseBoundaryFeature : public Feature {
public:
PhraseBoundaryFeature(
const Moses::FactorList& sourceFactors,
const Moses::FactorList& targetFactors,
const std::vector<std::string>& sourceVocabFiles,
const std::vector<std::string>& targetVocabFiles);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
/** If either word is null, then eos or bos is assumed */
void addSourceFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
FVector& scores) const;
void addTargetFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
FVector& scores) const;
private:
void addFeatures(
const Moses::Word* leftWord, const Moses::Word* rightWord,
const Moses::FactorList& factors, const std::string& side,
const std::vector<std::set<std::string> >& vocabs,
FVector& scores) const;
void loadVocab(const std::string& filename, std::set<std::string>& vocab);
static const std::string SEP;
static const std::string STEM;
static const std::string SOURCE;
static const std::string TARGET;
static const std::string BOS;
static const std::string EOS;
Moses::FactorList m_sourceFactors;
Moses::FactorList m_targetFactors;
std::vector<std::set<std::string> > m_sourceVocabs;
std::vector<std::set<std::string> > m_targetVocabs;
};
class PhraseBoundaryFeatureFunction : public FeatureFunction {
public:
PhraseBoundaryFeatureFunction(const Sample& sample, const PhraseBoundaryFeature& parent);
/** Update the target words.*/
virtual void updateTarget();
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(
const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the
<emph>target</emph> side. */
virtual void doFlipUpdate(
const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
void scoreOptions(
const TranslationOption* leftOption, const TranslationOption* rightOption,
FVector& scores);
const PhraseBoundaryFeature& m_parent;
};
}
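
Each boundary bigram becomes a sparse feature whose name joins side, factor id and the two adjacent words with SEP (":"), substituting BOS/EOS markers at the sentence edges. A sketch of the naming scheme (the stem "pb" is added by FName in the real code):

#include <sstream>
#include <string>

//Sketch: assemble a phrase-boundary feature name; null words mark
//sentence boundaries.
std::string boundaryFeatureName(const std::string& side, int factor,
                                const std::string* left,
                                const std::string* right) {
  std::ostringstream name;
  name << side << ":" << factor << ":"
       << (left ? *left : "<s>") << ":"
       << (right ? *right : "</s>");
  return name.str();
}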

127
josiah/PhraseFeature.cpp Normal file
View File

@ -0,0 +1,127 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseFeature.h"
#include <sstream>
#include "Gibbler.h"
#include "StaticData.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
namespace Josiah {
set<PhraseFeature*> PhraseFeature::s_phraseFeatures;
PhraseFeature::PhraseFeature
(PhraseDictionaryFeature* dictionary, size_t index) : m_phraseDictionary(dictionary) {
//pre-calculate the feature names
const string& root = m_phraseDictionary->GetScoreProducerDescription();
size_t featureCount = m_phraseDictionary->GetNumScoreComponents();
for (size_t i = 1; i <= featureCount; ++i) {
ostringstream namestream;
if (index > 0) {
namestream << index << "-";
}
namestream << i;
m_featureNames.push_back(FName(root,namestream.str()));
}
s_phraseFeatures.insert(this);
}
void PhraseFeature::updateWeights(const FVector& weights) {
for (set<PhraseFeature*>::iterator i = s_phraseFeatures.begin();
i != s_phraseFeatures.end(); ++i) {
PhraseFeature* pf = *i;
vector<float> newWeights(pf->m_featureNames.size());
for (size_t j = 0; j < pf->m_featureNames.size(); ++j) {
FValue weight = weights[pf->m_featureNames[j]];
newWeights[j] = weight;
}
ScoreComponentCollection mosesWeights = StaticData::Instance().GetAllWeights();
mosesWeights.Assign(pf->m_phraseDictionary,newWeights);
(const_cast<StaticData&>(StaticData::Instance()))
.SetAllWeights(mosesWeights);
//pf->m_phraseDictionary->GetFeature()->SetWeightTransModel(newWeights);
}
}
FeatureFunctionHandle PhraseFeature::getFunction(const Sample& sample ) const {
return FeatureFunctionHandle
(new PhraseFeatureFunction(sample,m_phraseDictionary,m_featureNames));
}
PhraseFeatureFunction::PhraseFeatureFunction(const Sample& sample, Moses::PhraseDictionaryFeature* phraseDictionary, std::vector<FName> featureNames) :
FeatureFunction(sample),
m_featureNames(featureNames),
m_phraseDictionary(phraseDictionary) {}
/** Assign the total score of this feature on the current hypo */
void PhraseFeatureFunction::assignScore(FVector& scores) {
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = 0;
}
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
assign(&(currHypo->GetTranslationOption()), scores);
}
}
/** Score due to one segment */
void PhraseFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
assign(option,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PhraseFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
void PhraseFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void PhraseFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
assign(leftOption,scores);
assign(rightOption,scores);
}
/** Add the phrase features into the feature vector */
void PhraseFeatureFunction::assign(const TranslationOption* option, FVector& scores) {
const ScoreComponentCollection& breakdown = option->GetScoreBreakdown();
vector<float> mosesScores= breakdown.GetScoresForProducer(m_phraseDictionary);
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] += mosesScores[i];
}
}
}
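
updateWeights() above bridges the sampler's sparse weight vector and Moses's dense per-producer weights by looking up each pre-computed feature name. The same pattern on standard-library types (illustrative only):

#include <cstddef>
#include <map>
#include <string>
#include <vector>

//Sketch: extract a dense weight vector for one producer from a sparse
//name->weight map, defaulting absent features to 0.
std::vector<float> denseWeights(const std::map<std::string, float>& sparse,
                                const std::vector<std::string>& names) {
  std::vector<float> dense(names.size(), 0.0f);
  for (std::size_t i = 0; i < names.size(); ++i) {
    std::map<std::string, float>::const_iterator it = sparse.find(names[i]);
    if (it != sparse.end()) dense[i] = it->second;
  }
  return dense;
}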

76
josiah/PhraseFeature.h Normal file
View File

@ -0,0 +1,76 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <set>
#include <vector>
#include "FeatureFunction.h"
#include "PhraseDictionary.h"
namespace Josiah {
/** The Moses phrase features. */
class PhraseFeature : public Feature {
public:
PhraseFeature(Moses::PhraseDictionaryFeature* dictionary, size_t index);
virtual FeatureFunctionHandle getFunction(const Sample& sample ) const;
/** Inform all phrase features that the weights have been updated so
that the new weights can be passed to moses */
static void updateWeights(const FVector& weights);
private:
static std::set<PhraseFeature*> s_phraseFeatures;
Moses::PhraseDictionaryFeature* m_phraseDictionary;
std::vector<FName> m_featureNames;
};
class PhraseFeatureFunction : public FeatureFunction {
public:
PhraseFeatureFunction(const Sample& sample, Moses::PhraseDictionaryFeature* phraseDictionary, std::vector<FName> featureNames);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
void assign(const TranslationOption* option, FVector& scores);
std::vector<FName> m_featureNames;
Moses::PhraseDictionaryFeature* m_phraseDictionary;
};
}

78
josiah/PhrasePairFeature.cpp Normal file
View File

@ -0,0 +1,78 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include "AlignmentInfo.h"
#include "Gibbler.h"
#include "PhrasePairFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
const std::string PhrasePairFeature::PREFIX = "pp";
PhrasePairFeature::PhrasePairFeature
(Moses::FactorType sourceFactorId, Moses::FactorType targetFactorId)
: m_sourceFactorId(sourceFactorId), m_targetFactorId(targetFactorId) {}
const Moses::Factor* PhrasePairFeature::getSourceFactor
(const Moses::Word& word) const {
return word[m_sourceFactorId];
}
const Moses::Factor* PhrasePairFeature::getTargetFactor
(const Moses::Word& word) const {
return word[m_targetFactorId];
}
void PhrasePairFeature::assign
(const TranslationOption* option, FVector& scores) const {
const TargetPhrase& target = option->GetTargetPhrase();
const Phrase* source = option->GetSourcePhrase();
const AlignmentInfo& align = target.GetAlignmentInfo();
// cerr << source->GetStringRep(vector<FactorType>(1));
// cerr << "|" << target.GetStringRep(vector<FactorType>(1));
for (AlignmentInfo::const_iterator i = align.begin(); i != align.end(); ++i) {
const Factor* sourceFactor =
getSourceFactor(source->GetWord(i->first));
const Factor* targetFactor =
getTargetFactor(target.GetWord(i->second));
ostringstream namestr;
namestr << sourceFactor->GetString();
namestr << ":";
namestr << targetFactor->GetString();
FName name(PhrasePairFeature::PREFIX,namestr.str());
++scores[name];
// cerr << " " << name;
}
// cerr << endl;
}
}

50
josiah/PhrasePairFeature.h Normal file
View File

@ -0,0 +1,50 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "StatelessFeature.h"
namespace Josiah {
/**
* The 'phrase pair' features of Watanabe et al. These are formed by
* pairing aligned words (or other factors) between the source and target
* side of a phrase pair.
**/
class PhrasePairFeature : public StatelessFeature {
public:
static const std::string PREFIX;
PhrasePairFeature(Moses::FactorType sourceFactorId,
Moses::FactorType targetFactorId);
const Moses::Factor* getSourceFactor(const Moses::Word& word) const;
const Moses::Factor* getTargetFactor(const Moses::Word& word) const;
/** Scores due to this translation option */
virtual void assign
(const Moses::TranslationOption* option, FVector& scores) const;
private:
Moses::FactorType m_sourceFactorId;
Moses::FactorType m_targetFactorId;
};
}
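
The Watanabe-style phrase-pair features fire once per aligned word pair inside a phrase pair. A standalone sketch with plain tokens and (source, target) alignment points (illustrative; the stem "pp" is omitted):

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

//Sketch: count "src:tgt" features for each alignment point, as in
//PhrasePairFeature::assign.
void addPhrasePairFeatures(const std::vector<std::string>& src,
                           const std::vector<std::string>& tgt,
                           const std::vector<std::pair<int, int> >& align,
                           std::map<std::string, int>& counts) {
  for (std::size_t k = 0; k < align.size(); ++k) {
    const std::string name = src[align[k].first] + ":" + tgt[align[k].second];
    ++counts[name]; //each aligned pair increments its feature
  }
}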

175
josiah/Pos.cpp Normal file
View File

@ -0,0 +1,175 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Pos.h"
using namespace std;
using namespace Moses;
namespace Josiah {
static string ToString(const TagSequence& ws)
{
ostringstream os;
for (TagSequence::const_iterator i = ws.begin(); i != ws.end(); ++i)
os << (*i)->GetString() << ",";
return os.str();
}
static ostream& operator<<(ostream& out, const TagSequence& ws)
{
out << ToString(ws);
return out;
}
template<class P>
static void getPosTags(const P& words, TagSequence& tags, FactorType factorType) {
for (typename P::const_iterator i = words.begin(); i != words.end(); ++i) {
tags.push_back(i->operator[](factorType));
/*cerr << "F0: " << *(i->operator[](0)) << endl;
const Factor* f1 = i->operator[](1);
if (f1) {
cerr << "F1: " << *f1 << endl;
} else {
cerr << "F1: " << "missing" << endl;
}*/
}
}
static void getSegmentWords(const vector<Word>& words, const WordsRange& segment, vector<Word>& segmentWords) {
for (size_t i = segment.GetStartPos(); i <= segment.GetEndPos(); ++i) {
segmentWords.push_back(words[i]);
}
}
float Josiah::PosFeatureFunction::computeScore() {
m_sourceTags.clear();
TagSequence targetTags;
getPosTags(getSample().GetSourceWords(), m_sourceTags, m_sourceFactorType);
getPosTags(getSample().GetTargetWords(), targetTags, m_targetFactorType);
//cerr << "Source " << m_sourceTags << endl;
//cerr << "Target " << targetTags << endl;
return computeScore(m_sourceTags, targetTags);
}
float Josiah::PosFeatureFunction::getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap)
{
const WordsRange& sourceSegment = option->GetSourceWordsRange();
TagSequence newTargetTags;
getPosTags(option->GetTargetPhrase(), newTargetTags, m_targetFactorType);
return getSingleUpdateScore(sourceSegment,gap.segment, newTargetTags);
}
float Josiah::PosFeatureFunction::getContiguousPairedUpdateScore
(const TranslationOption * leftOption, const TranslationOption * rightOption, const TargetGap& gap)
{
//just treat this as one segment
WordsRange sourceSegment(min(leftOption->GetStartPos(),rightOption->GetStartPos()),
max(leftOption->GetEndPos(), rightOption->GetEndPos()));
TagSequence newTargetTags;
Phrase targetPhrase = leftOption->GetTargetPhrase();
targetPhrase.Append(rightOption->GetTargetPhrase());
getPosTags(targetPhrase, newTargetTags, m_targetFactorType);
return getSingleUpdateScore(sourceSegment, gap.segment, newTargetTags);
}
float Josiah::PosFeatureFunction::getDiscontiguousPairedUpdateScore
(const TranslationOption * leftOption, const TranslationOption * rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
//treat as two gaps
return getSingleUpdateScore(leftOption,leftGap) + getSingleUpdateScore(rightOption,rightGap);
}
float Josiah::PosFeatureFunction::getFlipUpdateScore(const TranslationOption * leftOption, const TranslationOption * rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
pair<WordsRange,WordsRange> sourceSegments(leftOption->GetSourceWordsRange(), rightOption->GetSourceWordsRange());
pair<WordsRange,WordsRange> targetSegments(leftGap.segment, rightGap.segment);
return getFlipUpdateScore(sourceSegments, targetSegments);
}
void Josiah::PosFeatureFunction::getCurrentTargetTags(TagSequence& tags) const
{
getPosTags(getSample().GetTargetWords(), tags, m_targetFactorType);
}
bool Josiah::SourceVerbPredicate::operator ( )( const Factor * tag )
{
const string& tagString = tag->GetString();
//This works for TreeTagger de
return !tagString.empty() && tagString[0] == 'V';
}
bool Josiah::TargetVerbPredicate::operator ( )( const Factor * tag )
{
//This is for lopar en
const string& tagString = tag->GetString();
//cerr << tagString << " " << (tagString.length() > 1 && !tagString.compare(0,2,"md")) << endl;
return (!tagString.empty() && tagString[0] == 'v') || (tagString.length() > 1 && !tagString.compare(0,2,"md"));
}
VerbDifferenceFeature::VerbDifferenceFeature(FactorType sourceFactorType, FactorType targetFactorType) :
m_sourceFactorType(sourceFactorType),
m_targetFactorType(targetFactorType)
{}
FeatureFunctionHandle VerbDifferenceFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new VerbDifferenceFeatureFunction(sample, m_sourceFactorType, m_targetFactorType));
}
float Josiah::VerbDifferenceFeatureFunction::computeScore( const TagSequence & sourceTags, const TagSequence & targetTags ) const
{
SourceVerbPredicate svp;
int sourceVerbs = (int)count_if(sourceTags.begin(), sourceTags.end(), svp);
TargetVerbPredicate tvp;
int targetVerbs = (int)count_if(targetTags.begin(), targetTags.end(), tvp);
//cerr << "ComputeScore: source " << sourceVerbs << " target: " << targetVerbs << endl;
return targetVerbs - sourceVerbs;
}
float Josiah::VerbDifferenceFeatureFunction::getSingleUpdateScore(
const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const
{
TargetVerbPredicate tvp;
int targetVerbs = (int)count_if(newTargetTags.begin(), newTargetTags.end(), tvp);
//cerr << "SingleUpdate: new " << newTargetVerbs << " old " << oldTargetVerbs << endl;
return targetVerbs;
}
}

135
josiah/Pos.h Normal file
View File

@ -0,0 +1,135 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <iostream>
#include "TypeDef.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
typedef std::vector<const Moses::Factor*> TagSequence;
class Sample;
/**
* Abstract base class for feature functions which use source/target pos tags.
**/
class PosFeatureFunction : public SingleValuedFeatureFunction {
public:
PosFeatureFunction(const Sample& sample,
const std::string& name, Moses::FactorType sourceFactorType, Moses::FactorType targetFactorType)
: SingleValuedFeatureFunction(sample,name), m_sourceFactorType(sourceFactorType), m_targetFactorType(targetFactorType) {
//assert(targetFactorType < StaticData::Instance().GetMaxNumFactors(Output));
//assert(sourceFactorType < StaticData::Instance().GetMaxNumFactors(Input));
}
//These methods must be implemented by a subclass
/** Full score of sample*/
virtual float computeScore(const TagSequence& sourceTags, const TagSequence& targetTags) const = 0;
/**Change in score when updating one segment*/
virtual float getSingleUpdateScore(const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const = 0;
/**Change in score when flipping two segments. Note that both pairs are in target order */
virtual float getFlipUpdateScore(const std::pair<Moses::WordsRange,Moses::WordsRange>& sourceSegments,
const std::pair<Moses::WordsRange,Moses::WordsRange>& targetSegments) const = 0;
/** All tags */
void getCurrentTargetTags(TagSequence& tags) const;
/** Compute full score of a sample from scratch **/
virtual float computeScore();
/** Change in score when updating one segment */
virtual float getSingleUpdateScore(const Moses::TranslationOption* option, const TargetGap& gap);
/** Change in score when updating two segments **/
virtual float getContiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap);
virtual float getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Change in score when flipping */
virtual float getFlipUpdateScore(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual ~PosFeatureFunction() {}
protected:
const TagSequence& getSourceTags() {return m_sourceTags;}
private:
TagSequence m_sourceTags;
FactorType m_sourceFactorType;
FactorType m_targetFactorType;
};
//FIXME: These should be configurable because they will change for different tag sets.
struct SourceVerbPredicate {
bool operator()(const Factor* tag);
};
struct TargetVerbPredicate {
bool operator()(const Factor* tag);
};
/**
* Feature which counts the difference between the verb counts on each side (target-source).
**/
class VerbDifferenceFeature : public Feature {
public:
VerbDifferenceFeature(FactorType sourceFactorType, FactorType targetFactorType);
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
private:
FactorType m_sourceFactorType;
FactorType m_targetFactorType;
};
class VerbDifferenceFeatureFunction: public PosFeatureFunction {
public:
VerbDifferenceFeatureFunction(const Sample& sample, FactorType sourceFactorType, FactorType targetFactorType) :
PosFeatureFunction(sample, "VerbDifference", sourceFactorType, targetFactorType) {}
virtual float computeScore(const TagSequence& sourceTags, const TagSequence& targetTags) const;
virtual float getSingleUpdateScore (
const Moses::WordsRange& sourceSegment, const Moses::WordsRange& targetSegment,
const TagSequence& newTargetTags) const;
virtual float getFlipUpdateScore(const std::pair<Moses::WordsRange,Moses::WordsRange>& sourceSegments,
const std::pair<Moses::WordsRange,Moses::WordsRange>& targetSegments) const
{return 0;} //flipping can't change the verb difference
virtual ~VerbDifferenceFeatureFunction() {}
private:
};
}
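
VerbDifference reduces to two count_if calls: count the tags matching a verb predicate on each side and score the difference. A toy with plain string tags (the predicates stand in for the TreeTagger/LOPAR conventions hard-coded above):

#include <algorithm>
#include <string>
#include <vector>

static bool isSourceVerb(const std::string& t) { return !t.empty() && t[0] == 'V'; }
static bool isTargetVerb(const std::string& t) {
  return (!t.empty() && t[0] == 'v') || (t.length() > 1 && t.compare(0, 2, "md") == 0);
}

//Sketch: target verb count minus source verb count.
int verbDifference(const std::vector<std::string>& srcTags,
                   const std::vector<std::string>& tgtTags) {
  int s = (int)std::count_if(srcTags.begin(), srcTags.end(), isSourceVerb);
  int t = (int)std::count_if(tgtTags.begin(), tgtTags.end(), isTargetVerb);
  return t - s;
}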

156
josiah/PosProjectionFeature.cpp Normal file
View File

@ -0,0 +1,156 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PosProjectionFeature.h"
using namespace std;
using namespace Moses;
namespace Josiah {
void PosProjectionFeatureFunction::updateTarget() {
m_tagProjection.clear();
for (const Hypothesis* currHypo = getSample().GetTargetTail()->GetNextHypo();
currHypo != NULL; currHypo = currHypo->GetNextHypo()) {
const Phrase* sourcePhrase = currHypo->GetSourcePhrase();
for (Phrase::const_iterator i = sourcePhrase->begin();
i != sourcePhrase->end(); ++i) {
m_tagProjection.push_back(i->GetFactor(m_sourceFactorType));
//cerr << " " << i->GetFactor(0)->GetString();
}
}
}
PosProjectionBigramFeature::PosProjectionBigramFeature(Moses::FactorType sourceFactorType,const std::string& tags):
m_sourceFactorType(sourceFactorType)
{
if (tags != "*") {
vector<string> tagList = Tokenize(tags, ",");
copy(tagList.begin(), tagList.end(), inserter(m_tags, m_tags.end()));
VERBOSE(1, "PosProjectionBigramFeature configured with " << m_tags.size() << " tags" << endl);
} else {
VERBOSE(1, "PosProjectionBigramFeature will consider all tags" << endl);
}
}
FeatureFunctionHandle PosProjectionBigramFeature::getFunction( const Sample& sample ) const {
return FeatureFunctionHandle(new PosProjectionBigramFeatureFunction(sample, m_sourceFactorType, m_tags));
}
const string PosProjectionBigramFeatureFunction::ROOT = "ppf";
PosProjectionBigramFeatureFunction::PosProjectionBigramFeatureFunction
(const Sample& sample, Moses::FactorType sourceFactorType,const set<string>& tags) :
PosProjectionFeatureFunction(sample, sourceFactorType), m_tags(tags){}
void PosProjectionBigramFeatureFunction::countBigrams
(const TagSequence& tagSequence, FVector& counts) {
//cerr << "Tag bigrams ";
if (tagSequence.size() < 2) return; //guard: tagIter+1 below is invalid for sequences shorter than a bigram
for (TagSequence::const_iterator tagIter = tagSequence.begin();
tagIter+1 != tagSequence.end(); ++tagIter) {
const string& currTag = (*tagIter)->GetString();
if (m_tags.size() && m_tags.find(currTag) == m_tags.end()) continue;
const string& nextTag = (*(tagIter+1))->GetString();
if (m_tags.size() && m_tags.find(nextTag) == m_tags.end()) continue;
FName name(ROOT, currTag + ":" + nextTag);
++counts[name];
}
//cerr << endl;
}
/** Assign the total score of this feature on the current hypo */
void PosProjectionBigramFeatureFunction::assignScore(FVector& scores) {
countBigrams(getCurrentTagProjection(),scores);
}
/** Score due to one segment */
void PosProjectionBigramFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
//no change
}
/** Score due to two segments. The left and right refer to the target positions.**/
void PosProjectionBigramFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
//no change
}
void PosProjectionBigramFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
//no change
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void PosProjectionBigramFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
bool contiguous = (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos());
//changes the projection, so recalculate
TagSequence tagProjection;
if (leftGap.leftHypo->GetPrevHypo()) {
//include word to left of gap
const Phrase* leftPhrase = leftGap.leftHypo->GetSourcePhrase();
tagProjection.push_back(leftPhrase->GetWord(leftPhrase->GetSize()-1).GetFactor(sourceFactorType()));
}
//include words to go in left gap
for (Phrase::const_iterator i = leftOption->GetSourcePhrase()->begin();
i != leftOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
if (contiguous) {
//include words to go in right gap
for (Phrase::const_iterator i = rightOption->GetSourcePhrase()->begin();
i != rightOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
//and word to right of gap
if (rightGap.rightHypo) {
const Phrase* rightPhrase = rightGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
} else {
//word to right of left gap
const Phrase* rightPhrase = leftGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
countBigrams(tagProjection,scores);
if (!contiguous) {
//right gap
tagProjection.clear();
//word to the left
const Phrase* leftPhrase = rightGap.leftHypo->GetSourcePhrase();
tagProjection.push_back(leftPhrase->GetWord(leftPhrase->GetSize()-1).GetFactor(sourceFactorType()));
//words to go in right gap
for (Phrase::const_iterator i = rightOption->GetSourcePhrase()->begin();
i != rightOption->GetSourcePhrase()->end(); ++i) {
tagProjection.push_back(i->GetFactor(sourceFactorType()));
}
if (rightGap.rightHypo) {
const Phrase* rightPhrase = rightGap.rightHypo->GetSourcePhrase();
tagProjection.push_back(rightPhrase->GetWord(0).GetFactor(sourceFactorType()));
}
countBigrams(tagProjection,scores);
}
}
}
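Editor's note: to make countBigrams concrete, here is a hedged, self-contained mirror of its logic on plain strings (the function name is illustrative, and the exact rendering of the FName keys rooted at "ppf" may differ):
#include <map>
#include <set>
#include <string>
#include <vector>
//For tags [DT, NN, VB] and an empty filter this counts the bigrams DT:NN and NN:VB.
std::map<std::string, int> countTagBigrams(const std::vector<std::string>& tags,
                                           const std::set<std::string>& keep) {
  std::map<std::string, int> counts;
  for (size_t i = 0; i + 1 < tags.size(); ++i) { //index form is safe on empty or singleton input
    if (!keep.empty() && keep.find(tags[i]) == keep.end()) continue;
    if (!keep.empty() && keep.find(tags[i + 1]) == keep.end()) continue;
    ++counts[tags[i] + ":" + tags[i + 1]]; //feature key, rooted at "ppf" via FName above
  }
  return counts;
}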

94
josiah/PosProjectionFeature.h Normal file
View File

@@ -0,0 +1,94 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "Pos.h"
namespace Josiah {
/**
* Features derived from projection of source pos tags onto target.
**/
class PosProjectionFeatureFunction: public FeatureFunction {
public:
PosProjectionFeatureFunction(const Sample& sample, Moses::FactorType sourceFactorType)
: FeatureFunction(sample),
m_sourceFactorType(sourceFactorType) {}
/** Update the target words.*/
virtual void updateTarget();
virtual ~PosProjectionFeatureFunction() {}
protected:
/** All projected tags */
const TagSequence& getCurrentTagProjection() const
{ return m_tagProjection;}
Moses::FactorType sourceFactorType() const {return m_sourceFactorType;}
private:
Moses::FactorType m_sourceFactorType;
TagSequence m_tagProjection;
};
class PosProjectionBigramFeature : public Feature {
public:
PosProjectionBigramFeature(Moses::FactorType sourceFactorType,const std::string& tags);
virtual FeatureFunctionHandle getFunction( const Sample& sample ) const;
private:
Moses::FactorType m_sourceFactorType;
std::set<std::string> m_tags; //tags to be considered - empty means consider all tags
};
class PosProjectionBigramFeatureFunction : public PosProjectionFeatureFunction {
public:
PosProjectionBigramFeatureFunction(const Sample& sample, Moses::FactorType sourceFactorType,const std::set<std::string>& tags);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) ;
private:
static const std::string ROOT;
//Count the bigrams in the given tag sequence
void countBigrams(const TagSequence& tagSequence, FVector& counts);
const std::set<std::string>& m_tags; //tags to be considered - empty means consider all tags
};
}

292
josiah/ReorderingFeature.cpp Normal file
View File

@@ -0,0 +1,292 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <stdexcept>
#include <fstream>
#include <sstream>
#include <boost/lexical_cast.hpp>
#include "ReorderingFeature.h"
#include "Gibbler.h"
#include "Util.h"
using namespace Moses;
using namespace std;
using boost::lexical_cast;
namespace Josiah {
string ReorderingFeatureTemplate::BOS = "<s>";
ReorderingFeature::ReorderingFeature(const vector<string>& msd,
const std::vector<std::string>& msdVocab)
{
const static string SOURCE = "source";
const static string TARGET = "target";
const static string PREV = "prev";
const static string CURR = "curr";
for (vector<string>::const_iterator i = msdVocab.begin(); i != msdVocab.end();
++i) {
vector<string> msdVocabConfig = Tokenize(*i,":");
if (msdVocabConfig.size() != 3) {
ostringstream errmsg;
errmsg << "msdvocab configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdVocabConfig[0]);
bool source = true;
if (msdVocabConfig[1] == TARGET) {
source = false;
} else if (msdVocabConfig[1] != SOURCE) {
throw runtime_error("msd vocab config has invalid source/target identifier");
}
string filename = msdVocabConfig[2];
vocab_t* vocab = NULL;
if (source) {
vocab = &(m_sourceVocabs[factorId]);
} else {
vocab = &(m_targetVocabs[factorId]);
}
loadVocab(filename,vocab);
}
for (vector<string>::const_iterator i = msd.begin(); i != msd.end(); ++i) {
vector<string> msdConfig = Tokenize(*i,":");
if (msdConfig.size() != 4) {
ostringstream errmsg;
errmsg << "msd configuration '" << *i << "' has incorrect format";
throw runtime_error(errmsg.str());
}
size_t factorId = lexical_cast<size_t>(msdConfig[1]);
bool source = true;
if (msdConfig[2] == TARGET) {
source = false;
} else if (msdConfig[2] != SOURCE) {
throw runtime_error("msd config has invalid source/target identifier");
}
bool curr = true;
if (msdConfig[3] == PREV) {
curr = false;
} else if (msdConfig[3] != CURR) {
throw runtime_error("msd config has invalid curr/prev identifier");
}
if (msdConfig[0] == "edge") {
m_templates.push_back(new EdgeReorderingFeatureTemplate(factorId,source,curr));
} else {
ostringstream errmsg;
errmsg << "Unknown msd feature type '" << msdConfig[0] << "'" << endl;
throw runtime_error(errmsg.str());
}
//set vocabulary, if necessary
vocab_t* vocab = NULL;
if (source) {
if (m_sourceVocabs.find(factorId) != m_sourceVocabs.end()) {
vocab = &(m_sourceVocabs[factorId]);
}
} else {
if (m_targetVocabs.find(factorId) != m_targetVocabs.end()) {
vocab = &(m_targetVocabs[factorId]);
}
}
m_templates.back()->setVocab(vocab);
}
}
FeatureFunctionHandle ReorderingFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new ReorderingFeatureFunction(sample, *this));
}
const std::vector<ReorderingFeatureTemplate*>& ReorderingFeature::getTemplates() const {
return m_templates;
}
void ReorderingFeature::loadVocab(string filename, vocab_t* vocab) {
VERBOSE(1, "Loading vocabulary for reordering feature from " << filename << endl);
vocab->clear();
ifstream in(filename.c_str());
if (!in) {
ostringstream errmsg;
errmsg << "Unable to load vocabulary from " << filename;
throw runtime_error(errmsg.str());
}
string line;
while (getline(in,line)) {
vocab->insert(line);
}
}
bool ReorderingFeatureTemplate::checkVocab(const std::string& word) const {
if (!m_vocab) return true;
return m_vocab->find(word) != m_vocab->end();
}
ReorderingFeatureFunction::ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent)
: FeatureFunction(sample), m_parent(parent)
{}
/** Assign features for the following options, assuming they are contiguous on the target side */
void ReorderingFeatureFunction::assign(const TranslationOption* prevOption, const TranslationOption* currOption, FVector& scores) {
for (vector<ReorderingFeatureTemplate*>::const_iterator i = m_parent.getTemplates().begin();
i != m_parent.getTemplates().end(); ++i) {
(*i)->assign(prevOption,currOption,getMsd(prevOption, currOption), scores);
}
}
const string& ReorderingFeatureFunction::getMsd(const TranslationOption* prevOption, const TranslationOption* currOption) {
int prevStart = -1;
int prevEnd = -1;
if (prevOption) {
prevStart = prevOption->GetSourceWordsRange().GetStartPos();
prevEnd = prevOption->GetSourceWordsRange().GetEndPos();
}
int currStart = currOption->GetSourceWordsRange().GetStartPos();
int currEnd = currOption->GetSourceWordsRange().GetEndPos();
static string monotone = "msd:m";
static string swap = "msd:s";
static string discontinuous = "msd:d";
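//e.g. if prev covers source words [2,4]: a curr starting at 5 is monotone,
//a curr ending at 1 is swap, and any other configuration is discontinuous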
if (prevEnd + 1 == currStart) {
return monotone;
} else if (currEnd + 1 == prevStart) {
return swap;
} else {
return discontinuous;
}
}
void EdgeReorderingFeatureTemplate::assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores)
{
static const string sourcePrev = "s:p:";
static const string sourceCurr = "s:c:";
static const string targetPrev = "t:p:";
static const string targetCurr = "t:c:";
const Word* edge = NULL;
const string* position = NULL;
if (m_source && m_curr) {
edge = &(currOption->GetSourcePhrase()->GetWord(0));
position = &sourceCurr;
} else if (m_source && !m_curr) {
if (prevOption) {
const Phrase* sourcePhrase = prevOption->GetSourcePhrase();
edge = &(sourcePhrase->GetWord(sourcePhrase->GetSize()-1));
}
position = &sourcePrev;
} else if (!m_source && m_curr) {
edge = &(currOption->GetTargetPhrase().GetWord(0));
position = &targetCurr;
} else {
if (prevOption) {
const Phrase& targetPhrase = prevOption->GetTargetPhrase();
edge = &(targetPhrase.GetWord(targetPhrase.GetSize()-1));
}
position = &targetPrev;
}
ostringstream namestr;
namestr << *position;
namestr << m_factor;
namestr << ":";
if (edge) {
const string& word = edge->GetFactor(m_factor)->GetString();
if (!checkVocab(word)) return;
namestr << word;
} else {
namestr << BOS;
}
FName name(prefix,namestr.str());
++scores[name];
}
/** Assign the total score of this feature on the current hypo */
void ReorderingFeatureFunction::assignScore(FVector& scores)
{
const Hypothesis* currHypo = getSample().GetTargetTail();
const TranslationOption* prevOption = NULL;
while ((currHypo = (currHypo->GetNextHypo()))) {
const TranslationOption* currOption = &(currHypo->GetTranslationOption());
assign(prevOption,currOption,scores);
prevOption = currOption;
}
}
/** Score due to one segment */
void ReorderingFeatureFunction::doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), option, scores);
}
if (gap.rightHypo) {
assign(option,&(gap.rightHypo->GetTranslationOption()), scores);
}
}
/** Score due to two segments. The left and right refer to the target positions.**/
void ReorderingFeatureFunction::doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores)
{
if (gap.leftHypo) {
assign(&(gap.leftHypo->GetTranslationOption()), leftOption,scores);
}
assign(leftOption,rightOption,scores);
if (gap.rightHypo) {
assign(rightOption, &(gap.rightHypo->GetTranslationOption()), scores);
}
}
void ReorderingFeatureFunction::doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.leftHypo) {
assign(&(leftGap.leftHypo->GetTranslationOption()),leftOption,scores);
}
assign(leftOption, &(leftGap.rightHypo->GetTranslationOption()), scores);
assign(&(rightGap.leftHypo->GetTranslationOption()),rightOption,scores);
if (rightGap.rightHypo) {
assign(rightOption, &(rightGap.rightHypo->GetTranslationOption()),scores);
}
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void ReorderingFeatureFunction::doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores)
{
if (leftGap.segment.GetEndPos()+1 == rightGap.segment.GetStartPos()) {
TargetGap gap(leftGap.leftHypo, rightGap.rightHypo, WordsRange(leftGap.segment.GetStartPos(),rightGap.segment.GetEndPos()));
doContiguousPairedUpdate(leftOption,rightOption,gap,scores);
} else {
doDiscontiguousPairedUpdate(leftOption,rightOption,leftGap,rightGap,scores);
}
}
}

137
josiah/ReorderingFeature.h Normal file
View File

@@ -0,0 +1,137 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/unordered_set.hpp>
#include "FeatureFunction.h"
namespace Josiah {
typedef boost::unordered_set<std::string> vocab_t;
/**
* Used to define different types of reordering features.
**/
class ReorderingFeatureTemplate {
public:
ReorderingFeatureTemplate(): m_vocab(NULL) {}
static std::string BOS;
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores) = 0;
void setVocab(vocab_t* vocab) {m_vocab = vocab;}
bool checkVocab(const std::string& word) const ;
virtual ~ReorderingFeatureTemplate() {}
private:
vocab_t* m_vocab;
};
class EdgeReorderingFeatureTemplate : public ReorderingFeatureTemplate {
public:
EdgeReorderingFeatureTemplate(size_t factor, bool source, bool curr) : m_factor(factor), m_source(source), m_curr(curr) {}
virtual void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption,
const std::string& prefix, FVector& scores);
private:
size_t m_factor;
bool m_source; //source or target?
bool m_curr; //curr or prev?
};
/**
* Features related to the ordering between segments.
**/
class ReorderingFeature : public Feature {
public:
/**
* The msd vector will indicate which types of msd features are to be included. Each element is made
* up of four parts, separated by colons. The fields are:
* type: The type of feature (currently only edge is supported)
* factor_id: An integer representing the factor
* source/target: One of two possible values indicating whether the
* source or target words are used.
* prev/curr: Indicates whether the feature uses the previous or
* current segment
*
* The msdVocab configuration items specify a vocabulary file for
* the source or target of a given factor. The format of these config
* strings is factor_id:source/target:filename
*
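* Example (editor's illustration): the msd entry "edge:1:target:prev"
* creates edge features over factor 1, keyed on the last target word of
* the previous segment; the msdVocab entry "1:target:tags.txt" would
* restrict those features to the entries of tags.txt.
*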
*/
ReorderingFeature(const std::vector<std::string>& msd,
const std::vector<std::string>& msdVocab);
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
const std::vector<ReorderingFeatureTemplate*>& getTemplates() const;
private:
std::vector<ReorderingFeatureTemplate*> m_templates;
std::map<size_t,vocab_t> m_sourceVocabs;
std::map<size_t,vocab_t > m_targetVocabs;
void loadVocab(std::string filename, vocab_t* vocab);
};
class ReorderingFeatureFunction : public FeatureFunction {
public:
ReorderingFeatureFunction(const Sample& sample, const ReorderingFeature& parent);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
const ReorderingFeature& m_parent;
/** Assign features for the following two options, assuming they are contiguous on the target side */
void assign(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption, FVector& scores);
/** Monotone, swapped or discontinuous? The segments are assumed to have contiguous translations on the target side. */
const std::string& getMsd(const Moses::TranslationOption* prevOption, const Moses::TranslationOption* currOption);
};
}
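Editor's note: a hedged usage sketch of the configuration format documented above (paths and values are illustrative, not from the commit):
#include <string>
#include <vector>
void exampleReorderingSetup() {
  std::vector<std::string> msd, msdVocab;
  //edge feature on factor 0, keyed on the first source word of the current segment
  msd.push_back("edge:0:source:curr");
  //restrict source factor 0 to the words listed in vocab.txt (illustrative path)
  msdVocab.push_back("0:source:vocab.txt");
  Josiah::ReorderingFeature reorder(msd, msdVocab); //throws if vocab.txt cannot be opened
}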

19
josiah/SampleCollector.cpp Normal file
View File

@@ -0,0 +1,19 @@
#include "SampleCollector.h"
#include "Gibbler.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
void PrintSampleCollector::collect(Sample& sample) {
cout << "Sampled hypothesis: \"";
sample.GetSampleHypothesis()->ToStream(cout);
cout << "\"" << " " << "Feature values: " << sample.GetFeatureValues() << endl;
}
void SampleCollector::addSample( Sample & sample) {
collect(sample);
++m_n;
}
}

41
josiah/SampleCollector.h Normal file
View File

@@ -0,0 +1,41 @@
#pragma once
#include <vector>
namespace Josiah {
class Sample;
/**
* Used by the operators to collect samples, for example to count ngrams, or just to print
* them out.
**/
class SampleCollector {
public:
SampleCollector(): m_n(0) {}
virtual void addSample(Sample& sample);
/** Number of samples */
size_t N() const {return m_n;}
virtual ~SampleCollector() {}
void reset() {
m_n = 0;
}
void SetN(size_t n) { m_n = n;}
protected:
/** The actual collection.*/
virtual void collect(Sample& sample) = 0;
private:
size_t m_n;
};
class PrintSampleCollector : public virtual SampleCollector {
public:
virtual void collect(Sample& sample);
virtual ~PrintSampleCollector() {}
};
}
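Editor's note: a hedged sketch of the intended subclassing pattern, counting samples whose target side reaches a minimum length (GetTargetWords is the Sample accessor used elsewhere in this change; the class itself is illustrative):
namespace Josiah {
class LongSampleCounter : public SampleCollector {
public:
  explicit LongSampleCounter(size_t minWords) : m_minWords(minWords), m_count(0) {}
  size_t count() const { return m_count; }
protected:
  //collect() is the only member a subclass must override
  virtual void collect(Sample& sample) {
    if (sample.GetTargetWords().size() >= m_minWords) ++m_count;
  }
private:
  size_t m_minWords;
  size_t m_count;
};
}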

431
josiah/SampleRank.cpp Normal file
View File

@@ -0,0 +1,431 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <functional>
#include <iostream>
#include <iomanip>
#include <fstream>
#ifdef MPI_ENABLED
#include <mpi.h>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include "Bleu.h"
#include "Decoder.h"
#include "GibbsOperator.h"
#include "Gibbler.h"
#include "InputSource.h"
#include "MpiDebug.h"
#include "OnlineLearner.h"
#include "OnlineTrainingCorpus.h"
#include "PhraseFeature.h"
#include "Sampler.h"
#include "SampleRankSelector.h"
#include "Utils.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
using boost::is_any_of;
namespace po = boost::program_options;
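/** Average the current weight vector across all MPI processes and broadcast
 *  the result, so that every process continues from the same mixed weights. */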
static void MixWeights(size_t size, size_t rank) {
#ifdef MPI_ENABLED
FVector avgWeights;
FVector& currWeights = WeightManager::instance().get();
MPI_VERBOSE(1, "Before mixing, current weights " << currWeights << endl);
mpi::communicator world;
mpi::reduce(world,currWeights,avgWeights, FVectorPlus(),0);
if (rank == 0) {
avgWeights /= size;
}
mpi::broadcast(world,avgWeights,0);
WeightManager::instance().get() = avgWeights;
MPI_VERBOSE(1, "After mixing, current weights: " << avgWeights << endl);
#endif
}
int main(int argc, char** argv) {
int rank = 0, size = 1;
#ifdef MPI_ENABLED
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm,&rank);
MPI_Comm_size(comm,&size);
cerr << "MPI rank: " << rank << endl;
cerr << "MPI size: " << size << endl;
#endif
size_t iterations;
string feature_file;
int debug;
int mpidebug;
string mpidebugfile;
int burning_its;
string inputfile;
string mosesini;
bool help;
uint32_t seed;
string weightfile;
vector<string> ref_files;
size_t batchLines;
size_t epochLines;
size_t epochs;
string weight_dump_stem;
size_t weight_dump_batches;
size_t weight_dump_samples;
bool weight_dump_current;
size_t lag;
string learnerName;
bool chiang_target;
bool always_update;
bool update_target;
float cwInitialVariance, cwConfidence;
float perceptron_lr;
float fixed_temperature;
float fixed_temperature_scaling;
bool slack_rescaling, scale_loss_by_target_gain;
vector<float> burnin_anneal;
bool closestBestNeighbour;
bool approxDocBleu;
float approxDocBleuDecay;
bool fix_margin;
float margin, slack;
float tolerance;
bool ignoreUWP;
bool disableUWP;
bool l1Normalise, l2Normalise;
float norm, scale_margin;
float flip_prob, merge_split_prob, retrans_prob;
size_t merge_split_toptions, retrans_toptions;
bool enable_trans_options_cache;
bool use_alignment_info;
po::options_description desc("Allowed options");
desc.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("config,f",po::value<string>(&mosesini),"Moses ini file")
("verbosity,v", po::value<int>(&debug)->default_value(0), "Verbosity level")
("mpi-debug-level", po::value<int>(&MpiDebug::verbosity)->default_value(0), "Verbosity level for debugging messages used in mpi.")
("mpi-debug-file", po::value<string>(&mpidebugfile), "Debug file stem for use by mpi processes")
("random-seed,e", po::value<uint32_t>(&seed), "Random seed")
("iterations,s", po::value<size_t>(&iterations)->default_value(10),
"Number of sampler iterations")
("burn-in,b", po::value<int>(&burning_its)->default_value(1), "Duration (in sampling iterations) of burn-in period")
("input-file,i",po::value<string>(&inputfile),"Input file containing tokenised source")
("weights,w",po::value<string>(&weightfile),"Weight file")
("ref,r", po::value<vector<string> >(&ref_files), "Reference translation files for training")
("extra-feature-config,X", po::value<string>(&feature_file), "Configuration file for extra (non-Moses) features")
("batch-lines", po::value<size_t>(&batchLines)->default_value(1), "Number of lines in each training batch")
("epoch-lines", po::value<size_t>(&epochLines)->default_value(1000), "Number of lines in each epoch")
("epochs", po::value<size_t>(&epochs)->default_value(1), "Number of training epochs")
("weight-dump-stem", po::value<string>(&weight_dump_stem)->default_value(""), "Stem of filename to use for dumping weights - leave empty for no dumping")
("weight-dump-batches", po::value<size_t>(&weight_dump_batches)->default_value(0), "Number of batches to process before dumping weights")
("weight-dump-samples", po::value<size_t>(&weight_dump_samples)->default_value(0), "Number of samples to process before dumping weights")
("weight-dump-current",po::value<bool>(&weight_dump_current)->zero_tokens()->default_value(false), "Dump the current weights, instead of the averaged weights")
("lag", po::value<size_t>(&lag)->default_value(1), "How often to collect weight updates for the average weights.")
("learner", po::value(&learnerName)->default_value("perceptron"), "Use this online learner")
("always-update", po::value<bool>(&always_update)->zero_tokens()->default_value(false),
"Always call the update, even if ranking is correct")
("update-target", po::value<bool>(&update_target)->zero_tokens()->default_value(false),
"Update towards target, not chosen")
("chiang-target", po::value<bool>(&chiang_target)->zero_tokens()->default_value(false),
"Use Chiang's gain+score to choose the target")
("cw-initial-variance", po::value<float>(&cwInitialVariance)->default_value(1.0f), "Initial variance for CW Learning")
("cw-confidence", po::value<float>(&cwConfidence)->default_value(1.644854f), "Initial confidence value for CW Learning, use value in probit([0.5,1.0])")
("perc-lr", po::value<float>(&perceptron_lr)->default_value(1.0f), "Perceptron learning rate")
("use-slack-rescaling", po::value<bool>(&slack_rescaling)->zero_tokens()->default_value(false), "Use slack rescaling in mira (default is margin rescaling)")
("scale-loss-by-target-gain", po::value<bool>(&scale_loss_by_target_gain)->zero_tokens()->default_value(false), "Scale the loss by the target gain")
("fixed-temperature", po::value<float>(&fixed_temperature)->default_value(1.0f), "Temperature for fixed temp sample acceptor")
("scale-fixed-temperature", po::value<float>(&fixed_temperature_scaling)->default_value(1.0f), "Scaling applied to fixed temperature at the end of an epoch")
("burnin-anneal", po::value<vector<float> >(&burnin_anneal)->multitoken(), "Specify (start stop floor ratio) for burnin annealing")
("closest-best-neighbour", po::value(&closestBestNeighbour)->zero_tokens()->default_value(false), "Closest best neighbour")
("use-approx-doc-bleu", po::value(&approxDocBleu)->zero_tokens()->default_value(false), "Compute approx doc bleu as gain")
("approx-doc-bleu-decay", po::value<float>(&approxDocBleuDecay)->default_value(0.9), "Decay factor for approx doc bleu")
("fix-margin", po::value(&fix_margin)->zero_tokens()->default_value(false), "Do MIRA update with a specified margin")
("margin", po::value<float>(&margin)->default_value(1.0f), "Margin size")
("slack", po::value<float>(&slack)->default_value(-1.0f), "Slack")
("tolerance", po::value<float>(&tolerance)->default_value(0.0f), "Difference between chosen bleu and target bleu must be greater than this to force a weight update")
("ignore-uwp", po::value<bool>(&ignoreUWP)->zero_tokens()->default_value(false), "Ignore unknown word penalty weight when training")
("disable-uwp", po::value<bool>(&disableUWP)->zero_tokens()->default_value(false), "Disable the unknown word penalty weight when training")
("l1normalise", po::value<bool>(&l1Normalise)->zero_tokens()->default_value(false), "L1normalise weight vector during MIRA samplerank training")
("l2normalise", po::value<bool>(&l2Normalise)->zero_tokens()->default_value(false), "L2normalise weight vector during MIRA samplerank training")
("norm", po::value<float>(&norm)->default_value(1.0f), "Normalise weight vector to this value")
("margin-scale", po::value<float>(&scale_margin)->default_value(1.0f), "Scale margin by this factor")
("enable-trans-options-cache", po::value<bool>(&enable_trans_options_cache)->zero_tokens()->default_value(false), "Enable the translation options cache")
("flip-prob", po::value<float>(&flip_prob)->default_value(0.6f), "Probability of applying flip operator during random scan")
("merge-split-prob", po::value<float>(&merge_split_prob)->default_value(0.2f), "Probability of applying merge-split operator during random scan")
("retrans-prob", po::value<float>(&retrans_prob)->default_value(0.2f), "Probability of applying retrans operator during random scan")
("merge-split-toptions", po::value<size_t>(&merge_split_toptions)->default_value(20), "Maximum number of translation options for merge-split")
("retrans-toptions", po::value<size_t>(&retrans_toptions)->default_value(20), "Maximum number of translation options for retrans")
("use-alignment-info",po::value<bool>(&use_alignment_info)->zero_tokens()->default_value(false), "Load the alignment info from the phrase table")
;
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
std::cout << "Usage: " + string(argv[0]) + " -f mosesini-file [options]" << std::endl;
std::cout << desc << std::endl;
return 0;
}
if (weightfile.empty()) {
std::cerr << "Setting all feature weights to zero" << std::endl;
WeightManager::init();
} else {
std::cerr << "Loading feature weights from " << weightfile << std::endl;
WeightManager::init(weightfile);
}
if (mosesini.empty()) {
cerr << "Error: No moses ini file specified" << endl;
return 1;
}
if (mpidebugfile.length()) {
MpiDebug::init(mpidebugfile,rank);
}
float opProb = flip_prob + merge_split_prob + retrans_prob;
if (fabs(1.0 - opProb) > 0.00001) {
std::cerr << "Incorrect usage: specified operator probs should sum up to 1" << std::endl;
return 0;
}
if (burnin_anneal.size() && burnin_anneal.size() != 4) {
cerr << "Error: --burnin-anneal requires 4 arguments" << endl;
return 1;
}
if (weight_dump_stem.size() && !weight_dump_samples && !weight_dump_batches) {
cerr << "Error: If weight_dump_stem is set then must specify either " << endl;
cerr << " --weight_dump_samples or --weight_dump_batches" << endl;
return 1;
}
if (weight_dump_samples && weight_dump_batches) {
cerr << "Error: Must specify either --weight-dump-samples or --weight-dump-batches" << endl;
return 1;
}
//set up moses
vector<string> extraArgs;
extraArgs.push_back("-ttable-limit");
size_t ttableLimit = max(merge_split_toptions, retrans_toptions);
ostringstream ttableLimitConfig;
ttableLimitConfig << ttableLimit;
extraArgs.push_back(ttableLimitConfig.str());
if (!enable_trans_options_cache) {
extraArgs.push_back("-persistent-cache-size");
extraArgs.push_back("0");
}
if (use_alignment_info) {
extraArgs.push_back("-use-alignment-info");
}
initMoses(mosesini,debug,extraArgs);
FeatureVector features;
FVector coreWeights;
configure_features_from_file(feature_file, features,disableUWP,coreWeights);
std::cerr << "Using " << features.size() << " features" << std::endl;
if (vm.count("random-seed")) {
RandomNumberGenerator::instance().setSeed(seed + rank);
}
auto_ptr<Bleu> bleu(new Bleu());
if (approxDocBleu) {
bleu->SetSmoothingWeight(approxDocBleuDecay);
}
auto_ptr<Gain> gain(bleu);
gain->LoadReferences(ref_files,inputfile);
Sampler sampler;
sampler.SetLag(1); //thinning factor for sample collection
//configure the sampler
MergeSplitOperator mso(merge_split_prob,merge_split_toptions);
FlipOperator fo(flip_prob);
TranslationSwapOperator tso(retrans_prob,retrans_toptions);
sampler.AddOperator(&mso);
sampler.AddOperator(&tso);
sampler.AddOperator(&fo);
//Target Assigner
TargetAssignerHandle tgtAssigner;
if (closestBestNeighbour) {
tgtAssigner.reset(new ClosestBestNeighbourTgtAssigner());
} else if (chiang_target) {
tgtAssigner.reset(new ChiangBestNeighbourTgtAssigner());
}
else {
tgtAssigner.reset(new BestNeighbourTgtAssigner());
}
//Add the learner
OnlineLearnerHandle onlineLearner;
if (learnerName == "perceptron") {
boost::shared_ptr<PerceptronLearner> perceptron(new PerceptronLearner());
perceptron->setLearningRate(perceptron_lr);
onlineLearner = perceptron;
} else if (learnerName == "mira+") {
boost::shared_ptr<MiraPlusLearner> mp(new MiraPlusLearner());
mp->setSlack(slack);
mp->setMarginScale(scale_margin);
mp->setFixMargin(fix_margin);
mp->setMargin(margin);
onlineLearner = mp;
} else if (learnerName == "mira") {
boost::shared_ptr<MiraLearner> m(new MiraLearner());
m->setSlack(slack);
m->setMarginScale(scale_margin);
m->setFixMargin(fix_margin);
m->setMargin(margin);
m->setUseSlackRescaling(slack_rescaling);
m->setScaleLossByTargetGain(scale_loss_by_target_gain);
onlineLearner = m;
} else {
throw runtime_error("Unknown learner: " + learnerName);
}
sampler.SetIterations(iterations);
sampler.SetBurnIn(burning_its);
OnlineTrainingCorpus trainingCorpus
(inputfile,
batchLines,
epochLines,
epochs*epochLines,
size,
rank);
bool byBatch = false;
size_t weightDumpFrequency = weight_dump_samples;
if (!weightDumpFrequency) {
weightDumpFrequency = weight_dump_batches;
byBatch = true;
}
WeightCollectorHandle weightCollector(
new WeightCollector(weightDumpFrequency,byBatch,weight_dump_stem,size,rank));
weightCollector->SetL1Normalise(l1Normalise);
weightCollector->SetDumpCurrent(weight_dump_current);
weightCollector->SetLag(lag);
while (trainingCorpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
trainingCorpus.GetNextBatch(&lines,&lineNumbers, &shouldMix);
//Makes sure that t-options get sorted by the appropriate weights
FVector currentWeights;
if (coreWeights != FVector()) {
currentWeights = coreWeights;
} else if (weightCollector->getBatchCount()) {
currentWeights = weightCollector->getAverageWeights();
} else {
currentWeights = WeightManager::instance().get();
}
setMosesWeights(currentWeights);
//Generate random hypotheses
vector<TranslationHypothesis> translations;
for (size_t i = 0; i < lines.size(); ++i) {
translations.push_back(TranslationHypothesis(lines[i]));
cerr << "Source sentence: " << lines[i] << endl;
cerr << "Seed hypothesis: " << *(translations.back().getHypothesis()) << endl;
}
//The selector for this sentence
GainFunctionHandle gf = gain->GetGainFunction(lineNumbers);
SampleRankSelector selector(gf, onlineLearner, tgtAssigner, weightCollector);
selector.SetTemperature(fixed_temperature);
//burnin annealing
auto_ptr<AnnealingSchedule> annealer;
if (burnin_anneal.size()) {
annealer.reset(new ExponentialAnnealingSchedule
(burnin_anneal[0],burnin_anneal[1], burnin_anneal[2], burnin_anneal[3]));
} else {
//fixed temp
annealer.reset(new ExponentialAnnealingSchedule
(fixed_temperature,fixed_temperature,fixed_temperature,1));
}
selector.SetBurninAnnealer(annealer.get());
selector.SetIgnoreUnknownWordPenalty(ignoreUWP);
selector.SetTolerance(tolerance);
selector.SetAlwaysUpdate(always_update);
selector.SetUpdateTarget(update_target);
sampler.SetSelector(&selector);
sampler.Run(translations,features);
//cerr << "Performed " << onlineLearner->GetNumUpdates() << " updates for this sentence" << endl;
if (size == 1) {
cerr << "Batch count: " << weightCollector->getBatchCount() << endl;
cerr << "Curr Weights : " << WeightManager::instance().get() << endl;
cerr << "Average Weights : " << weightCollector->getAverageWeights() << endl;
} else {
MPI_VERBOSE(1,"Batch count: " << weightCollector->getBatchCount() << endl);
MPI_VERBOSE(1,"Current Weights : " << WeightManager::instance().get() << endl);
MPI_VERBOSE(1,"Average Weights : " << weightCollector->getAverageWeights() << endl);
}
//PhraseFeature::updateWeights(WeightManager::instance().get());
if (approxDocBleu) {
//This sends the smoothing stats from gf to gain, and resets gf's smoothing stats
gf->UpdateSmoothingStats();
}
if (shouldMix) {
MixWeights(size,rank);
fixed_temperature *= fixed_temperature_scaling;
VERBOSE(1,"Fixed temperature scaled by " << fixed_temperature_scaling << " to " << fixed_temperature << endl);
}
weightCollector->endBatch();
}
#ifdef MPI_ENABLED
MPI_Finalize();
#endif
return 0;
}

402
josiah/SampleRankSelector.cpp Normal file
View File

@@ -0,0 +1,402 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SampleRankSelector.h"
#ifdef MPI_ENABLED
#include <mpi.h>
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
namespace mpi=boost::mpi;
#endif
#include "Gibbler.h"
#include "MpiDebug.h"
using namespace Moses;
using namespace std;
namespace Josiah {
DeltaGain::DeltaGain(const GainFunctionHandle& gainFunction,const SampleVector& samples, size_t sampleId) :
m_gainFunction(gainFunction),
m_samples(samples),
m_sampleId(sampleId) {}
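//On the first call, cache one translation per sample in the batch: the delta's
//new sentence for this sample, and the current target words for the others.
//Later calls only rebuild this sample's entry before re-evaluating the gain.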
float DeltaGain::operator()(const TDeltaHandle& delta) {
if (m_translations.size() == 0) {
for (size_t i = 0; i < m_samples.size(); ++i) {
Translation translation;
if (i == m_sampleId) {
delta->getNewSentence(translation);
} else {
const vector<Word>& targetWords = m_samples[i]->GetTargetWords();
translation.reserve(targetWords.size());
for (size_t j = 0; j < targetWords.size(); ++j) {
translation.push_back(targetWords[j].GetFactor(0));
}
}
m_translations.push_back(translation);
}
} else {
m_translations[m_sampleId].clear();
delta->getNewSentence(m_translations[m_sampleId]);
}
return m_gainFunction->Evaluate(m_translations);
}
SampleRankSelector::SampleRankSelector(
const GainFunctionHandle& gainFunction,
const OnlineLearnerHandle& onlineLearner,
const TargetAssignerHandle& assigner,
const WeightCollectorHandle& weightCollector) :
m_gainFunction(gainFunction),
m_onlineLearner(onlineLearner),
m_assigner(assigner),
m_weightCollector(weightCollector),
m_burnin(false),
m_ignoreUnknownWordPenalty(false),
m_tolerance(0.0),
m_alwaysUpdate(false),
m_updateTarget(false)
{
m_unknownWordPenaltyName = StaticData::Instance().GetTranslationSystem
(TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer()
->GetScoreProducerDescription();
}
void SampleRankSelector::SetSamples(const SampleVector& samples) {
m_samples = samples;
m_optimalGain.clear();
m_optimalGain.resize(samples.size());
m_optimalGainSolutionScores.clear();
m_optimalGainSolutionScores.resize(samples.size());
}
void SampleRankSelector::SetIgnoreUnknownWordPenalty(bool ignore) {
m_ignoreUnknownWordPenalty = ignore;
}
void WeightCollector::SetL1Normalise(bool l1normalise) {
m_l1normalise = l1normalise;
}
void SampleRankSelector::SetAlwaysUpdate(bool alwaysUpdate) {
m_alwaysUpdate = alwaysUpdate;
}
void SampleRankSelector::SetUpdateTarget(bool updateTarget) {
m_updateTarget = updateTarget;
}
TDeltaHandle SampleRankSelector::Select(
size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration)
{
//choose by sampling.
if (m_burnin) {
//DeltaGain gain(m_gainFunction,m_samples,sampleId);
TDeltaHandle chosenDelta = m_burninSamplingSelector.Select(sampleId,deltas,noChangeDelta,iteration);
//cerr << "BURN " << gain(chosenDelta) << endl;
return chosenDelta;
}
TDeltaHandle chosenDelta = m_samplingSelector.Select(sampleId,deltas,noChangeDelta,iteration);
DeltaGain gain(m_gainFunction,m_samples,sampleId);
float chosenGain = gain(chosenDelta);
float chosenScore = chosenDelta->getScore();
//oracle
int target = m_assigner->getTarget(deltas, chosenDelta, gain);
if (target == -1) return chosenDelta;
//Only update if necessary, because it can be expensive
if (m_onlineLearner->usesOptimalSolution()) {
UpdateGainOptimalSol(deltas, noChangeDelta, sampleId, gain, target);
}
float targetScore = deltas[target]->getScore();
float targetGain = gain(deltas[target]);
// cerr << "CS " << chosenScore << " TS " << targetScore <<
// " CG " << chosenGain << " TG " << targetGain << endl;
//FVector oldWeights = WeightManager::instance().get();
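//SampleRank update rule: learn only when model score and gain disagree on the
//ranking of chosen vs target (beyond the tolerance), or when updates are forced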
if (m_alwaysUpdate ||
(chosenScore > targetScore && chosenGain+m_tolerance < targetGain) ||
(chosenScore < targetScore && chosenGain-m_tolerance > targetGain) ) {
FVector chosenScores = chosenDelta->getSample().GetFeatureValues() - noChangeDelta->getScores() + chosenDelta->getScores();
FVector targetScores = chosenDelta->getSample().GetFeatureValues() - noChangeDelta->getScores() + deltas[target]->getScores();
if (m_ignoreUnknownWordPenalty) {
chosenScores[m_unknownWordPenaltyName] = 0;
targetScores[m_unknownWordPenaltyName] = 0;
}
m_onlineLearner->doUpdate(chosenScores,
targetScores,
m_optimalGainSolutionScores[sampleId],
chosenGain,
targetGain,
m_optimalGain[sampleId],
WeightManager::instance().get());
}
//cerr << "WEIGHTS: " << WeightManager::instance().get() << endl;
//cerr << "BLEU: " << chosenGain << endl;
m_weightCollector->updateWeights();
//cerr << "WDIFF " << (WeightManager::instance().get() - oldWeights).l1norm() <<endl;
//For approx doc bleu
const Hypothesis* h = chosenDelta->getSample().GetSampleHypothesis();
vector<const Factor*> trans;
h->GetTranslation(&trans, 0);
m_gainFunction->AddSmoothingStats(sampleId, trans);
if (m_updateTarget) {
return deltas[target];
} else {
return chosenDelta;
}
}
void SampleRankSelector::UpdateGainOptimalSol(
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t sampleId,
DeltaGain& gain,
int target)
{
if (m_assigner->m_name != "Best") {
//need to find the best solution, since we were
//not using the best neighbour target assigner
BestNeighbourTgtAssigner tgtAssigner;
target = tgtAssigner.getTarget(deltas, noChangeDelta, gain);
}
if (target == -1) return;
float targetGain = gain(deltas[target]);
if (targetGain > m_optimalGain[sampleId]) {
m_optimalGain[sampleId] = targetGain;
m_optimalGainSolutionScores[sampleId] = deltas[target]->getSample().GetFeatureValues();
m_optimalGainSolutionScores[sampleId] += deltas[target]->getScores();
m_optimalGainSolutionScores[sampleId] -= noChangeDelta->getScores();
if (m_ignoreUnknownWordPenalty) {
m_optimalGainSolutionScores[sampleId][m_unknownWordPenaltyName] = 0;
}
VERBOSE(1,"New optimal gain " << m_optimalGain[sampleId] << endl);
}
}
void SampleRankSelector::SetTemperature(float temp) {
m_samplingSelector.SetTemperature(temp);
}
void SampleRankSelector::SetBurninAnnealer(AnnealingSchedule* schedule) {
m_burninSamplingSelector.SetAnnealingSchedule(schedule);
}
void SampleRankSelector::BeginBurnin() {
m_burnin = true;
}
void SampleRankSelector::EndBurnin() {
m_burnin = false;
}
void SampleRankSelector::SetTolerance(float tolerance) {
m_tolerance = tolerance;
}
int BestNeighbourTgtAssigner::getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf) {
//Only do best neighbour for the moment
float bestGain = -1;
int bestGainIndex = -1;
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
float gain = gf(*i);
if (gain > bestGain) {
bestGain = gain;
bestGainIndex = i - deltas.begin();
}
}
IFVERBOSE(2) {
if (bestGainIndex > -1) {
cerr << "best nbr has score " << deltas[bestGainIndex]->getScore() << " and gain " << bestGain << endl;
//cerr << "No change has score " << noChangeDelta->getScore() << " and gain " << noChangeDelta->getGain() << endl;
}
}
return bestGainIndex;
}
int ClosestBestNeighbourTgtAssigner::getTarget(const TDeltaVector& deltas, const TDeltaHandle& chosenDelta,
DeltaGain& gf) {
//Among neighbours that improve on the chosen gain, pick the one whose score is closest to the chosen delta's
float minScoreDiff = 10e10;
int closestBestNbr = -1;
float chosenGain = gf(chosenDelta);
float chosenScore = chosenDelta->getScore();
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
if (gf(*i) > chosenGain ) {
float scoreDiff = chosenScore - (*i)->getScore();
if (scoreDiff < minScoreDiff) {
minScoreDiff = scoreDiff;
closestBestNbr = i - deltas.begin();
}
}
}
return closestBestNbr;
}
int ChiangBestNeighbourTgtAssigner::getTarget
(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta, DeltaGain& gf) {
float bestGain = -1e10;
int bestGainIndex = -1;
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
float gain = gf(*i) + (*i)->getScore();
if (gain > bestGain) {
bestGain = gain;
bestGainIndex = i - deltas.begin();
}
}
IFVERBOSE(2) {
if (bestGainIndex > -1) {
cerr << "best nbr has score " << deltas[bestGainIndex]->getScore() << " and gain " << bestGain << endl;
}
}
return bestGainIndex;
}
static void DumpWeights(const string& weightDumpStem,
FVector averagedWeights, size_t size, size_t rank) {
static size_t epoch = 0;
#ifdef MPI_ENABLED
MPI_VERBOSE(1, "Before averaging, this node's average weights: " << averagedWeights << endl);
mpi::communicator world;
FVector totalWeights;
mpi::reduce(world, averagedWeights, totalWeights, FVectorPlus(),0);
#endif
if (rank == 0) {
#ifdef MPI_ENABLED
averagedWeights = totalWeights / size;
#endif
MPI_VERBOSE(1, "After averaging, average weights: " << averagedWeights << endl);
ostringstream filename;
filename << weightDumpStem << "_" << epoch;
VERBOSE(1, "Dumping weights for epoch " << epoch << " to " << filename.str() << endl);
averagedWeights.save(filename.str());
}
++epoch;
}
WeightCollector::WeightCollector(size_t frequency, bool byBatch,
const std::string& weightDumpStem, size_t size, size_t rank):
m_frequency(frequency),
m_byBatch(byBatch),
m_weightDumpStem(weightDumpStem),
m_updates(0),m_allUpdates(0),m_batches(0), m_size(size), m_rank(rank),
m_l1normalise(false),m_lag(1), m_dumpCurrent(false)
{
if (m_frequency == 0) m_weightDumpStem = "";
if (m_weightDumpStem.size()) {
if (byBatch) {
cerr << "Weight dumping by batch, frequency = " << frequency << endl;
} else {
cerr << "Weight dumping by sample, frequency = " << frequency << endl;
}
} else {
cerr << "No weight dumping " << endl;
}
m_unknownWordPenaltyName = StaticData::Instance().GetTranslationSystem(
TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer()
->GetScoreProducerDescription();
}
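//With lag L, only every L-th call contributes a weight vector to the running
//average, e.g. a lag of 10 keeps one update in ten.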
void WeightCollector::updateWeights() {
++m_allUpdates;
if (m_allUpdates % m_lag) return;
++m_updates;
FVector weights = WeightManager::instance().get();
VERBOSE(1,"CURR_WEIGHTS " << weights << endl);
if (m_l1normalise) {
float uwp = weights[m_unknownWordPenaltyName];
// if (m_ignoreUnknownWordPenalty) {
weights[m_unknownWordPenaltyName] = 0;
// }
weights /= weights.l1norm();
// if (m_ignoreUnknownWordPenalty) {
weights[m_unknownWordPenaltyName] = uwp;
// }
}
m_totalWeights += weights;
IFVERBOSE(1) {
VERBOSE(1,"AVE_WEIGHTS " << getAverageWeights() << endl);
}
// cerr << "WT " << (WeightManager::instance().get())[FName("PhraseModel_1")] << " ";
// cerr << getAverageWeights() << endl;
if (m_weightDumpStem.length() && !m_byBatch && m_updates % m_frequency == 0) {
if (m_dumpCurrent) {
DumpWeights(m_weightDumpStem, WeightManager::instance().get(), m_size, m_rank);
} else {
DumpWeights(m_weightDumpStem, getAverageWeights(), m_size, m_rank);
}
}
}
void WeightCollector::endBatch() {
++m_batches;
// cerr << "Batch " << m_batches << " rank " << m_rank << endl;
if (m_weightDumpStem.length() && m_byBatch && m_batches % m_frequency == 0) {
DumpWeights(m_weightDumpStem, getAverageWeights(), m_size, m_rank);
}
}
FVector WeightCollector::getAverageWeights() {
assert(m_updates);
return m_totalWeights/m_updates;
}
size_t WeightCollector::getBatchCount() {
return m_batches;
}
void WeightCollector::SetLag(size_t lag) {
if (lag == 0) lag = 1;
m_lag = lag;
}
void WeightCollector::SetDumpCurrent(bool dumpCurrent) {
m_dumpCurrent = dumpCurrent;
}
}

192
josiah/SampleRankSelector.h Normal file
View File

@@ -0,0 +1,192 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <boost/shared_ptr.hpp>
#include "Utils.h"
#include "Gain.h"
#include "OnlineLearner.h"
#include "Sampler.h"
#include "Selector.h"
namespace Josiah {
/** Calculates the gain due to a delta.
**/
class DeltaGain {
public:
DeltaGain(const GainFunctionHandle& gainFunction, const SampleVector& samples, size_t sampleId);
float operator()(const TDeltaHandle& delta) ;
private:
const GainFunctionHandle& m_gainFunction;
const SampleVector& m_samples;
size_t m_sampleId;
//cached list of translations
std::vector<Translation> m_translations;
};
/**
* Used to choose the oracle translation hypothesis.
**/
class TargetAssigner {
public:
TargetAssigner(const std::string& name) : m_name(name) {}
virtual ~TargetAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf) = 0;
std::string m_name;
};
class BestNeighbourTgtAssigner : public TargetAssigner {
public:
BestNeighbourTgtAssigner() : TargetAssigner("Best") {}
virtual ~BestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
class ClosestBestNeighbourTgtAssigner : public TargetAssigner {
public:
ClosestBestNeighbourTgtAssigner(): TargetAssigner("CBN") {}
virtual ~ClosestBestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
class ChiangBestNeighbourTgtAssigner : public TargetAssigner {
public:
ChiangBestNeighbourTgtAssigner(): TargetAssigner("Chiang"){}
virtual ~ChiangBestNeighbourTgtAssigner(){}
virtual int getTarget(const TDeltaVector& deltas, const TDeltaHandle& noChangeDelta,
DeltaGain& gf);
};
typedef boost::shared_ptr<TargetAssigner> TargetAssignerHandle;
/**
* In charge of collecting the weights and writing them to file.
**/
class WeightCollector {
public:
/**
* If frequency is non-zero, then dump weights to file. Count
* by samples, or by batch.
**/
WeightCollector(size_t frequency, bool byBatch,
const std::string& weightDumpStem, size_t size, size_t rank);
void updateWeights();
void endBatch();
FVector getAverageWeights();
size_t getBatchCount();
void SetL1Normalise(bool l1normalise);
void SetLag(size_t lag);
void SetDumpCurrent(bool dumpCurrent);
private:
size_t m_frequency;
bool m_byBatch;
std::string m_weightDumpStem;
FVector m_totalWeights;
size_t m_updates;
size_t m_allUpdates;
size_t m_batches;
size_t m_size;
size_t m_rank;
bool m_l1normalise;
std::string m_unknownWordPenaltyName;
size_t m_lag;
//dump current weights instead of average
bool m_dumpCurrent;
};
typedef boost::shared_ptr<WeightCollector> WeightCollectorHandle;
/**
* Implements the SampleRank algorithm: given the proposed list of deltas it chooses one,
* updates the weights accordingly, and returns the delta to apply.
**/
class SampleRankSelector : public DeltaSelector {
public:
SampleRankSelector(const GainFunctionHandle& gainFunction,
const OnlineLearnerHandle& onlineLearner,
const TargetAssignerHandle& assigner,
const WeightCollectorHandle& weightCollector);
/** Body of SampleRank algorithm */
virtual TDeltaHandle Select(size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration);
virtual void BeginBurnin();
virtual void EndBurnin();
virtual void SetSamples(const SampleVector& samples);
void SetTemperature(float temp);
void SetBurninAnnealer(AnnealingSchedule* schedule);
void SetIgnoreUnknownWordPenalty(bool ignore);
void SetTolerance(float tolerance);
void SetAlwaysUpdate(bool alwaysUpdate);
void SetUpdateTarget(bool updateTarget);
virtual ~SampleRankSelector() {}
private:
void UpdateGainOptimalSol(
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t sampleId,
DeltaGain& gain,
int target);
const GainFunctionHandle m_gainFunction;
const OnlineLearnerHandle m_onlineLearner;
const TargetAssignerHandle m_assigner;
const WeightCollectorHandle m_weightCollector;
bool m_burnin;
SamplingSelector m_samplingSelector;
SamplingSelector m_burninSamplingSelector;
//feature values for optimal gain solution
std::vector<FVector> m_optimalGainSolutionScores;
//gain of optimal gain solution
std::vector<FValue> m_optimalGain;
//The current batch of samples
SampleVector m_samples;
bool m_ignoreUnknownWordPenalty;
std::string m_unknownWordPenaltyName;
//difference between the chosen hypothesis's BLEU and the target's BLEU
//must exceed this tolerance to trigger a weight update
float m_tolerance;
//always call the updater, even if the ranking is correct. I think
//this is what Aron Culotta does.
bool m_alwaysUpdate;
//Jump to the target instead of the chosen
bool m_updateTarget;
};
}

129
josiah/Sampler.cpp Normal file

@ -0,0 +1,129 @@
#include "Sampler.h"
#include "GibbsOperator.h"
#include "Hypothesis.h"
#include "TranslationOptionCollection.h"
#include "Gibbler.h"
#include "SampleCollector.h"
using namespace std;
namespace Josiah {
void Sampler::AddOperator(GibbsOperator* o) {
m_operators.push_back(o);
}
GibbsOperator* Sampler::SampleNextOperator(const std::vector<GibbsOperator*>& operators) {
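//Roulette-wheel selection: draw u uniformly from [0,1) and walk the
//operators' cumulative scan probabilities until the running sum passes u.
//Assumes the scan probabilities sum to one and the operator list is non-empty.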
double random = RandomNumberGenerator::instance().next();
size_t position = 1;
double sum = operators[0]->GetScanProb();
for (; position < operators.size() && sum < random; ++position) {
sum += operators[position]->GetScanProb();
}
return operators[position-1];
}
void Sampler::Run(const vector<TranslationHypothesis>& translations, const FeatureVector& features, bool raoBlackwell) {
SampleVector samples;
for (size_t i = 0; i < translations.size(); ++i) {
samples.push_back(SampleHandle(new Sample(translations[i].getHypothesis(),
translations[i].getWords(), features, raoBlackwell)));
}
m_selector->SetSamples(samples);
map<GibbsOperator*, size_t> samplesPerOperator; // to keep track of number of samples per operator
for (size_t k = 0; k < m_reheatings; ++k) {
if (m_burninIts) {
m_selector->BeginBurnin();
//do some burn-in
size_t allSamples = 0;
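//Only every m_lag-th proposal counts as a burn-in iteration;
//m_lag must be at least 1 for the modulo test below to be defined.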
for (size_t its = 0; its < m_burninIts; ++allSamples) {
VERBOSE(2,"Gibbs burnin iteration: " << its << endl);
doSample(samples,translations,its);
if (allSamples % m_lag == 0) //increment now
++its;
}
m_selector->EndBurnin();
}
//Sample now
size_t samplesCollected = 0;
size_t allSamples = 0;
while(samplesCollected < m_iterations) {
VERBOSE(2,"Gibbs sampling iteration: " << allSamples << "Collected: " << samplesCollected << endl);
GibbsOperator* currOperator = doSample(samples,translations,samplesCollected);
if (currOperator) {
++samplesPerOperator[currOperator];
++allSamples;
if (allSamples % m_lag == 0) {//collect and increment now
collectSample(*samples[0]);
++samplesCollected;
}
}
}
VERBOSE(1,"Sampled " << allSamples << ", collected " << samplesCollected << endl);
IFVERBOSE(1) {
for (map<GibbsOperator*, size_t>::const_iterator it = samplesPerOperator.begin(); it != samplesPerOperator.end(); ++it) {
cerr << "Sampled operator " << (it->first)->name() << ": " << it->second << " times." << endl;
}
}
}
}
void Sampler::collectSample(Sample& sample) {
for (size_t j = 0; j < m_collectors.size(); ++j) {
m_collectors[j]->addSample(sample);
}
sample.ResetConditionalFeatureValues(); //for Rao-Blackwellisation
}
GibbsOperator* Sampler::doSample(const SampleVector& samples,
const vector<TranslationHypothesis>& translations,
size_t iteration) {
//choose an operator, and sample
GibbsOperator* currOperator = SampleNextOperator(m_operators);
TDeltaHandle noChangeDelta;
TDeltaVector deltas;
size_t sampleIndex = RandomNumberGenerator::instance().getRandomIndexFromZeroToN(samples.size());
currOperator->propose(*samples[sampleIndex],*(translations[sampleIndex].getToc()),deltas,noChangeDelta);
VERBOSE(2,"Created " << deltas.size() << " delta(s) with operator " << currOperator->name() << endl);
if (deltas.size()) {
TDeltaHandle selectedDelta = m_selector->Select(sampleIndex, deltas,noChangeDelta, iteration);
if (selectedDelta.get() != noChangeDelta.get()) {
selectedDelta->apply(*noChangeDelta);
if (m_checkFeatures) {
samples[sampleIndex]->CheckFeatureConsistency();
}
}
return currOperator;
} else {
return NULL;
}
//cerr << "Sampled sentence " << sampleIndex << " and updated to " << endl;
//for (size_t i = 0 ; i < samples.size(); ++i) {
/* const vector<Word>& words = samples[sampleIndex]->GetTargetWords();
for (size_t j = 0; j < words.size(); ++j) {
cerr << words[j];
}
cerr << endl;
cerr << "FV " << samples[sampleIndex]->GetFeatureValues() << endl;
cerr << "Iteration " << iteration << " Score " << inner_product(samples[sampleIndex]->GetFeatureValues(),
WeightManager::instance().get()) << endl; */
//}
}
}

70
josiah/Sampler.h Normal file

@ -0,0 +1,70 @@
#pragma once
#include <vector>
#include <boost/shared_ptr.hpp>
#include "Word.h"
#include "AnnealingSchedule.h"
#include "Decoder.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "Selector.h"
namespace Moses {
class Hypothesis;
class TranslationOptionCollection;
class Word;
}
using namespace Moses;
namespace Josiah {
class SampleCollector;
class GibbsOperator;
#define SAMPLEMAX 1000000
class Sampler {
private:
std::vector<SampleCollector*> m_collectors;
std::vector<GibbsOperator*> m_operators;
DeltaSelector* m_selector;
size_t m_iterations;
size_t m_burninIts;
size_t m_reheatings;
const AnnealingSchedule* m_as;
size_t m_lag;
bool m_checkFeatures;
void collectSample(Sample& sample);
GibbsOperator* SampleNextOperator(const std::vector<GibbsOperator*>& );
GibbsOperator* doSample(const SampleVector& samples,
const std::vector<TranslationHypothesis>& translations,
size_t iteration);
public:
Sampler(): m_selector(NULL), m_iterations(10), m_burninIts(0), m_reheatings(1),
m_as(NULL), m_lag(1), m_checkFeatures(false) {} //a lag of 0 would make the modulo tests in Run() undefined
void Run(const std::vector<TranslationHypothesis>& translations,
const FeatureVector& features,
bool raoBlackwell = false) ;
void AddOperator(GibbsOperator* o);
void AddCollector(SampleCollector* c) {m_collectors.push_back(c);}
void SetSelector(DeltaSelector* selector) {m_selector = selector;}
void SetIterations(size_t iterations) {m_iterations = iterations;}
void SetReheatings(size_t r) {m_reheatings = r;}
void SetLag(size_t l) {m_lag = l;}
void SetBurnIn(size_t burnin_its) {m_burninIts = burnin_its;}
void SetCheckFeatures(bool checkFeatures) {m_checkFeatures = checkFeatures;}
};
}

134
josiah/Selector.cpp Normal file

@ -0,0 +1,134 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Selector.h"
#include <fstream>
#include "StaticData.h"
#include "Util.h"
using namespace Moses;
using namespace std;
namespace Josiah {
static void getScores(const TDeltaVector& deltas, vector<double>& scores) {
for (TDeltaVector::const_iterator i = deltas.begin(); i != deltas.end(); ++i) {
scores.push_back((*i)->getScore());
}
}
static void normalize(vector<double>& scores) {
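//Scores are unnormalised log-probabilities: normalise in place by
//subtracting their log-sum-exp, so that exp(scores) sums to one.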
double sum = scores[0];
for (size_t i = 1; i < scores.size(); ++i) {
sum = log_sum(sum,scores[i]);
}
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(minus<double>(),sum));
}
static void getNormalisedScores(const TDeltaVector& deltas, vector<double>& scores, float temp) {
getScores(deltas, scores);
IFVERBOSE(2) {
cerr << "Before annealing, scores are :";
copy(scores.begin(),scores.end(),ostream_iterator<double>(cerr," "));
cerr << endl;
}
//do annealing
transform(scores.begin(),scores.end(),scores.begin(),bind2nd(multiplies<double>(), 1.0/temp));
IFVERBOSE(2) {
cerr << "After annealing, scores are :";
copy(scores.begin(),scores.end(),ostream_iterator<double>(cerr," "));
cerr << endl;
}
normalize(scores);
}
static size_t getSample(const vector<double>& scores, double random) {
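//Inverse-CDF sampling in log space: 'random' is log(u) for u ~ U(0,1);
//accumulate the log-probabilities with log_sum until the running total exceeds it.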
size_t position = 1;
double sum = scores[0];
for (; position < scores.size() && sum < random; ++position) {
sum = log_sum(sum,scores[position]);
}
size_t chosen = position-1;
VERBOSE(3,"The chosen sample is " << chosen << endl);
return chosen;
}
SamplingSelector::SamplingSelector() :
m_annealingSchedule(NULL), m_temperature(1) {}
void SamplingSelector::SetAnnealingSchedule(AnnealingSchedule* annealingSchedule) {
m_annealingSchedule = annealingSchedule;
}
void SamplingSelector::SetTemperature(float temperature) {
assert(temperature != 0);
m_temperature = temperature;
m_annealingSchedule = NULL;
}
TDeltaHandle SamplingSelector::Select(size_t, const TDeltaVector& deltas, const TDeltaHandle&, size_t iteration)
{
float T = m_temperature;
if (m_annealingSchedule) {
T = m_annealingSchedule->GetTemperatureAtTime(iteration);
}
vector<double> scores;
getNormalisedScores(deltas,scores,T);
double random = log(RandomNumberGenerator::instance().next());
size_t chosen = getSample(scores, random);
/*
cerr << "deltas: " << endl;
for (size_t i = 0; i < deltas.size(); ++i) {
cerr << scores[i] << endl;
}
cerr << "chosen " << chosen << endl;
cerr << random << endl;
*/
return deltas[chosen];
}
RandomNumberGenerator::RandomNumberGenerator() :m_dist(0,1), m_generator(), m_random(m_generator,m_dist) {
uint32_t seed;
std::ifstream r("/dev/urandom");
if (r) {
r.read((char*)&seed,sizeof(uint32_t));
}
if (r.fail() || !r) {
std::cerr << "Warning: could not read from /dev/urandom. Seeding from clock" << std::endl;
seed = time(NULL);
}
std::cerr << "Seeding random number sequence to " << seed << endl;
m_generator.seed(seed);
}
RandomNumberGenerator RandomNumberGenerator::s_instance;
}

124
josiah/Selector.h Normal file

@ -0,0 +1,124 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
/**
* Strategies used to select samples proposed by the Gibbs operators.
**/
#include <vector>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#include "AnnealingSchedule.h"
#include "Gibbler.h"
#include "TranslationDelta.h"
namespace Josiah {
/**
* Abstract base class for sample selection strategy.
**/
class DeltaSelector {
public:
virtual TDeltaHandle Select( size_t sampleId, //index of the sample within the batch
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration) = 0;
virtual void BeginBurnin() {}
virtual void EndBurnin() {}
virtual void SetSamples(const SampleVector& samples) {}
virtual ~DeltaSelector() {}
};
/**
* Selector that samples the delta by converting the scores to probabilities.
**/
class SamplingSelector : public DeltaSelector {
public:
SamplingSelector();
virtual TDeltaHandle Select( size_t sampleId,
const TDeltaVector& deltas,
const TDeltaHandle& noChangeDelta,
size_t iteration);
void SetAnnealingSchedule(AnnealingSchedule* annealingSchedule);
void SetTemperature(float temperature);
private:
//Note that the annealingSchedule overrides the temperature
AnnealingSchedule* m_annealingSchedule;
float m_temperature;
};
typedef boost::mt19937 base_generator_type;
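/**
* Numerically stable computation of log(exp(log_a) + exp(log_b)),
* factoring out the larger argument so the exponential cannot overflow.
**/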
template<class T>
T log_sum (T log_a, T log_b)
{
T v;
if (log_a < log_b) {
v = log_b+log ( 1 + exp ( log_a-log_b ));
} else {
v = log_a+log ( 1 + exp ( log_b-log_a ));
}
return ( v );
}
/**
* Wraps the random number generation and enables seeding.
**/
class RandomNumberGenerator {
//mersenne twister - and why not?
public:
static RandomNumberGenerator& instance() {return s_instance;}
double next() {return m_random();}
void setSeed(uint32_t seed){
m_generator.seed(seed);
std::cerr << "Setting random seed to " << seed << std::endl;
}
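//Returns a uniformly distributed index in [0, n); next() lies in [0,1).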
size_t getRandomIndexFromZeroToN(size_t n) {
return (size_t)(next()*n);
}
private:
static RandomNumberGenerator s_instance;
RandomNumberGenerator();
boost::uniform_real<> m_dist;
base_generator_type m_generator;
boost::variate_generator<base_generator_type&, boost::uniform_real<> > m_random;
};
struct RandomIndex {
ptrdiff_t operator() (ptrdiff_t max) {
return static_cast<ptrdiff_t>(RandomNumberGenerator::instance().getRandomIndexFromZeroToN(max));
}
};
}

61
josiah/SourceToTargetRatio.cpp Normal file

@ -0,0 +1,61 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "SourceToTargetRatio.h"
using namespace std;
using namespace Moses;
namespace Josiah {
FeatureFunctionHandle SourceToTargetRatioFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new SourceToTargetRatioFeatureFunction(sample));
}
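/** The score is 1 - |source|/|target|: zero when the lengths match, negative
when the hypothesis is shorter than the source, positive when it is longer. **/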
FValue Josiah::SourceToTargetRatioFeatureFunction::computeScore() {
return 1.0 - ((float) m_src_len /(float) getSample().GetTargetWords().size());
}
/** Score due to one segment */
FValue Josiah::SourceToTargetRatioFeatureFunction::getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) {
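//New target length = current length + the option's phrase length,
//minus the words currently occupying the gap.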
return 1.0 - ((float) m_src_len / (float) (getSample().GetTargetWords().size() + option->GetTargetPhrase().GetSize() - gap.segment.GetNumWordsCovered()));
}
/** Score due to two segments **/
FValue Josiah::SourceToTargetRatioFeatureFunction::getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,const TargetGap& gap)
{
return 1.0 - ((float) m_src_len /(float) (getSample().GetTargetWords().size()
+ leftOption->GetTargetPhrase().GetSize()
+ rightOption->GetTargetPhrase().GetSize() - gap.segment.GetNumWordsCovered()));
}
FValue Josiah::SourceToTargetRatioFeatureFunction::getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap)
{
return 1.0 - ((float) m_src_len /(float) (getSample().GetTargetWords().size() + leftOption->GetTargetPhrase().GetSize()
+ rightOption->GetTargetPhrase().GetSize() -
(leftGap.segment.GetNumWordsCovered() + rightGap.segment.GetNumWordsCovered() )) ) ;
}
/** Score due to flip */
FValue Josiah::SourceToTargetRatioFeatureFunction::getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) {
return computeScore();
}
}

66
josiah/SourceToTargetRatio.h Normal file

@ -0,0 +1,66 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <iostream>
#include "TypeDef.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
namespace Josiah {
class Sample;
class SourceToTargetRatioFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
};
class SourceToTargetRatioFeatureFunction: public SingleValuedFeatureFunction {
public:
SourceToTargetRatioFeatureFunction(const Sample& sample) : SingleValuedFeatureFunction(sample,"SourceToTargetRatio")
{ m_src_len = sample.GetSourceSize();}
virtual FValue computeScore();
/** Score due to one segment */
virtual FValue getSingleUpdateScore(const TranslationOption* option, const TargetGap& gap) ;
/** Score due to two segments **/
virtual FValue getContiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap) ;
virtual FValue getDiscontiguousPairedUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
/** Score due to flip */
virtual FValue getFlipUpdateScore(const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) ;
virtual ~SourceToTargetRatioFeatureFunction() {}
private:
size_t m_src_len;
};
}

106
josiah/StatelessFeature.cpp Normal file

@ -0,0 +1,106 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <sstream>
#include "Gibbler.h"
#include "StatelessFeature.h"
using namespace Moses;
using namespace std;
namespace Josiah {
FeatureFunctionHandle StatelessFeature::getFunction(const Sample& sample) const {
return FeatureFunctionHandle(new StatelessFeatureFunction(sample,this));
}
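//Exposes each of the wrapped Moses feature's score components as a separate
//named Josiah feature, indexed "0", "1", ... under the producer's description.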
StatelessFeatureAdaptor::StatelessFeatureAdaptor(
const MosesFeatureHandle& mosesFeature):
m_mosesFeature(mosesFeature)
{
assert(!mosesFeature->ComputeValueInTranslationOption());
for (size_t i = 0; i < mosesFeature->GetNumScoreComponents(); ++i) {
ostringstream id;
id << i;
m_featureNames.push_back(FName(mosesFeature->GetScoreProducerDescription(),id.str()));
}
}
void StatelessFeatureAdaptor::assign
(const Moses::TranslationOption* toption, FVector& scores) const {
ScoreComponentCollection scc;
m_mosesFeature->Evaluate(toption->GetTargetPhrase(),&scc);
vector<float> mosesScores = scc.GetScoresForProducer(m_mosesFeature.get());
for (size_t i = 0; i < m_featureNames.size(); ++i) {
scores[m_featureNames[i]] = mosesScores.at(i);
}
}
StatelessFeatureFunction::StatelessFeatureFunction
(const Sample& sample, const StatelessFeature* parent):
FeatureFunction(sample), m_parent(parent) {}
void StatelessFeatureFunction::assignScore(FVector& scores) {
const Hypothesis* currHypo = getSample().GetTargetTail();
while ((currHypo = (currHypo->GetNextHypo()))) {
m_parent->assign(&(currHypo->GetTranslationOption()), scores);
}
}
/** Score due to one segment */
void StatelessFeatureFunction::doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores) {
m_parent->assign(option,scores);
}
/** Score due to two segments. The left and right refer to the target positions.**/
void StatelessFeatureFunction::doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores) {
m_parent->assign(leftOption,scores);
m_parent->assign(rightOption,scores);
}
void StatelessFeatureFunction::doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
m_parent->assign(leftOption,scores);
m_parent->assign(rightOption,scores);
}
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
void StatelessFeatureFunction::doFlipUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores) {
//do nothing
}
}

92
josiah/StatelessFeature.h Normal file

@ -0,0 +1,92 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include "../moses/src/FeatureFunction.h"
#include "FeatureFunction.h"
namespace Josiah {
typedef boost::shared_ptr<Moses::StatelessFeatureFunction> MosesFeatureHandle;
/**
* Stateless Gibbler feature
**/
class StatelessFeature : public Feature {
public:
virtual FeatureFunctionHandle getFunction(const Sample& sample) const;
/** Scores due to this translation option */
virtual void assign
(const Moses::TranslationOption* option, FVector& scores) const = 0;
};
/**
* Wraps a Moses stateless feature to give a gibbler feature.
**/
class StatelessFeatureAdaptor : public StatelessFeature {
public:
StatelessFeatureAdaptor(
const MosesFeatureHandle& mosesFeature);
virtual void assign(const Moses::TranslationOption* toption, FVector& scores) const;
private:
MosesFeatureHandle m_mosesFeature;
std::vector<FName> m_featureNames;
};
class StatelessFeatureFunction : public FeatureFunction {
public:
StatelessFeatureFunction
(const Sample& sample, const StatelessFeature* parent);
/** Assign the total score of this feature on the current hypo */
virtual void assignScore(FVector& scores);
/** Score due to one segment */
virtual void doSingleUpdate
(const TranslationOption* option, const TargetGap& gap, FVector& scores);
/** Score due to two segments. The left and right refer to the target positions.**/
virtual void doContiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& gap, FVector& scores);
virtual void doDiscontiguousPairedUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
/** Score due to flip. Again, left and right refer to order on the <emph>target</emph> side. */
virtual void doFlipUpdate
(const TranslationOption* leftOption,const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap, FVector& scores);
private:
const StatelessFeature* m_parent;
};
}

25
josiah/Test.cpp Normal file

@ -0,0 +1,25 @@
/**********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
//Supplies the main for the josiah test module
#define BOOST_TEST_MODULE josiah
#include <boost/test/unit_test.hpp>

479
josiah/TestBleu.cpp Normal file

@ -0,0 +1,479 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/test/unit_test.hpp>
#include "FactorCollection.h"
#include "Util.h"
#include "Bleu.h"
using namespace Josiah;
using namespace Moses;
using namespace std;
BOOST_AUTO_TEST_SUITE(bleu)
static void checkNgram(const string& ngram, size_t count, const NGramMap& ngrams) {
Translation t;
TextToTranslation(ngram,t);
NGramMap::const_iterator i = ngrams.find(t);
size_t actualCount = 0;
if (i != ngrams.end()) {
actualCount = i->second;
}
BOOST_CHECK_MESSAGE(actualCount == count,ngram);
}
BOOST_AUTO_TEST_CASE(ref_stats_single) {
Translation ref,src;
TextToTranslation("give me the statistics on this sentence , give me",ref);
TextToTranslation("the source is not really important",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)31);
checkNgram("give", 2, ngrams);
checkNgram("me", 2, ngrams);
checkNgram("the", 1, ngrams);
checkNgram("statistics", 1, ngrams);
checkNgram("on", 1, ngrams);
checkNgram("this", 1, ngrams);
checkNgram("sentence", 1, ngrams);
checkNgram(",", 1, ngrams);
checkNgram("give me", 2, ngrams);
checkNgram("me the", 1, ngrams);
checkNgram("the statistics", 1, ngrams);
checkNgram("statistics on", 1, ngrams);
checkNgram("on this", 1, ngrams);
checkNgram("this sentence", 1, ngrams);
checkNgram("sentence ,", 1, ngrams);
checkNgram(", give", 1, ngrams);
checkNgram("give me the", 1, ngrams);
checkNgram("me the statistics", 1, ngrams);
checkNgram("the statistics on", 1, ngrams);
checkNgram("statistics on this", 1, ngrams);
checkNgram("on this sentence", 1, ngrams);
checkNgram("sentence , give", 1, ngrams);
checkNgram(", give me", 1, ngrams);
checkNgram("give me the statistics", 1, ngrams);
checkNgram("me the statistics on", 1, ngrams);
checkNgram("the statistics on this", 1, ngrams);
checkNgram("statistics on this sentence", 1, ngrams);
checkNgram("on this sentence ,", 1, ngrams);
checkNgram("this sentence , give", 1, ngrams);
checkNgram("sentence , give me", 1, ngrams);
}
BOOST_AUTO_TEST_CASE(ref_stats_multi) {
Translation ref1,ref2,ref3,src;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("not important", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)31);
checkNgram("what", 1, ngrams);
checkNgram("is", 2, ngrams);
checkNgram("this", 2, ngrams);
checkNgram("saying", 1, ngrams);
checkNgram("?", 2, ngrams);
checkNgram("what is", 1, ngrams);
checkNgram("is this", 2, ngrams);
checkNgram("this saying", 1, ngrams);
checkNgram("saying ?", 1, ngrams);
checkNgram("what saying", 1, ngrams);
checkNgram("saying is", 1, ngrams);
checkNgram("this ?", 1, ngrams);
checkNgram("? ? ", 1, ngrams);
checkNgram("this is", 1, ngrams);
checkNgram("what is this", 1, ngrams);
checkNgram("is this saying", 1, ngrams);
checkNgram("this saying ?", 1, ngrams);
checkNgram("what saying is", 1, ngrams);
checkNgram("saying is this", 1, ngrams);
checkNgram("is this ?", 1, ngrams);
checkNgram("this ? ?", 1, ngrams);
checkNgram("is this is", 1, ngrams);
checkNgram("this is this", 1, ngrams);
checkNgram("what is this saying", 1, ngrams);
checkNgram("is this saying ?", 1, ngrams);
checkNgram("what saying is this", 1, ngrams);
checkNgram("saying is this ?", 1, ngrams);
checkNgram("is this ? ?", 1, ngrams);
checkNgram("what is this is", 1, ngrams);
checkNgram("is this is this", 1, ngrams);
checkNgram("this is this ?", 1, ngrams);
}
BOOST_AUTO_TEST_CASE(ref_length_single) {
Translation ref,src;
TextToTranslation("give me the statistics on this sentence , give me",ref);
TextToTranslation("the source is not really important",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> actual = bleu.GetReferenceLengths(0);
vector<size_t> expected;
expected.push_back(10);
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected.begin(),expected.end());
}
BOOST_AUTO_TEST_CASE(ref_length_multi) {
Translation ref1,ref2,ref3,src;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("not important", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> actual = bleu.GetReferenceLengths(0);
size_t expected[] = {5,6,6};
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected, expected+3);
}
BOOST_AUTO_TEST_CASE(multi_sentence) {
Translation ref1,ref2,src;
TextToTranslation("fee fye fo fum",ref1);
TextToTranslation("hee ha haw", ref2);
TextToTranslation("ra ra ra", src);
Bleu bleu;
vector<Translation> refs;
refs.push_back(ref1);
bleu.AddReferences(refs,src);
refs.clear();
refs.push_back(ref2);
bleu.AddReferences(refs,src);
NGramMap ngrams = bleu.GetReferenceStats(0);
checkNgram("fee", 1, ngrams);
checkNgram("fye", 1, ngrams);
checkNgram("fo", 1, ngrams);
checkNgram("fum", 1, ngrams);
checkNgram("fee fye", 1, ngrams);
checkNgram("fye fo", 1, ngrams);
checkNgram("fo fum", 1, ngrams);
checkNgram("fee fye fo", 1, ngrams);
checkNgram("fye fo fum", 1, ngrams);
checkNgram("fee fye fo fum", 1, ngrams);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)10);
vector<size_t> actual = bleu.GetReferenceLengths(0);
size_t expected[] = {4};
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected,expected+1);
ngrams = bleu.GetReferenceStats(1);
checkNgram("hee", 1, ngrams);
checkNgram("ha", 1, ngrams);
checkNgram("haw", 1, ngrams);
checkNgram("hee ha", 1, ngrams);
checkNgram("ha haw", 1, ngrams);
checkNgram("hee ha haw", 1, ngrams);
BOOST_CHECK_EQUAL(ngrams.size(),(size_t)6);
actual = bleu.GetReferenceLengths(1);
expected[0] = 3;
BOOST_CHECK_EQUAL_COLLECTIONS(actual.begin(),actual.end(),expected,expected+1);
}
BOOST_AUTO_TEST_CASE(source_length) {
Translation ref, src1, src2;
TextToTranslation("ref whatever", ref);
TextToTranslation("the first source sentence", src1);
TextToTranslation("the second source sentence a bit longer than the first",src2);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src1);
bleu.AddReferences(refs,src2);
BOOST_CHECK_EQUAL(bleu.GetSourceLength(0), (size_t)4);
BOOST_CHECK_EQUAL(bleu.GetSourceLength(1), (size_t)10);
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_1hyp_nobp) {
Translation ref, src, hyp;
TextToTranslation("this is the correct one , this one",ref);
TextToTranslation("is this is the one , this one", hyp);
TextToTranslation("whatever",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
// precisions: 7/8, 5/7, 3/6 and 1/5
float log_expected = log(7+sm) - log(8+sm) + log(5+sm) - log(7+sm) + log(3+sm) - log(6+sm) + log(1+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_CLOSE(actual,100*exp(log_expected),0.001);
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_3src_nobp) {
Translation ref0,ref1,ref2,hyp0,hyp1,hyp2,src;
TextToTranslation("the first ref", ref0);
TextToTranslation("the second ref 2", ref1);
TextToTranslation("another ref",ref2);
TextToTranslation("the first guessed ref", hyp0);
TextToTranslation("this is the second hypothesis", hyp1);
TextToTranslation("another ref hyp",hyp2);
TextToTranslation("whatever",src);
vector<Translation> refs(1);
Bleu bleu;
refs[0] = ref0;
bleu.AddReferences(refs,src);
refs[0] = ref1;
bleu.AddReferences(refs,src);
refs[0] = ref2;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
sentenceIds.push_back(1);
sentenceIds.push_back(2);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
vector<Translation> hyps;
hyps.push_back(hyp0);
hyps.push_back(hyp1);
hyps.push_back(hyp2);
float actual = gf->Evaluate(hyps);
float sm = BLEU_SMOOTHING;
//precision 7/12, 3/9, 0/6 and 0/3
float log_expected = log(7+sm) - log(12+sm) + log(3+sm) - log(9+sm) + log(0+sm) - log(6+sm) + log(0+sm) - log(3+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_1ref_1src_bp) {
Translation ref, src, hyp;
TextToTranslation("this is the correct one , this one",ref);
TextToTranslation("this is the short one", hyp);
TextToTranslation("whatever",src);
vector<Translation> refs;
refs.push_back(ref);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
//precision: 4/5, 2/4, 1/3, 0/2
float log_expected = log(4+sm) - log(5+sm) + log(2+sm) - log(4+sm) + log(1+sm) - log(3+sm) + log(0+sm) - log(2+sm);
log_expected /= BLEU_ORDER;
log_expected += (1 - 8.0/5.0);
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_caching) {
Translation ref, src, hyp1, hyp2, hyp3;
TextToTranslation("this is the reference sentence",ref);
TextToTranslation("this the reference sentence .", hyp1);
TextToTranslation("the reference sentence , what ?", hyp2);
TextToTranslation("the reference phrase . where ?", hyp3);
TextToTranslation("whatever", src);
Bleu bleu;
vector<Translation> refs1;
refs1.push_back(ref);
bleu.AddReferences(refs1,src);
vector<Translation> refs2;
refs2.push_back(ref);
bleu.AddReferences(refs2,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
sentenceIds.push_back(1);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
vector<Translation> hyps1;
hyps1.push_back(hyp1);
hyps1.push_back(hyp2);
float actual = gf->Evaluate(hyps1);
float sm = BLEU_SMOOTHING;
//precision 7/11, 4/9, 2/7, 0/5
float log_expected = log(7+sm) - log(11+sm) + log(4+sm) - log(9+sm) + log(2+sm) - log(7+sm) + log(0+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_CLOSE(actual, 100*exp(log_expected),0.01);
vector<Translation> hyps2;
hyps2.push_back(hyp1);
hyps2.push_back(hyp3);
actual = gf->Evaluate(hyps2);
//precision 6/11, 3/9, 1/7, 0/5
log_expected = log(6+sm) - log(11+sm) + log(3+sm) - log(9+sm) + log(1+sm) - log(7+sm) + log(0+sm) - log(5+sm);
log_expected /= BLEU_ORDER;
BOOST_CHECK_EQUAL(actual, 100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(evaluate_3ref_1src_bp) {
Translation ref1, ref2, ref3, src, hyp;
TextToTranslation("what is this saying ?", ref1);
TextToTranslation("what saying is this ? ?", ref2);
TextToTranslation("what is this is this ?", ref3);
TextToTranslation("is this is what", hyp);
TextToTranslation("whatever", src);
vector<Translation> refs;
refs.push_back(ref1);
refs.push_back(ref2);
refs.push_back(ref3);
Bleu bleu;
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp);
float sm = BLEU_SMOOTHING;
//precision 4/4, 2/3, 1/2, 0/1
float log_expected = log(4+sm) - log(4+sm) + log(2+sm) - log(3+sm) + log(1+sm) - log(2+sm) + log(0+sm) - log(1+sm);
log_expected /= BLEU_ORDER;
//closest
log_expected += (1-5.0/4.0);
BOOST_CHECK_EQUAL(actual,100*exp(log_expected));
}
BOOST_AUTO_TEST_CASE(update_smoothing) {
Bleu bleu;
float sw = 0.5;
bleu.SetSmoothingWeight(sw);
BleuStats stats = bleu.GetSmoothingStats();
for (size_t order = 1; order < BLEU_ORDER; ++order) {
BOOST_CHECK_CLOSE(stats.tp(order), BLEU_SMOOTHING, 0.001);
BOOST_CHECK_CLOSE(stats.total(order), BLEU_SMOOTHING, 0.001);
}
Translation src,ref,hyp1,hyp2;
TextToTranslation("whatever it says", src);
TextToTranslation("this is the reference sentence ok ?", ref);
TextToTranslation("is this the reference sentence ok !", hyp1);
TextToTranslation("is this the hypothesis sentence ok !", hyp2);
vector<Translation> refs;
refs.push_back(ref);
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
gf->AddSmoothingStats(0,hyp1);
gf->AddSmoothingStats(0,hyp2);
gf->UpdateSmoothingStats();
//should be the average of the two hypotheses
stats = bleu.GetSmoothingStats();
BOOST_CHECK_CLOSE(stats.tp(1), (5.5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(1), (7+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(2), (2+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(2), (6+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(3), (1+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(3), (5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.tp(4), (0.5+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.total(4), (4+BLEU_SMOOTHING)*sw, 0.001);
BOOST_CHECK_CLOSE(stats.src_len(), 3*sw, 0.001);
BOOST_CHECK_CLOSE(stats.ref_len(), 7*sw, 0.001);
BOOST_CHECK_CLOSE(stats.hyp_len(), 7*sw, 0.001);
gf->AddSmoothingStats(0,hyp1);
gf->UpdateSmoothingStats();
//previous stats should get downweighted
stats = bleu.GetSmoothingStats();
BOOST_CHECK_CLOSE(stats.tp(1), sw*(6+sw*(5.5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(1), sw*(7+sw*(7+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(2), sw*(3+sw*(2+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(2), sw*(6+sw*(6+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(3), sw*(2+sw*(1+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(3), sw*(5+sw*(5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.tp(4), sw*(1+sw*(0.5+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.total(4), sw*(4+sw*(4+BLEU_SMOOTHING)), 0.001);
BOOST_CHECK_CLOSE(stats.src_len(), sw*(3+sw*(3)), 0.001);
BOOST_CHECK_CLOSE(stats.ref_len(), sw*(7+sw*(7)), 0.001);
BOOST_CHECK_CLOSE(stats.hyp_len(), sw*(7+sw*(7)), 0.001);
}
BOOST_AUTO_TEST_CASE(evaluate_smoothing) {
Bleu bleu;
float sw = 0.5;
bleu.SetSmoothingWeight(sw);
Translation src,ref,hyp1,hyp2;
TextToTranslation("whatever it says", src);
TextToTranslation("this is the reference sentence ok ?", ref);
TextToTranslation("is this the reference sentence ok !", hyp1);
TextToTranslation("is this the hypothesis sentence ok !", hyp2);
vector<Translation> refs;
refs.push_back(ref);
bleu.AddReferences(refs,src);
vector<size_t> sentenceIds;
sentenceIds.push_back(0);
GainFunctionHandle gf = bleu.GetGainFunction(sentenceIds);
float actual = gf->Evaluate(hyp1);
//should just be standard smoothed bleu
float sm = BLEU_SMOOTHING;
//precision 6/7, 3/6, 2/5, 1/4
float log_expected = log(6+sm) - log(7+sm) + log(3+sm) - log(6+sm) + log(2+sm) - log(5+sm) + log(1+sm) - log(4+sm);
log_expected /= BLEU_ORDER;
log_expected += log(3); //multiply by source length
BOOST_CHECK_CLOSE(actual,exp(log_expected),0.001);
gf->AddSmoothingStats(0,hyp1);
gf->UpdateSmoothingStats();
//now should get smoothed bleu
actual = gf->Evaluate(hyp2);
//precision 5/7 1/6 0/5 0/4
log_expected = log(5+sw*(6+sm)) - log(7+sw*(7+sm)) + log(1+sw*(3+sm)) - log(6+sw*(6+sm)) + log(0+sw*(2+sm))
- log(5 + sw*(5+sm)) + log(0 + sw*(1+sm)) - log(4+sw*(4+sm));
//cerr << log(5+sw*(6+sm)) << " " << log(7+sw*(7+sm)) << " " << log(1+sw*(3+sm)) << " " <<
// log(6+sw*(6+sm)) << " " << log(0+sw*(2+sm)) << " " << log(5 + sw*(5+sm)) << " " <<
// log(0 + sw*(1+sm)) << " " << log(4+sw*(4+sm)) << endl;
log_expected /= BLEU_ORDER;
log_expected += log(3 + sw*3); // weight bleu by (O_f + |f|)
BOOST_CHECK_CLOSE(actual,exp(log_expected),0.001);
}
BOOST_AUTO_TEST_SUITE_END()

159
josiah/TestOnlineTrainingCorpus.cpp Normal file

@ -0,0 +1,159 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <set>
#include <stdexcept>
#include <boost/test/unit_test.hpp>
#include "OnlineTrainingCorpus.h"
using namespace Josiah;
using namespace std;
BOOST_AUTO_TEST_SUITE(online_training_corpus)
class SourceFixture {
public:
SourceFixture() {
sourceFile = string(tmpnam(NULL));
size_t sourceSize = 20;
string line = "one two three four five six seven eight nine ten";
ofstream sourceHandle(sourceFile.c_str());
for (size_t i = 0; i < sourceSize; ++i) {
sourceHandle << line << " " << i << endl;
}
}
~SourceFixture() {
BOOST_CHECK(!remove(sourceFile.c_str()));
}
string sourceFile;
};
BOOST_FIXTURE_TEST_CASE(ctor_validate_lines, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus(sourceFile,1,10,50,1,0);
//epoch size not divisible by batch size
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile,3,10,50,1,0), runtime_error);
//max not divisible by shard
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile,2,10,52,1,0), runtime_error);
//for mpi, shard size should be divisible by batch size
// This example should give a shard size of 5, which is not divisible
// by the batch size of 2
BOOST_CHECK_THROW(OnlineTrainingCorpus(sourceFile, 2, 10, 50 , 2, 0), runtime_error);
}
BOOST_FIXTURE_TEST_CASE(batch_single_core, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus corpus(sourceFile,4,20,120,1,0);
size_t batchCount = 0;
size_t lineCount = 0;
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines, &lineNumbers, &shouldMix);
BOOST_CHECK_EQUAL(lines.size(), (size_t)4);
BOOST_CHECK_EQUAL(lineNumbers.size(), (size_t)4);
++batchCount;
lineCount += lines.size();
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
BOOST_CHECK_EQUAL(shouldMix, lineCount % 20 == 0);
//cerr << "lineCount " << lineCount << " " << shouldDump << endl;
}
BOOST_CHECK_EQUAL(lineCount, (size_t)120);
BOOST_CHECK_EQUAL(batchCount, (size_t)30);
//Each sentence should appear exactly 6 times
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)6);
}
}
BOOST_FIXTURE_TEST_CASE(batch_multi_core, SourceFixture) {
//args are batch, epoch, max, mpi_size, mpi_rank
OnlineTrainingCorpus corpus(sourceFile,4,60,120,3,0);
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines, &lineNumbers, &shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(),(size_t)40);
//Each sentence should appear exactly twice
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)2);
}
}
BOOST_FIXTURE_TEST_CASE(batch_zero, SourceFixture) {
//set batch size to 0, no mpi
//batch should be whole epoch
OnlineTrainingCorpus corpus(sourceFile,0,20,60,1,0);
//each line should be seen 3 times, and should mix after every batch
multiset<size_t> linesSeen;
while(corpus.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus.GetNextBatch(&lines,&lineNumbers,&shouldMix);
BOOST_CHECK(shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(), (size_t)60);
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)3);
}
//try again, with mpi
OnlineTrainingCorpus corpus2(sourceFile,0,20,80,3,0);
//Each line should be seen 4 times, mix after every batch
linesSeen.clear();
while(corpus2.HasMore()) {
vector<string> lines;
vector<size_t> lineNumbers;
bool shouldMix;
corpus2.GetNextBatch(&lines,&lineNumbers,&shouldMix);
BOOST_CHECK(shouldMix);
linesSeen.insert(lineNumbers.begin(), lineNumbers.end());
}
BOOST_CHECK_EQUAL(linesSeen.size(), (size_t)80);
for (multiset<size_t>::iterator i = linesSeen.begin(); i != linesSeen.end(); ++i) {
BOOST_CHECK_EQUAL((size_t)linesSeen.count(*i), (size_t)4);
}
}
BOOST_AUTO_TEST_SUITE_END()

245
josiah/TrainingSource.cpp Normal file

@ -0,0 +1,245 @@
#include "TrainingSource.h"
#ifdef MPI_ENABLED
#include <boost/mpi/communicator.hpp>
#include <boost/mpi/collectives.hpp>
#include "MpiDebug.h"
#endif
#include <cassert>
#include "Optimizer.h"
#include "Decoder.h"
#include "WeightManager.h"
using namespace std;
using namespace Moses;
#ifdef MPI_ENABLED
namespace mpi=boost::mpi;
#endif
namespace Josiah {
ExpectedBleuTrainer::ExpectedBleuTrainer(
int r,
int s,
int bsize,
vector<string>* sents,
unsigned int rseed,
bool randomize,
Optimizer* o,
int wt_dump_freq,
std::string wt_dump_stem)
: rank(r),
size(s),
batch_size(bsize),
corpus(),
keep_going(true),
order(batch_size),
rng(rseed),
dist(0, sents->size() - 1),
draw(rng, dist),
randomize_batches(randomize),
optimizer(o),
total_ref_len(),
total_exp_len(),
total_exp_gain(),
total_unreg_exp_gain(),
weight_dump_freq(wt_dump_freq),
weight_dump_stem(wt_dump_stem){
if (rank >= batch_size) keep_going = false;
corpus.swap(*sents);
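//Shard the batch evenly across MPI processes: this rank handles
//[cur_start, cur_end), and the last rank absorbs any remainder.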
int esize = min(batch_size, size);
//cerr << "esize: " << esize << endl;
int sents_per_batch = batch_size / esize;
cur = cur_start = sents_per_batch * rank;
//cerr << "sents_per_batch: " << sents_per_batch << endl;
cur_end = min((int)corpus.size(), sents_per_batch * (rank + 1));
if (rank == size - 1) cur_end = batch_size;
cerr << rank << "/" << size << ": cur_start=" << cur_start << " cur_end=" << cur_end << endl;
assert(cur_end >= cur_start);
tlc = 0;
ReserveNextBatch();
}
void ExpectedBleuTrainer::ReserveNextBatch() {
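//Rank 0 draws (or enumerates) the line order for the next batch and
//broadcasts it, so that all ranks see the same assignment.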
if (rank == 0) {
if (randomize_batches) {
for (unsigned int i = 0; i < order.size(); ++i, ++tlc)
order[i] = draw();
} else {
for (unsigned int i = 0; i < order.size(); ++i, ++tlc)
order[i] = tlc % corpus.size();
}
}
#ifdef MPI_ENABLED
mpi::broadcast(mpi::communicator(), order, 0);
// if (MPI_SUCCESS != MPI_Bcast(&order[0], order.size(), MPI_INT, 0, MPI_COMM_WORLD))
// MPI_Abort(MPI_COMM_WORLD,1);
#endif
}
bool ExpectedBleuTrainer::HasMore() const {
return keep_going && (cur < cur_end);
}
void ExpectedBleuTrainer::GetSentence(string* sentence, int* lineno) {
assert(static_cast<unsigned int>(cur) < order.size());
if (lineno) *lineno = order[cur];
*sentence = corpus[order[cur++]];
}
void ExpectedBleuTrainer::IncorporateGradient(
const FValue trans_len,
const FValue ref_len,
const FValue exp_gain,
const FValue unreg_exp_gain,
const FVector& grad) {
gradient += grad;
total_exp_gain += exp_gain;
total_unreg_exp_gain += unreg_exp_gain;
total_ref_len += ref_len;
total_exp_len += trans_len;
if (cur == cur_end) {
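//End of this rank's shard: reduce the gradients and statistics onto rank 0,
//take an optimizer step there, then broadcast the updated weights to all ranks.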
FVector& weights = WeightManager::instance().get();
FValue tg = 0, trl = 0, tel = 0, tgunreg = 0;
#ifdef MPI_ENABLED
FVector sum_gradient;
mpi::communicator world;
MPI_VERBOSE(1,"Reducing gradient, gradient = " << gradient << " rank = " << rank << endl);
mpi::reduce(world, gradient, sum_gradient, FVectorPlus(),0);
if (rank == 0) MPI_VERBOSE(1, "Reduced gradient = " << sum_gradient << endl);
mpi::reduce(world, total_exp_gain, tg, std::plus<float>(),0);
mpi::reduce(world, total_unreg_exp_gain, tgunreg, std::plus<float>(),0);
mpi::reduce(world, total_ref_len, trl, std::plus<float>(),0);
mpi::reduce(world, total_exp_len, tel, std::plus<float>(),0);
/* if (MPI_SUCCESS != MPI_Reduce(const_cast<float*>(&gradient.data()[0]), &rcv_grad[0], w.size(), MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_exp_gain, &tg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_unreg_exp_gain, &tgunreg, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_ref_len, &trl, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Reduce(&total_exp_len, &tel, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
if (rank == 0) {
gradient = sum_gradient;
}
#else
//rcv_grad = gradient.data();
tg = total_exp_gain;
tgunreg = total_unreg_exp_gain;
trl = total_ref_len;
tel = total_exp_len;
#endif
if (rank == 0) {
tg /= batch_size;
tgunreg /= batch_size;
gradient /= batch_size;
cerr << "TOTAL EXPECTED GAIN: " << tg << " (batch size = " << batch_size << ")\n";
cerr << "TOTAL UNREGULARIZED EXPECTED GAIN: " << tgunreg << " (batch size = " << batch_size << ")\n";
cerr << "EXPECTED LENGTH / REF LENGTH: " << tel << '/' << trl << " (" << (tel / trl) << ")\n";
optimizer->Optimize(tg, weights, gradient, &weights);
if (optimizer->HasConverged()) keep_going = false;
}
int iteration = optimizer->GetIteration();
#ifdef MPI_ENABLED
int kg = keep_going;
mpi::broadcast(world,weights,0);
mpi::broadcast(world,kg,0);
mpi::broadcast(world,iteration,0);
ReserveNextBatch();
world.barrier();
/* if (MPI_SUCCESS != MPI_Bcast(const_cast<float*>(&weights.data()[0]), weights.data().size(), MPI_FLOAT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&kg, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&iteration, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
ReserveNextBatch();
if (MPI_SUCCESS != MPI_Barrier(MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
keep_going = kg;
optimizer->SetIteration(iteration);
#endif
cur = cur_start;
gradient.clear();
total_exp_gain = 0;
total_unreg_exp_gain = 0;
total_exp_len = 0;
total_ref_len = 0;
if (weight_dump_freq > 0 && rank == 0 && iteration > 0 && (iteration % weight_dump_freq) == 0) {
stringstream s;
s << weight_dump_stem;
s << "_";
s << iteration;
string weight_file = s.str();
cerr << "Dumping weights to " << weight_file << endl;
WeightManager::instance().dump(weight_file);
}
}
}
void ExpectedBleuTrainer::IncorporateCorpusGradient(
const FValue trans_len,
const FValue ref_len,
const FValue exp_gain,
const FValue unreg_exp_gain,
const FVector& grad) {
if (cur == cur_end) {
FVector& weights = WeightManager::instance().get();
if (rank == 0) {
cerr << "TOTAL EXPECTED GAIN: " << exp_gain << " (batch size = " << batch_size << ")\n";
cerr << "TOTAL UNREGULARIZED EXPECTED GAIN: " << unreg_exp_gain << " (batch size = " << batch_size << ")\n";
cerr << "EXPECTED LENGTH / REF LENGTH: " << trans_len << '/' << ref_len << " (" << (trans_len / ref_len) << ")\n";
optimizer->Optimize(exp_gain, weights, grad, &weights);
if (optimizer->HasConverged()) keep_going = false;
}
int kg = keep_going;
int iteration = optimizer->GetIteration();
#ifdef MPI_ENABLED
mpi::communicator world;
mpi::broadcast(world, weights,0);
mpi::broadcast(world,kg,0);
mpi::broadcast(world,iteration,0);
ReserveNextBatch();
world.barrier();
/*
if (MPI_SUCCESS != MPI_Bcast(const_cast<float*>(&weights.data()[0]), weights.data().size(), MPI_FLOAT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&kg, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
if (MPI_SUCCESS != MPI_Bcast(&iteration, 1, MPI_INT, 0, MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
ReserveNextBatch();
if (MPI_SUCCESS != MPI_Barrier(MPI_COMM_WORLD)) MPI_Abort(MPI_COMM_WORLD,1);
*/
keep_going = kg;
optimizer->SetIteration(iteration);
#endif
cur = cur_start;
if (weight_dump_freq > 0 && rank == 0 && iteration > 0 && (iteration % weight_dump_freq) == 0) {
stringstream s;
s << weight_dump_stem;
s << "_";
s << iteration;
string weight_file = s.str();
WeightManager::instance().dump(weight_file);
}
}
}
}

74
josiah/TrainingSource.h Normal file

@ -0,0 +1,74 @@
#pragma once
#include <string>
#include <vector>
#include <boost/random/variate_generator.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_smallint.hpp>
#include "FeatureVector.h"
#include "InputSource.h"
namespace Josiah {
class Decoder;
class Optimizer;
class ExpectedBleuTrainer : public InputSource {
public:
ExpectedBleuTrainer(
int r, // MPI rank, or 0 if not MPI
int s, // MPI size, or 1 if not MPI
int bsize, // batch size
std::vector<std::string>* sents, // development corpus
unsigned int rseed,
bool randomize,
Optimizer* o,
int wt_dump_freq,
std::string wt_dump_stem);
void ReserveNextBatch();
virtual bool HasMore() const;
virtual void GetSentence(std::string* sentence, int* lineno);
void IncorporateGradient(
const Moses::FValue trans_len,
const Moses::FValue ref_len,
const Moses::FValue exp_gain,
const Moses::FValue unreg_exp_gain,
const Moses::FVector& grad);
void IncorporateCorpusGradient(
const Moses::FValue trans_len,
const Moses::FValue ref_len,
const Moses::FValue exp_gain,
const Moses::FValue unreg_exp_gain,
const Moses::FVector& grad);
int GetCurr() { return cur;}
int GetCurrEnd() { return cur_end;}
private:
int rank, size, batch_size;
int cur, cur_start;
int cur_end;
std::vector<std::string> corpus;
bool keep_going;
Moses::FVector gradient;
std::vector<int> order;
boost::mt19937 rng;
boost::uniform_smallint<int> dist;
boost::variate_generator<boost::mt19937, boost::uniform_smallint<int> > draw;
bool randomize_batches;
Optimizer* optimizer;
Moses::FValue total_ref_len;
Moses::FValue total_exp_len;
Moses::FValue total_exp_gain;
Moses::FValue total_unreg_exp_gain;
int tlc;
int weight_dump_freq;
std::string weight_dump_stem;
};
}
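To make the interface concrete, here is a minimal single-process driver sketch. It is illustrative only: it assumes rank 0 of size 1 (no MPI), a caller-supplied Optimizer, and a hypothetical estimateGradient() helper standing in for the sampler's expected-BLEU computation.

#include <string>
#include <vector>
#include "TrainingSource.h"
namespace Josiah {
// Hypothetical helper: in the real system the sampler estimates these
// statistics; it is only declared here so the sketch is self-contained.
void estimateGradient(const std::string& sentence,
                      Moses::FValue* transLen, Moses::FValue* refLen,
                      Moses::FValue* expGain, Moses::FValue* unregExpGain,
                      Moses::FVector* grad);
void trainOnCorpus(std::vector<std::string>* corpus, Optimizer* optimizer) {
  ExpectedBleuTrainer trainer(/*rank*/ 0, /*size*/ 1, /*batch size*/ 8,
                              corpus, /*rseed*/ 42, /*randomize*/ true,
                              optimizer, /*weight dump freq*/ 10,
                              /*weight dump stem*/ "weights");
  while (trainer.HasMore()) {
    std::string sentence;
    int lineno;
    trainer.GetSentence(&sentence, &lineno);
    Moses::FValue transLen, refLen, expGain, unregExpGain;
    Moses::FVector grad;
    estimateGradient(sentence, &transLen, &refLen, &expGain, &unregExpGain, &grad);
    // Once a whole batch has been incorporated, the trainer runs the
    // optimizer, resets its accumulators, and (optionally) dumps weights.
    trainer.IncorporateGradient(transLen, refLen, expGain, unregExpGain, grad);
  }
}
} // namespace Josiah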

303
josiah/TranslationDelta.cpp Normal file
View File

@ -0,0 +1,303 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/lambda/lambda.hpp>
#include "TranslationDelta.h"
#include "Derivation.h"
#include "FeatureFunction.h"
#include "Gibbler.h"
#include "ScoreComponentCollection.h"
#include "DummyScoreProducers.h"
#include "GibbsOperator.h"
using namespace std;
namespace Josiah {
void TranslationDelta::getNewSentenceSingle(const TranslationOption* option, const WordsRange& targetSegment, vector<const Factor*>& newSentence) const{
const Phrase& targetPhrase = option->GetTargetPhrase();
size_t start = targetSegment.GetStartPos();
for (size_t i = 0; i < start; ++i) {
const Factor* factor = getSample().GetTargetWords()[i][0];
newSentence.push_back(factor);
}
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
newSentence.push_back(targetPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = targetSegment.GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
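// Worked example (illustrative): with current target words "a b c d", a
// target segment covering positions [1,2], and an option whose target
// phrase is "X Y Z", the new sentence is "a X Y Z d" - the words before
// the segment, then the replacement phrase, then the words after it.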
void TranslationDelta::initScoresSingleUpdate(const Sample& s, const TranslationOption* option, const TargetGap& gap) {
//reordering scores don't change here, so they are not recomputed
// extra features
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doSingleUpdate(option,gap,m_scores);
}
updateWeightedScore();
VERBOSE(2, "Single Update: Scores " << m_scores << endl);
VERBOSE(2,"Single Update: Total score is " << m_score << endl);
}
//Note that left and right refer to the target order.
void TranslationDelta::initScoresContiguousPairedUpdate(const Sample& s, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) {
//reordering scores don't change here, so they are not recomputed
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doContiguousPairedUpdate(leftOption,rightOption,gap,m_scores);
}
}
void TranslationDelta::initScoresDiscontiguousPairedUpdate(const Sample& s, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& leftGap,
const TargetGap& rightGap)
{
for (FeatureFunctionVector::const_iterator i=s.GetFeatureFunctions().begin(); i<s.GetFeatureFunctions().end(); ++i) {
(*i)->doDiscontiguousPairedUpdate(leftOption, rightOption, leftGap, rightGap,m_scores);
}
}
void TranslationDelta::getNewSentenceContiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, vector<const Factor*>& newSentence) const{
//Create the segment
WordsRange targetSegment = *leftSegment;
targetSegment.SetEndPos(rightTargetSegment->GetEndPos());
//create the phrase
Phrase targetPhrase(leftOption->GetTargetPhrase());
targetPhrase.Append(rightOption->GetTargetPhrase());
size_t start = targetSegment.GetStartPos();
for (size_t i = 0; i < start; ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the target phrase
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
newSentence.push_back(targetPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = targetSegment.GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
void TranslationDelta::getNewSentenceDiscontiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightSegment, vector<const Factor*>& newSentence) const{
const Phrase& leftTgtPhrase = leftOption->GetTargetPhrase();
const Phrase& rightTgtPhrase = rightOption->GetTargetPhrase();
VERBOSE(2, "Sample : " << Josiah::Derivation(m_sample) << endl);
VERBOSE(2, *leftSegment << " " << *rightSegment << endl);
size_t start = leftSegment->GetStartPos();
for (size_t i = 0; i < start; ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the left target phrase
for (size_t i = 0; i < leftTgtPhrase.GetSize(); ++i) {
newSentence.push_back(leftTgtPhrase.GetWord(i)[0]);
}
for (size_t i = leftSegment->GetEndPos()+ 1 ; i < rightSegment->GetStartPos(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
//fill in the right target phrase
for (size_t i = 0; i < rightTgtPhrase.GetSize(); ++i) {
newSentence.push_back(rightTgtPhrase.GetWord(i)[0]);
}
//fill in the end of the sentence
size_t end = rightSegment->GetEndPos() + 1;
for (size_t i = end; i < getSample().GetTargetWords().size(); ++i) {
newSentence.push_back(getSample().GetTargetWords()[i][0]);
}
}
void TranslationDelta::getNewSentencePaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange& leftTargetSegment, const WordsRange& rightTargetSegment, vector<const Factor*>& newSentence) const{
const WordsRange* leftSegment = &leftTargetSegment;
const WordsRange* rightSegment = &rightTargetSegment;
if (rightTargetSegment < leftTargetSegment) {
//swap so that leftSegment always precedes rightSegment in target order
leftSegment = &rightTargetSegment;
rightSegment = &leftTargetSegment;
}
bool contiguous = (leftSegment->GetEndPos() + 1 == rightSegment->GetStartPos());
if (contiguous)
getNewSentenceContiguousPaired(leftOption, rightOption, leftSegment, rightSegment, newSentence);
else
getNewSentenceDiscontiguousPaired(leftOption, rightOption, leftSegment, rightSegment, newSentence);
}
void TranslationDelta::updateWeightedScore() {
//weight the scores
m_score = inner_product(m_scores, WeightManager::instance().get());
VERBOSE(2, "Scores " << m_scores << endl);
VERBOSE(2,"Total score is " << m_score << endl);
}
TranslationUpdateDelta::TranslationUpdateDelta(Sample& sample, const TranslationOption* option ,const TargetGap& gap) :
TranslationDelta(sample), m_option(option), m_gap(gap) {
initScoresSingleUpdate(m_sample, m_option,m_gap);
}
void TranslationUpdateDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Translation Update Delta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().ChangeTarget(*m_option,m_scores);
}
void TranslationUpdateDelta::getNewSentence(vector<const Factor*>& newSentence) const{
getNewSentenceSingle(m_option, m_gap.segment, newSentence);
}
MergeDelta::MergeDelta(Sample& sample, const TranslationOption* option, const TargetGap& gap) :
TranslationDelta(sample), m_option(option), m_gap(gap) {
initScoresSingleUpdate(m_sample, m_option,m_gap);
}
void MergeDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying MergeDelta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().MergeTarget(*m_option,m_scores);
}
void MergeDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentenceSingle(m_option, m_gap.segment, newSentence);
}
PairedTranslationUpdateDelta::PairedTranslationUpdateDelta(Sample& sample,
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap) : TranslationDelta(sample), m_leftOption(leftOption),
m_rightOption(rightOption), m_leftGap(leftGap), m_rightGap(rightGap) {
VERBOSE(2, "Left Target phrase: " << m_leftOption->GetTargetPhrase() << endl);
VERBOSE(2, "Right Target phrase: " << m_rightOption->GetTargetPhrase() << endl);
VERBOSE(2, "Left Target segment: " << m_leftGap.segment << endl);
VERBOSE(2, "Right Target segment: " << m_rightGap.segment << endl);
assert(m_leftGap.segment < m_rightGap.segment);
if (m_leftGap.segment.GetEndPos() + 1 == m_rightGap.segment.GetStartPos()) {
TargetGap gap(m_leftGap.leftHypo, m_rightGap.rightHypo,
WordsRange(m_leftGap.segment.GetStartPos(), m_rightGap.segment.GetEndPos()));
initScoresContiguousPairedUpdate(m_sample, m_leftOption,m_rightOption, gap);
} else {
initScoresDiscontiguousPairedUpdate(m_sample,m_leftOption,m_rightOption,m_leftGap,m_rightGap);
}
updateWeightedScore();
VERBOSE(2, "Left Target segment: " << m_leftGap.segment << endl);
VERBOSE(2, "Right Target segment: " << m_rightGap.segment << endl);
}
void PairedTranslationUpdateDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Paired Translation Update Delta" << endl);
m_scores -= noChangeDelta.getScores();
getSample().ChangeTarget(*m_leftOption,m_scores);
FVector emptyScores;
getSample().ChangeTarget(*m_rightOption,emptyScores);
}
void PairedTranslationUpdateDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentencePaired(m_leftOption, m_rightOption, m_leftGap.segment, m_rightGap.segment, newSentence);
}
SplitDelta::SplitDelta(Sample& sample, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap) : TranslationDelta(sample),
m_leftOption(leftOption), m_rightOption(rightOption), m_gap(gap){
VERBOSE(2, "Target phrase: " << m_leftOption->GetTargetPhrase() << " " << m_rightOption->GetTargetPhrase() << endl);
VERBOSE(2, "Target segment: " << m_gap.segment << endl);
initScoresContiguousPairedUpdate(m_sample, m_leftOption, m_rightOption, m_gap);
updateWeightedScore();
}
void SplitDelta::apply(const TranslationDelta& noChangeDelta) {
m_scores -= noChangeDelta.getScores();
getSample().SplitTarget(*m_leftOption,*m_rightOption,m_scores);
}
void SplitDelta::getNewSentence(vector<const Factor*>& newSentence) const {
getNewSentenceContiguousPaired(m_leftOption, m_rightOption, &(m_gap.segment), &(m_gap.segment), newSentence);
}
void FlipDelta::apply(const TranslationDelta& noChangeDelta) {
VERBOSE(3, "Applying Flip Delta" << endl);
m_scores -= noChangeDelta.getScores();
//cerr << "m_prevTgtHypo: " << *m_prevTgtHypo << endl;
//cerr << "m_nextTgtHypo: " << *m_nextTgtHypo << endl;
getSample().FlipNodes(*m_leftTgtOption, *m_rightTgtOption, m_prevTgtHypo, m_nextTgtHypo, m_scores);
}
FlipDelta::FlipDelta(Sample& sample,
const TranslationOption* leftTgtOption ,const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap) :
TranslationDelta(sample),
m_leftTgtOption(leftTgtOption), m_rightTgtOption(rightTgtOption), m_leftGap(leftGap), m_rightGap(rightGap),
m_prevTgtHypo(const_cast<Hypothesis*> (leftGap.leftHypo)), m_nextTgtHypo(const_cast<Hypothesis*> (rightGap.rightHypo))
{
for (FeatureFunctionVector::const_iterator i=sample.GetFeatureFunctions().begin(); i<sample.GetFeatureFunctions().end(); ++i) {
(*i)->doFlipUpdate(leftTgtOption, rightTgtOption, leftGap, rightGap,m_scores);
}
updateWeightedScore();
VERBOSE(2, "Flip delta: Scores " << m_scores << endl);
VERBOSE(2,"Flip delta: Total score is " << m_score << endl);
//cerr << "Creating FlipDelta scores = " << m_scores << " total = " << m_score << endl;
}
void FlipDelta::getNewSentence(vector<const Factor*>& newSentence)const {
getNewSentencePaired(m_leftTgtOption, m_rightTgtOption, m_leftGap.segment, m_rightGap.segment, newSentence);
}
}//namespace

221
josiah/TranslationDelta.h Normal file
View File

@ -0,0 +1,221 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <utility>
#include <boost/shared_ptr.hpp>
#include "DummyScoreProducers.h"
#include "FeatureFunction.h"
#include "FeatureVector.h"
#include "WeightManager.h"
namespace Moses {
class TranslationOption;
class TranslationOptionCollection;
class Hypothesis;
class Factor;
class WordsRange;
class Word;
}
using namespace Moses;
namespace Josiah {
class Sample;
class GibbsOperator;
/**
* This class hierarchy represents the possible changes to the translation effected
* by the Gibbs operators.
**/
class TranslationDelta {
public:
TranslationDelta(Sample& sample): m_score(-1e6), m_sample(sample) {}
/**
Get the absolute score of this delta
**/
double getScore() const { return m_score;}
/**
* Apply to the sample
**/
virtual void apply(const TranslationDelta& noChangeDelta) = 0;
/**
For gain calculation
**/
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const = 0;
Sample& getSample() const {return m_sample;}
virtual ~TranslationDelta() {}
void updateWeightedScore();
const FVector& getScores() const { return m_scores;}
void setScores(const FVector& scores) { m_scores = scores;}
protected:
FVector m_scores;
FValue m_score;
Sample& m_sample;
void getNewSentenceSingle(const TranslationOption* option, const WordsRange& targetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentencePaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange& leftTargetSegment, const WordsRange& rightTargetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentenceContiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, std::vector<const Factor*>& newSentence) const;
void getNewSentenceDiscontiguousPaired(const TranslationOption* leftOption, const TranslationOption* rightOption, const WordsRange* leftSegment, const WordsRange* rightTargetSegment, std::vector<const Factor*>& newSentence) const;
/**
* Initialise the scores for the case where only one source-target pair needs to be considered.
**/
void initScoresSingleUpdate(const Sample&, const TranslationOption* option, const TargetGap& gap);
/**
* Initialise the scores for the case where two (target contiguous) source-target pairs need to be considered.
* Note that left and right refer to the target order.
**/
void initScoresContiguousPairedUpdate(const Sample&, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& gap);
/** Discontiguous version of above. */
void initScoresDiscontiguousPairedUpdate(const Sample&, const TranslationOption* leftOption,
const TranslationOption* rightOption, const TargetGap& leftGap,
const TargetGap& rightGap);
};
/**
* An update that only changes a single source/target phrase pair. May change the length of the target.
**/
class TranslationUpdateDelta : public virtual TranslationDelta {
public:
TranslationUpdateDelta(Sample& sample, const TranslationOption* option , const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getOption() const {return m_option;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_option;
TargetGap m_gap;
};
/**
* An update that merges two source phrases and their corresponding target phrases.
**/
class MergeDelta : public virtual TranslationDelta {
public:
/**
* sample - the sample being updated
* option - the source/target phrase to go into the merged segment
* gap - the location of the merged target segment
**/
MergeDelta(Sample& sample, const TranslationOption* option, const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getOption() const {return m_option;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_option;
TargetGap m_gap;
};
/**
* Like TranslationUpdateDelta, except that it updates a pair of source/target phrase pairs.
**/
class PairedTranslationUpdateDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
PairedTranslationUpdateDelta(Sample& sample,
const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftOption;}
const TranslationOption* getRightOption() const {return m_rightOption;}
const TargetGap& getLeftGap() const { return m_leftGap;}
const TargetGap& getRightGap() const { return m_rightGap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftOption;
const TranslationOption* m_rightOption;
TargetGap m_leftGap;
TargetGap m_rightGap;
};
/**
* Updates the sample by splitting a source phrase and its corresponding target phrase, choosing new options.
**/
class SplitDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
SplitDelta(Sample& sample, const TranslationOption* leftOption, const TranslationOption* rightOption,
const TargetGap& gap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftOption;}
const TranslationOption* getRightOption() const {return m_rightOption;}
const TargetGap& getGap() const { return m_gap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftOption;
const TranslationOption* m_rightOption;
TargetGap m_gap;
};
/**
* Switch the translations on the target side.
**/
class FlipDelta : public virtual TranslationDelta {
public:
/** Options and gaps in target order */
FlipDelta(Sample& sample, const TranslationOption* leftTgtOption, const TranslationOption* rightTgtOption,
const TargetGap& leftGap, const TargetGap& rightGap);
virtual void apply(const TranslationDelta& noChangeDelta);
const TranslationOption* getLeftOption() const {return m_leftTgtOption;}
const TranslationOption* getRightOption() const {return m_rightTgtOption;}
const TargetGap& getLeftGap() const { return m_leftGap;}
const TargetGap& getRightGap() const { return m_rightGap;}
virtual void getNewSentence(std::vector<const Factor*>& newSentence) const;
private:
const TranslationOption* m_leftTgtOption;
const TranslationOption* m_rightTgtOption;
TargetGap m_leftGap;
TargetGap m_rightGap;
Hypothesis* m_prevTgtHypo;
Hypothesis* m_nextTgtHypo;
};
typedef boost::shared_ptr<TranslationDelta> TDeltaHandle;
typedef std::vector<TDeltaHandle> TDeltaVector;
} //namespace
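To illustrate how deltas are consumed, here is a hedged sketch (applyBest is an assumption, not part of the sampler): it scores each candidate against the no-change delta and applies the winner. A Gibbs operator would typically sample a delta from the distribution induced by these scores rather than take the argmax.

#include "TranslationDelta.h"
namespace Josiah {
TDeltaHandle applyBest(const TDeltaVector& candidates,
                       const TDeltaHandle& noChangeDelta) {
  TDeltaHandle best = noChangeDelta;
  for (TDeltaVector::const_iterator i = candidates.begin();
       i != candidates.end(); ++i) {
    if ((*i)->getScore() > best->getScore()) {
      best = *i;
    }
  }
  if (best != noChangeDelta) {
    // apply() subtracts the no-change scores and updates the sample
    best->apply(*noChangeDelta);
  }
  return best;
}
} // namespace Josiah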

129
josiah/Truncate.cpp Normal file
View File

@ -0,0 +1,129 @@
/**
* Truncate a phrase table to the top n translation options, given an
* input corpus.
**/
#include <cmath>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <set>
#include <sstream>
#include <boost/program_options.hpp>
#include "Decoder.h"
#include "InputSource.h"
#include "DummyScoreProducers.h"
#include "ScoreProducer.h"
#include "StaticData.h"
using namespace std;
using namespace Josiah;
using namespace Moses;
namespace po = boost::program_options;
int main(int argc, char** argv) {
size_t toptionLimit;
string inputFile;
string configFile;
bool help;
po::options_description visible("Allowed options");
visible.add_options()
("help",po::value( &help )->zero_tokens()->default_value(false), "Print this help message and exit")
("toption-limit,t", po::value<size_t>(&toptionLimit)->default_value(20), "Number of translation options to prune to");
po::options_description hidden("Hidden options");
hidden.add_options()
("config-file", po::value<string>(&configFile), "config file")
("input-file", po::value<string>(&inputFile), "input file");
po::positional_options_description p;
p.add("config-file", 1);
p.add("input-file", 1);
po::options_description cmdline_options;
cmdline_options.add(visible).add(hidden);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).positional(p).run(), vm);
po::notify(vm);
if (inputFile.empty() || configFile.empty()) help = true;
if (help) {
std::cout << "Usage: " + string(argv[0]) + " [options] config-file input-file" << std::endl;
std::cout << visible << std::endl;
return 0;
}
cerr << "Truncating the model " << configFile << " using the input file " << inputFile << endl;
//set up moses
vector<string> extraArgs;
extraArgs.push_back("-ttable-limit");
ostringstream toptionLimitStr;
toptionLimitStr << toptionLimit;
extraArgs.push_back(toptionLimitStr.str());
extraArgs.push_back("-persistent-cache-size");
extraArgs.push_back("0");
initMoses(configFile,0,extraArgs);
//store source phrases already output
set<Phrase> sourcePhrases;
ifstream in(inputFile.c_str());
if (!in) {
cerr << "Unable to open input file " << inputFile << endl;
return 1;
}
//only print the 1st factor
vector<FactorType> factors;
factors.push_back(0);
//Assume single phrase feature
StaticData& staticData =
const_cast<StaticData&>(StaticData::Instance());
PhraseDictionaryFeature* ptable = staticData.GetTranslationSystem
(TranslationSystem::DEFAULT).GetPhraseDictionaries()[0];
//To detect unknown words
const ScoreProducer* uwp = staticData.GetTranslationSystem
(TranslationSystem::DEFAULT).GetUnknownWordPenaltyProducer();
string line;
while (getline(in,line)) {
//cerr << line << endl;
TranslationHypothesis translation(line);
size_t length = translation.getWords().size();
size_t maxPhraseSize = staticData.GetMaxPhraseLength();
for (size_t start = 0; start < length; ++start) {
for (size_t end = start; end < start + maxPhraseSize && end < length; ++end) {
TranslationOptionList& options = translation.getToc()->GetTranslationOptionList(start,end);
if (!options.size()) continue;
const Phrase* sourcePhrase = options.Get(0)->GetSourcePhrase();
if (sourcePhrases.find(*sourcePhrase) != sourcePhrases.end()) continue;
if (options.Get(0)->GetScoreBreakdown().GetScoreForProducer(uwp)) continue;
sourcePhrases.insert(*sourcePhrase);
for (size_t i = 0; i < options.size(); ++i) {
const TranslationOption* option = options.Get(i);
cout << sourcePhrase->GetStringRep(factors);
cout << " ||| ";
cout << option->GetTargetPhrase().GetStringRep(factors);
cout << " |||";
vector<float> scores = option->GetScoreBreakdown().GetScoresForProducer(ptable);
for (size_t j = 0; j < scores.size(); ++j) {
cout << " " << exp(scores[j]);
}
cout << " ||| |||";
cout << endl;
}
}
}
}
}
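A hedged usage note: the tool reads the Moses config and the development input from its two positional arguments and writes the pruned table to stdout. An illustrative invocation (binary name and file names are assumptions):

truncate -t 30 moses.ini dev.source > phrase-table.truncated

Each output line is a phrase-table entry for a source phrase observed in the input, with the decoder's log-space scores mapped back to probabilities via exp().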

222
josiah/Utils.cpp Normal file
View File

@ -0,0 +1,222 @@
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <boost/program_options.hpp>
#include "Utils.h"
#include "Pos.h"
#include "Dependency.h"
#include "DiscriminativeLMFeature.h"
#include "DistortionPenaltyFeature.h"
#include "LanguageModelFeature.h"
#include "LexicalReorderingFeature.h"
#include "MetaFeature.h"
#include "ParenthesisFeature.h"
#include "PhraseFeature.h"
#include "PhraseBoundaryFeature.h"
#include "PhrasePairFeature.h"
#include "PosProjectionFeature.h"
#include "RandomFeature.h"
#include "ReorderingFeature.h"
#include "SourceToTargetRatio.h"
#include "StatelessFeature.h"
#include "WordPenaltyFeature.h"
using namespace std;
namespace po = boost::program_options;
namespace Josiah {
void configure_features_from_file(const std::string& filename, FeatureVector& fv, bool disableUWP, FVector& coreWeights){
//Core features
fv.push_back(FeatureHandle(new WordPenaltyFeature()));
if (!disableUWP) {
fv.push_back(FeatureHandle(new UnknownWordPenaltyFeature()));
}
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem
(TranslationSystem::DEFAULT);
vector<PhraseDictionaryFeature*> phraseTables = system.GetPhraseDictionaries();
for (size_t i = 0; i < phraseTables.size(); ++i) {
fv.push_back(FeatureHandle(new PhraseFeature(phraseTables[i],i)));
}
const LMList& lms = system.GetLanguageModels();
for (LMList::const_iterator i = lms.begin(); i != lms.end(); ++i) {
fv.push_back(FeatureHandle(new LanguageModelFeature(*i)));
}
fv.push_back(FeatureHandle(new DistortionPenaltyFeature()));
const std::vector<LexicalReordering*>& reorderModels = system.GetReorderModels();
for (size_t i = 0; i < reorderModels.size(); ++i) {
fv.push_back(FeatureHandle(new LexicalReorderingFeature(reorderModels[i],i)));
}
if (filename.empty()) return;
std::cerr << "Reading extra features from " << filename << std::endl;
std::ifstream in(filename.c_str());
if (!in) {
throw std::runtime_error("Unable to open feature configuration file");
}
// todo: instead of having this function know about all required options of
// each feature, have features populate options / read variable maps /
// populate feature_vector using static functions.
po::options_description desc;
bool useVerbDiff = false;
bool useCherry = false;
bool useDepDist = false;
bool useSrcTgtRatio = false;
string posProjectBigramTags;
size_t posSourceFactor;
size_t posTargetFactor;
string parenthesisLefts;
string parenthesisRights;
size_t dependencyFactor;
bool discrimlmBigram;
string discrimlmVocab;
FactorType discrimlmFactor;
string coreWeightFile;
vector<string> msdConfig;
vector<string> msdVocab;
bool phrasePairSourceTarget;
size_t phrasePairSourceFactor;
size_t phrasePairTargetFactor;
size_t randomFeatureCount;
size_t randomFeatureScaling;
vector<string> phraseBoundarySourceFactors;
vector<string> phraseBoundaryTargetFactors;
desc.add_options()
("model1.table", "Model 1 table")
("model1.pef_column", "Column containing p(e|f) score")
("model1.pfe_column", "Column containing p(f|e) score")
("dependency.cherry", po::value<bool>(&useCherry)->default_value(false), "Use Colin Cherry's syntactic cohesiveness feature")
("dependency.distortion", po::value<bool>(&useDepDist)->default_value(false), "Use the dependency distortion feature")
("dependency.factor", po::value<size_t>(&dependencyFactor)->default_value(1), "Factor representing the dependency tree")
("pos.sourcefactor", po::value<size_t>(&posSourceFactor)->default_value(1), "Factor representing the source pos tag")
("pos.targetfactor", po::value<size_t>(&posTargetFactor)->default_value(1), "Factor representing the target pos tag")
("pos.verbdiff", po::value<bool>(&useVerbDiff)->default_value(false), "Verb difference feature")
("pos.projectbigram", po::value<string>(&posProjectBigramTags),
"Pos project bigram. Comma separated list of tags, or * for all tags.")
("srctgtratio.useFeat", po::value<bool>(&useSrcTgtRatio)->default_value(false), "Use source length to target length ratio feature")
("parenthesis.lefts", po::value<std::string>(&parenthesisLefts), "Left parentheses")
("parenthesis.rights", po::value<std::string>(&parenthesisRights), "Right parentheses")
("discrimlm.vocab", po::value<string>(&discrimlmVocab), "Vocabulary file for discriminative lms")
("discrimlm.bigram", po::value<bool>(&discrimlmBigram)->default_value(false), "Use the discriminative lm bigram feature")
("discrimlm.factor", po::value<FactorType>(&discrimlmFactor)->default_value(0), "The factor to use for the discuminative lm features")
("core.weightfile", po::value<string>(&coreWeightFile),
"Weights of core features, if they are to be combined into a single feature")
("reordering.msd", po::value<vector<string> >(&msdConfig),
"Reordering msd (monotone/swap/discontinuous) feature configuration")
("reordering.msdvocab", po::value<vector<string> > (&msdVocab),
"Vocabularies for msd features. In the form factor_id:source/target:file")
("phrasepair.sourcetarget", po::value<bool>(&phrasePairSourceTarget)->zero_tokens()->default_value(false), "Watanabe style phrase pair feature")
("phrasepair.sourcefactor", po::value<size_t>(&phrasePairSourceFactor)->default_value(0), "The source factor for the phrase pair feature")
("phrasepair.targetfactor", po::value<size_t>(&phrasePairTargetFactor)->default_value(0), "The target factor for the phrase pair feature")
("random.numvalues", po::value<size_t>(&randomFeatureCount)->default_value(0),
"The number of values for the random feature")
("random.scaling", po::value<size_t>(&randomFeatureScaling)->default_value(5),
"The scaling for the random feature")
("phraseboundary.sourcefactors", po::value<vector<string> >(&phraseBoundarySourceFactors), "Source factors used in the phrase boundary feature, with optional vocab separated by comma")
("phraseboundary.targetfactors", po::value<vector<string> >(&phraseBoundaryTargetFactors), "Target factors used in the phrase boundary feature, with optional vocab separated by comma")
;
po::variables_map vm;
po::store(po::parse_config_file(in,desc,true), vm);
notify(vm);
if (!coreWeightFile.empty()) {
cerr << "Using single feature for core features, loading weights from " << coreWeightFile << endl;
coreWeights.load(coreWeightFile);
FeatureHandle metaFeature(new MetaFeature(coreWeights,fv));
fv.clear();
fv.push_back(metaFeature);
}
if (useVerbDiff) {
//FIXME: Should be configurable
fv.push_back(FeatureHandle(new VerbDifferenceFeature(posSourceFactor,posTargetFactor)));
}
if (useCherry) {
fv.push_back(FeatureHandle(new CherrySyntacticCohesionFeature(dependencyFactor)));
}
if (useSrcTgtRatio) {
fv.push_back(FeatureHandle(new SourceToTargetRatioFeature));
}
if (useDepDist) {
fv.push_back(FeatureHandle(new DependencyDistortionFeature(dependencyFactor)));
}
if (parenthesisRights.size() > 0 || parenthesisLefts.size() > 0) {
assert(parenthesisRights.size() == parenthesisLefts.size());
fv.push_back(FeatureHandle(new ParenthesisFeature(parenthesisLefts,parenthesisRights)));
}
if (posProjectBigramTags.size()) {
fv.push_back(FeatureHandle(new PosProjectionBigramFeature(posSourceFactor,posProjectBigramTags)));
}
if (discrimlmBigram) {
fv.push_back(FeatureHandle(new DiscriminativeLMBigramFeature(discrimlmFactor,discrimlmVocab)));
}
if (msdConfig.size()) {
fv.push_back(FeatureHandle(new ReorderingFeature(msdConfig,msdVocab)));
}
if (phrasePairSourceTarget) {
fv.push_back(FeatureHandle(
new PhrasePairFeature(phrasePairSourceFactor,phrasePairTargetFactor)));
}
if (randomFeatureCount) {
fv.push_back(FeatureHandle(
new StatelessFeatureAdaptor(MosesFeatureHandle(
new Moses::RandomFeature(randomFeatureCount, randomFeatureScaling)))));
}
if (phraseBoundarySourceFactors.size() || phraseBoundaryTargetFactors.size())
{
FactorList sourceFactorIds;
vector<string> sourceVocabs;
FactorList targetFactorIds;
vector<string> targetVocabs;
for (size_t i = 0; i < phraseBoundarySourceFactors.size(); ++i) {
vector<string> tokens = Tokenize(phraseBoundarySourceFactors[i],",");
assert(tokens.size() <= 2);
sourceFactorIds.push_back(Scan<FactorType>(tokens[0]));
if (tokens.size() > 1) {
sourceVocabs.push_back(tokens[1]);
} else {
sourceVocabs.push_back("");
}
}
for (size_t i = 0; i < phraseBoundaryTargetFactors.size(); ++i) {
vector<string> tokens = Tokenize(phraseBoundaryTargetFactors[i],",");
assert(tokens.size() <= 2);
targetFactorIds.push_back(Scan<FactorType>(tokens[0]));
if (tokens.size() > 1) {
targetVocabs.push_back(tokens[1]);
} else {
targetVocabs.push_back("");
}
}
fv.push_back(FeatureHandle(
new PhraseBoundaryFeature(sourceFactorIds,targetFactorIds,
sourceVocabs,targetVocabs)));
}
in.close();
}
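/* For illustration, the file parsed above uses boost::program_options
   config syntax: [section] headers with key = value lines, equivalent to
   the dotted names registered in desc. A hedged example enabling a few of
   the features above (all paths and values are assumptions):

   [pos]
   sourcefactor = 1
   targetfactor = 2
   verbdiff = true

   [srctgtratio]
   useFeat = true

   [discrimlm]
   bigram = true
   vocab = discrimlm.vocab.txt
   factor = 0
*/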
/*
bool ValidateAndGetLMFromName(string featsName, LanguageModel* &lm) {
const ScoreIndexManager& scoreIndexManager = StaticData::Instance().GetScoreIndexManager();
size_t numScores = scoreIndexManager.GetTotalNumberOfScores();
for (size_t i = 0; i < numScores; ++i) {
if (scoreIndexManager.GetFeatureName(i) == featsName) {
const ScoreProducer* scoreProducer = scoreIndexManager.GetScoreProducer(i);
lm = static_cast<LanguageModel*>(const_cast<ScoreProducer*>(scoreProducer));
return true;
}
}
return false;
}*/
}

30
josiah/Utils.h Normal file
View File

@ -0,0 +1,30 @@
#pragma once
#include <vector>
#include "Timer.h"
#include "FeatureFunction.h"
namespace Moses {
class LanguageModel;
}
namespace Josiah {
/**
* Wrap moses timer to give a way to no-op it.
**/
class GibbsTimer {
public:
GibbsTimer() : m_doTiming(false) {}
void on() {m_doTiming = true; m_timer.start("TIME: Starting timer");}
void check(const std::string& msg) {if (m_doTiming) m_timer.check(std::string("TIME:" + msg).c_str());}
private:
Timer m_timer;
bool m_doTiming;
} ;
void configure_features_from_file(const std::string& filename, FeatureVector& fv, bool disableUWP, FVector& coreWeights);
// bool ValidateAndGetLMFromName(std::string featsName, Moses::LanguageModel* &lm);
}
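A brief illustrative sketch of the timer wrapper in use (the surrounding function is an assumption):

#include "Utils.h"
void timedSamplingPass() {
  Josiah::GibbsTimer timer;
  timer.on(); // starts the underlying Moses timer; without this, check() is a no-op
  // ... run one sampling pass ...
  timer.check("end of sampling pass"); // logs elapsed time with this label
}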

67
josiah/WeightManager.cpp Normal file
View File

@ -0,0 +1,67 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "StaticData.h"
#include "WeightManager.h"
using namespace Moses;
using namespace std;
namespace Josiah {
auto_ptr<WeightManager> WeightManager::s_instance;
void WeightManager::init(const std::string& weightsFile) {
init();
s_instance->m_weights.load(weightsFile);
}
void WeightManager::init() {
assert(!s_instance.get());
s_instance.reset(new WeightManager());
}
WeightManager& WeightManager::instance() {
assert(s_instance.get());
return *s_instance;
}
FVector& WeightManager::get() {
return m_weights;
}
void WeightManager::scale(FValue& scale) {
m_weights *= scale;
}
void WeightManager::dump(const string& filename) {
ofstream out(filename.c_str());
if (!out) {
cerr << "WARN: Failed to open " << filename << " for weight dump" << endl;
} else {
m_weights.write(out);
out.close();
}
}
}
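And a sketch of the intended call pattern (file names are illustrative): init() must run exactly once before instance() is first used.

#include "WeightManager.h"
void weightManagerExample() {
  Josiah::WeightManager::init("initial.weights"); // load starting weights
  Moses::FVector& weights = Josiah::WeightManager::instance().get();
  (void)weights; // mutate or inspect the live weight vector here
  Moses::FValue factor = 0.5;
  Josiah::WeightManager::instance().scale(factor); // note: takes a non-const reference
  Josiah::WeightManager::instance().dump("scaled.weights");
}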

Some files were not shown because too many files have changed in this diff.