Merge branch 'vw_tgtcontext' of github.com:moses-smt/mosesdecoder

Ales Tamchyna 2016-06-07 17:14:16 +02:00
commit 44c9e6db17
30 changed files with 1415 additions and 491 deletions

View File

@@ -77,6 +77,9 @@
#ifdef HAVE_VW
#include "moses/FF/VW/VW.h"
#include "moses/FF/VW/VWFeatureContextBigrams.h"
#include "moses/FF/VW/VWFeatureContextBilingual.h"
#include "moses/FF/VW/VWFeatureContextWindow.h"
#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
#include "moses/FF/VW/VWFeatureSourceBigrams.h"
#include "moses/FF/VW/VWFeatureSourceIndicator.h"
@@ -300,6 +303,9 @@ FeatureRegistry::FeatureRegistry()
#ifdef HAVE_VW
MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureContextBigrams);
MOSES_FNAME(VWFeatureContextBilingual);
MOSES_FNAME(VWFeatureContextWindow);
MOSES_FNAME(VWFeatureSourceBagOfWords);
MOSES_FNAME(VWFeatureSourceBigrams);
MOSES_FNAME(VWFeatureSourceIndicator);

View File

@@ -0,0 +1,40 @@
#pragma once
#include <limits>
namespace Moses
{
/**
* Helper class for storing alignment constraints.
*/
class AlignmentConstraint
{
public:
AlignmentConstraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
AlignmentConstraint(int min, int max) : m_min(min), m_max(max) {}
/**
* We are aligned to point => our min cannot be larger, our max cannot be smaller.
*/
void Update(int point) {
if (m_min > point) m_min = point;
if (m_max < point) m_max = point;
}
bool IsSet() const {
return m_max != -1;
}
int GetMin() const {
return m_min;
}
int GetMax() const {
return m_max;
}
private:
int m_min, m_max;
};
}
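A minimal usage sketch of this helper (not part of the commit; it assumes the header is available as moses/FF/VW/AlignmentConstraint.h):

#include <iostream>
#include "moses/FF/VW/AlignmentConstraint.h"
int main() {
Moses::AlignmentConstraint c; // initially unset: min = INT_MAX, max = -1
c.Update(3); // aligned to point 3 => min = max = 3
c.Update(5); // => min = 3, max = 5
c.Update(1); // => min = 1, max = 5
std::cout << c.IsSet() << " " << c.GetMin() << " " << c.GetMax() << std::endl; // prints: 1 1 5
return 0;
}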

moses/FF/VW/VW.cpp (new file, 627 lines)
View File

@@ -0,0 +1,627 @@
#include <string>
#include <map>
#include <set>
#include <limits>
#include <vector>
#include <algorithm>
#include <boost/unordered_map.hpp>
#include <boost/functional/hash.hpp>
#include <boost/foreach.hpp>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/TranslationOptionList.h"
#include "moses/TranslationOption.h"
#include "moses/Util.h"
#include "moses/TypeDef.h"
#include "moses/StaticData.h"
#include "moses/Phrase.h"
#include "moses/AlignmentInfo.h"
#include "moses/AlignmentInfoCollection.h"
#include "moses/Word.h"
#include "moses/FactorCollection.h"
#include "Normalizer.h"
#include "Classifier.h"
#include "VWFeatureBase.h"
#include "TabbedSentence.h"
#include "ThreadLocalByFeatureStorage.h"
#include "TrainingLoss.h"
#include "VWTargetSentence.h"
#include "VWState.h"
#include "VW.h"
namespace Moses
{
VW::VW(const std::string &line)
: StatefulFeatureFunction(1, line)
, TLSTargetSentence(this)
, m_train(false)
, m_sentenceStartWord(Word()) {
ReadParameters();
Discriminative::ClassifierFactory *classifierFactory = m_train
? new Discriminative::ClassifierFactory(m_modelPath)
: new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
m_tlsFutureScores = new TLSFloatHashMap(this);
m_tlsComputedStateExtensions = new TLSStateExtensions(this);
m_tlsTranslationOptionFeatures = new TLSFeatureVectorMap(this);
m_tlsTargetContextFeatures = new TLSFeatureVectorMap(this);
if (! m_normalizer) {
VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
}
if (! m_trainingLoss) {
VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
}
// create a virtual beginning-of-sentence word with all factors replaced by <S>
const Factor *bosFactor = FactorCollection::Instance().AddFactor(BOS_);
for (size_t i = 0; i < MAX_NUM_FACTORS; i++)
m_sentenceStartWord.SetFactor(i, bosFactor);
}
VW::~VW() {
delete m_tlsClassifier;
delete m_normalizer;
// TODO delete more stuff
}
FFState* VW::EvaluateWhenApplied(
const Hypothesis& curHypo,
const FFState* prevState,
ScoreComponentCollection* accumulator) const
{
VERBOSE(3, "VW :: Evaluating translation options\n");
const VWState& prevVWState = *static_cast<const VWState *>(prevState);
const std::vector<VWFeatureBase*>& contextFeatures =
VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
if (contextFeatures.empty()) {
// no target context features => we already evaluated everything in
// EvaluateTranslationOptionListWithSourceContext(). Nothing to do now,
// no state information to track.
return new VWState();
}
size_t spanStart = curHypo.GetTranslationOption().GetStartPos();
size_t spanEnd = curHypo.GetTranslationOption().GetEndPos();
// compute our current key
size_t cacheKey = MakeCacheKey(prevState, spanStart, spanEnd);
boost::unordered_map<size_t, FloatHashMap> &computedStateExtensions
= *m_tlsComputedStateExtensions->GetStored();
if (computedStateExtensions.find(cacheKey) == computedStateExtensions.end()) {
// we have not computed this set of translation options yet
const TranslationOptionList *topts =
curHypo.GetManager().getSntTranslationOptions()->GetTranslationOptionList(spanStart, spanEnd);
const InputType& input = curHypo.GetManager().GetSource();
Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
// extract target context features
size_t contextHash = prevVWState.hash();
FeatureVectorMap &contextFeaturesCache = *m_tlsTargetContextFeatures->GetStored();
FeatureVectorMap::const_iterator contextIt = contextFeaturesCache.find(contextHash);
if (contextIt == contextFeaturesCache.end()) {
// we have not extracted features for this context yet
const Phrase &targetContext = prevVWState.GetPhrase();
Discriminative::FeatureVector contextVector;
const AlignmentInfo *alignInfo = TransformAlignmentInfo(curHypo, targetContext.GetSize());
for(size_t i = 0; i < contextFeatures.size(); ++i)
(*contextFeatures[i])(input, targetContext, *alignInfo, classifier, contextVector);
contextFeaturesCache[contextHash] = contextVector;
VERBOSE(3, "VW :: context cache miss\n");
} else {
// context already in cache, simply put feature IDs in the classifier object
classifier.AddLabelIndependentFeatureVector(contextIt->second);
VERBOSE(3, "VW :: context cache hit\n");
}
std::vector<float> losses(topts->size());
for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
const TranslationOption *topt = topts->Get(toptIdx);
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
size_t toptHash = hash_value(*topt);
// start with pre-computed source-context-only VW scores
losses[toptIdx] = m_tlsFutureScores->GetStored()->find(toptHash)->second;
// add all features associated with this translation option
// (pre-computed when evaluated with source context)
const Discriminative::FeatureVector &targetFeatureVector =
m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
classifier.AddLabelDependentFeatureVector(targetFeatureVector);
// add classifier score with context+target features only to the total loss
losses[toptIdx] += classifier.Predict(MakeTargetLabel(targetPhrase));
}
// normalize classifier scores to get a probability distribution
(*m_normalizer)(losses);
// fill our cache with the results
FloatHashMap &toptScores = computedStateExtensions[cacheKey];
for (size_t toptIdx = 0; toptIdx < topts->size(); toptIdx++) {
const TranslationOption *topt = topts->Get(toptIdx);
size_t toptHash = hash_value(*topt);
toptScores[toptHash] = FloorScore(TransformScore(losses[toptIdx]));
}
VERBOSE(3, "VW :: cache miss\n");
} else {
VERBOSE(3, "VW :: cache hit\n");
}
// now our cache is guaranteed to contain the required score, simply look it up
std::vector<float> newScores(m_numScoreComponents);
size_t toptHash = hash_value(curHypo.GetTranslationOption());
newScores[0] = computedStateExtensions[cacheKey][toptHash];
VERBOSE(3, "VW :: adding score: " << newScores[0] << "\n");
accumulator->PlusEquals(this, newScores);
return new VWState(prevVWState, curHypo);
}
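// A minimal sketch (an assumption for illustration, not the actual Moses code;
// see Normalizer.h) of what the logistic-loss normalizer invoked above via
// (*m_normalizer)(losses) might do:
//
//   void NormalizeLogistic(std::vector<float> &losses) {
//     float sum = 0.0f;
//     for (size_t i = 0; i < losses.size(); i++) {
//       losses[i] = 1.0f / (1.0f + std::exp(losses[i])); // lower VW loss => higher score
//       sum += losses[i];
//     }
//     for (size_t i = 0; i < losses.size(); i++)
//       losses[i] /= sum; // make the scores sum to 1
//   }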
const FFState* VW::EmptyHypothesisState(const InputType &input) const {
size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
Phrase initialPhrase;
for (size_t i = 0; i < maxContextSize; i++)
initialPhrase.AddWord(m_sentenceStartWord);
return new VWState(initialPhrase);
}
void VW::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
if (translationOptionList.size() == 0)
return; // nothing to do
VERBOSE(3, "VW :: Evaluating translation options\n");
// which feature functions do we use (on the source and target side)
const std::vector<VWFeatureBase*>& sourceFeatures =
VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
const std::vector<VWFeatureBase*>& contextFeatures =
VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription());
const std::vector<VWFeatureBase*>& targetFeatures =
VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
size_t maxContextSize = VWFeatureBase::GetMaximumContextSize(GetScoreProducerDescription());
// only use stateful score computation when needed
bool haveTargetContextFeatures = ! contextFeatures.empty();
const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
if (m_train) {
//
// extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
//
// find which topts are correct
std::vector<bool> correct(translationOptionList.size());
std::vector<int> startsAt(translationOptionList.size());
std::set<int> uncoveredStartingPositions;
for (size_t i = 0; i < translationOptionList.size(); i++) {
std::pair<bool, int> isCorrect = IsCorrectTranslationOption(* translationOptionList.Get(i));
correct[i] = isCorrect.first;
startsAt[i] = isCorrect.second;
if (isCorrect.first) {
uncoveredStartingPositions.insert(isCorrect.second);
}
}
// optionally update translation options using leave-one-out
std::vector<bool> keep = (m_leaveOneOut.size() > 0)
? LeaveOneOut(translationOptionList, correct)
: std::vector<bool>(translationOptionList.size(), true);
while (! uncoveredStartingPositions.empty()) {
int currentStart = *uncoveredStartingPositions.begin();
uncoveredStartingPositions.erase(uncoveredStartingPositions.begin());
// check whether we (still) have some correct translation
int firstCorrect = -1;
for (size_t i = 0; i < translationOptionList.size(); i++) {
if (keep[i] && correct[i] && startsAt[i] == currentStart) {
firstCorrect = i;
break;
}
}
// do not train if there are no positive examples
if (firstCorrect == -1) {
VERBOSE(3, "VW :: skipping topt collection, no correct translation for span at current tgt start position\n");
continue;
}
// the first correct topt can be used by some loss functions
const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
// feature extraction *at prediction time* outputs feature hashes which can be cached;
// this is training time, simply store everything in this dummyVector
Discriminative::FeatureVector dummyVector;
// extract source side features
for(size_t i = 0; i < sourceFeatures.size(); ++i)
(*sourceFeatures[i])(input, sourceRange, classifier, dummyVector);
// build target-side context
Phrase targetContext;
for (size_t i = 0; i < maxContextSize; i++)
targetContext.AddWord(m_sentenceStartWord);
const Phrase *targetSent = GetStored()->m_sentence;
// word alignment info shifted by context size
AlignmentInfo contextAlignment = TransformAlignmentInfo(*GetStored()->m_alignment, maxContextSize, currentStart);
if (currentStart > 0)
targetContext.Append(targetSent->GetSubString(Range(0, currentStart - 1)));
// extract target-context features
for(size_t i = 0; i < contextFeatures.size(); ++i)
(*contextFeatures[i])(input, targetContext, contextAlignment, classifier, dummyVector);
// go over topts, extract target side features and train the classifier
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
// this topt was discarded by leaving one out
if (! keep[toptIdx])
continue;
// extract target-side features for each topt
const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
for(size_t i = 0; i < targetFeatures.size(); ++i)
(*targetFeatures[i])(input, targetPhrase, classifier, dummyVector);
bool isCorrect = correct[toptIdx] && startsAt[toptIdx] == currentStart;
float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, isCorrect);
// train classifier on current example
classifier.Train(MakeTargetLabel(targetPhrase), loss);
}
}
} else {
//
// predict using a trained classifier, use this in decoding (=at test time)
//
std::vector<float> losses(translationOptionList.size());
Discriminative::FeatureVector outFeaturesSourceNamespace;
// extract source side features
for(size_t i = 0; i < sourceFeatures.size(); ++i)
(*sourceFeatures[i])(input, sourceRange, classifier, outFeaturesSourceNamespace);
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
const TranslationOption *topt = translationOptionList.Get(toptIdx);
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
Discriminative::FeatureVector outFeaturesTargetNamespace;
// extract target-side features for each topt
for(size_t i = 0; i < targetFeatures.size(); ++i)
(*targetFeatures[i])(input, targetPhrase, classifier, outFeaturesTargetNamespace);
// cache the extracted target features (i.e. features associated with given topt)
// for future use at decoding time
size_t toptHash = hash_value(*topt);
m_tlsTranslationOptionFeatures->GetStored()->insert(
std::make_pair(toptHash, outFeaturesTargetNamespace));
// get classifier score
losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
}
// normalize classifier scores to get a probability distribution
std::vector<float> rawLosses = losses;
(*m_normalizer)(losses);
// update scores of topts
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
if (! haveTargetContextFeatures) {
// no target context features; evaluate the FF now
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
scoreBreakDown.PlusEquals(this, newScores);
topt->UpdateScore();
} else {
// We have target context features => this is just a partial score,
// do not add it to the score component collection.
size_t toptHash = hash_value(*topt);
// Subtract the score contribution of target-only features, otherwise it would
// be included twice.
Discriminative::FeatureVector emptySource;
const Discriminative::FeatureVector &targetFeatureVector =
m_tlsTranslationOptionFeatures->GetStored()->find(toptHash)->second;
classifier.AddLabelIndependentFeatureVector(emptySource);
classifier.AddLabelDependentFeatureVector(targetFeatureVector);
float targetOnlyLoss = classifier.Predict(VW_DUMMY_LABEL);
float futureScore = rawLosses[toptIdx] - targetOnlyLoss;
m_tlsFutureScores->GetStored()->insert(std::make_pair(toptHash, futureScore));
}
}
}
}
void VW::SetParameter(const std::string& key, const std::string& value) {
if (key == "train") {
m_train = Scan<bool>(value);
} else if (key == "path") {
m_modelPath = value;
} else if (key == "vw-options") {
m_vwOptions = value;
} else if (key == "leave-one-out-from") {
m_leaveOneOut = value;
} else if (key == "training-loss") {
// which type of loss to use for training
if (value == "basic") {
m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
} else if (value == "bleu") {
m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
} else {
UTIL_THROW2("Unknown training loss type:" << value);
}
} else if (key == "loss") {
// which normalizer to use (theoretically this depends on the loss function used for training
// the classifier: squared/logistic/hinge/..., hence the name "loss")
if (value == "logistic") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
} else if (value == "squared") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
} else {
UTIL_THROW2("Unknown loss type:" << value);
}
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
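// The keys handled above map directly to options on the VW feature line in
// moses.ini. Hypothetical examples (paths and feature names are made up):
//
//   decoding:  VW name=VW0 path=/path/to/model.vw vw-options="--quiet" loss=logistic
//   training:  VW name=VW0 train=true path=extracted-features.gz training-loss=bleu
//              leave-one-out-from=TranslationModel0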
void VW::InitializeForInput(ttasksptr const& ttask) {
// do not keep future cost estimates across sentences!
m_tlsFutureScores->GetStored()->clear();
// invalidate our caches after each sentence
m_tlsComputedStateExtensions->GetStored()->clear();
// clearing these caches is not strictly required for correctness; we do it
// so that they cannot grow indefinitely large, even though target contexts
// and translation options would produce identical features the next time
// we extract them...
m_tlsTargetContextFeatures->GetStored()->clear();
m_tlsTranslationOptionFeatures->GetStored()->clear();
InputType const& source = *(ttask->GetSource().get());
// tabbed sentence is assumed only in training
if (! m_train)
return;
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
"This feature function requires the TabbedSentence input type");
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
"TabbedSentence must contain target<tab>alignment");
// target sentence represented as a phrase
Phrase *target = new Phrase();
target->CreateFromString(
Output
, StaticData::Instance().options()->output.factor_order
, tabbedSentence.GetColumns()[0]
, NULL);
// word alignment between source and target sentence
// we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
// sentences, not phrases
AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
VWTargetSentence &targetSent = *GetStored();
targetSent.Clear();
targetSent.m_sentence = target;
targetSent.m_alignment = alignment;
// pre-compute max- and min- aligned points for faster translation option checking
targetSent.SetConstraints(source.GetSize());
}
/*************************************************************************************
* private methods
************************************************************************************/
const AlignmentInfo *VW::TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const {
std::set<std::pair<size_t, size_t> > alignmentPoints;
const Hypothesis *contextHypo = curHypo.GetPrevHypo();
int idxInContext = contextSize - 1;
int processedWordsInHypo = 0;
while (idxInContext >= 0 && contextHypo) {
int idxInHypo = contextHypo->GetCurrTargetLength() - 1 - processedWordsInHypo;
if (idxInHypo >= 0) {
const AlignmentInfo &hypoAlign = contextHypo->GetCurrTargetPhrase().GetAlignTerm();
std::set<size_t> alignedToTgt = hypoAlign.GetAlignmentsForTarget(idxInHypo);
size_t srcOffset = contextHypo->GetCurrSourceWordsRange().GetStartPos();
BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
alignmentPoints.insert(std::make_pair(srcOffset + srcIdx, idxInContext));
}
processedWordsInHypo++;
idxInContext--;
} else {
processedWordsInHypo = 0;
contextHypo = contextHypo->GetPrevHypo();
}
}
return AlignmentInfoCollection::Instance().Add(alignmentPoints);
}
AlignmentInfo VW::TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const {
std::set<std::pair<size_t, size_t> > alignmentPoints;
for (int i = std::max(0, currentStart - (int)contextSize); i < currentStart; i++) {
std::set<size_t> alignedToTgt = alignInfo.GetAlignmentsForTarget(i);
BOOST_FOREACH(size_t srcIdx, alignedToTgt) {
alignmentPoints.insert(std::make_pair(srcIdx, i + contextSize));
}
}
return AlignmentInfo(alignmentPoints);
}
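// Worked example (indices made up for illustration): with contextSize = 2 and
// currentStart = 3, the loop above visits sentence positions i in {1, 2}; an
// alignment point (src, 1) becomes (src, 3) and (src, 2) becomes (src, 4).
// This matches the context phrase built at training time: contextSize <S>
// tokens followed by target words 0..currentStart-1, so sentence word i sits
// at context position i + contextSize.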
std::pair<bool, int> VW::IsCorrectTranslationOption(const TranslationOption &topt) const {
//std::cerr << topt.GetSourceWordsRange() << std::endl;
int sourceStart = topt.GetSourceWordsRange().GetStartPos();
int sourceEnd = topt.GetSourceWordsRange().GetEndPos();
const VWTargetSentence &targetSentence = *GetStored();
// [targetStart, targetEnd] spans aligned target words
int targetStart = targetSentence.m_sentence->GetSize();
int targetEnd = -1;
// get the left-most and right-most alignment point within source span
for(int i = sourceStart; i <= sourceEnd; ++i) {
if(targetSentence.m_sourceConstraints[i].IsSet()) {
if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
targetStart = targetSentence.m_sourceConstraints[i].GetMin();
if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
}
}
// there was no alignment
if(targetEnd == -1)
return std::make_pair(false, -1);
//std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
// [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
int targetStart2 = targetStart;
// start searching one position outside the aligned span: the boundary words
// themselves are aligned, so starting at them would never expand the span
for(int i = targetStart2 - 1; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
targetStart2 = i;
int targetEnd2 = targetEnd;
for(int i = targetEnd2 + 1;
i < (int)targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
++i)
targetEnd2 = i;
//std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
const TargetPhrase &tphrase = topt.GetTargetPhrase();
//std::cerr << tphrase << std::endl;
// if target phrase is shorter than inner span return false
if(tphrase.GetSize() < targetEnd - targetStart + 1)
return std::make_pair(false, -1);
// if target phrase is longer than outer span return false
if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
return std::make_pair(false, -1);
// for each possible starting point
for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
bool found = true;
// check if the target phrase is within longer span
for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
found = false;
break;
}
}
// return true if there was a match
if(found) {
//std::cerr << "Found" << std::endl;
return std::make_pair(true, tempStart);
}
}
return std::make_pair(false, -1);
}
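// Worked example (alignment made up for illustration): if the source span is
// aligned to target words 4-5 (the inner span [targetStart, targetEnd]), word 3
// is unaligned and word 6 is aligned, the outer span [targetStart2, targetEnd2]
// becomes [3, 5]. A 2-word target phrase matching words 4-5 or a 3-word phrase
// matching words 3-5 is accepted (with start position 4 or 3, respectively);
// anything shorter than 2 or longer than 3 words is rejected.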
std::vector<bool> VW::LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
float sourceRawCount = 0.0;
const float ONE = 1.0001; // slightly above 1 to stay on the safe side of floating-point rounding when discounting counts
std::vector<bool> keepOpt;
for (size_t i = 0; i < topts.size(); i++) {
TranslationOption *topt = *(topts.begin() + i);
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
// extract raw counts from phrase-table property
const CountsPhraseProperty *property =
static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
if (! property) {
VERBOSE(2, "VW :: Counts not found for topt! Is this an OOV?\n");
// keep all translation opts without updating, this is either OOV or bad usage...
keepOpt.assign(topts.size(), true);
return keepOpt;
}
if (sourceRawCount == 0.0) {
sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
if (sourceRawCount <= 0) {
// no translation options survived, source phrase was a singleton
keepOpt.assign(topts.size(), false);
return keepOpt;
}
}
float discount = correct[i] ? ONE : 0.0;
float target = property->GetTargetMarginal() - discount;
float joint = property->GetJointCount() - discount;
if (discount != 0.0) VERBOSE(3, "VW :: leaving one out!\n");
if (joint > 0) {
// topt survived leaving one out, update its scores
const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
scores[0] = TransformScore(joint / target); // P(f|e)
scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
scoreBreakDown.Assign(feature, scores);
topt->UpdateScore();
keepOpt.push_back(true);
} else {
// they only occurred together once, discard topt
VERBOSE(2, "VW :: discarded topt when leaving one out\n");
keepOpt.push_back(false);
}
}
return keepOpt;
}
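// Worked example (counts made up for illustration): for a phrase pair with
// joint count 3, target marginal 10 and source marginal 7, leaving one out for
// a correct option gives joint of roughly 2, target of roughly 9 and
// sourceRawCount of roughly 6, so the recomputed scores are about P(f|e) = 2/9
// and P(e|f) = 2/6 instead of 3/10 and 3/7. A pair seen only once (joint count
// 1) drops just below zero and the option is discarded.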
} // namespace Moses

View File

@@ -3,8 +3,12 @@
#include <string>
#include <map>
#include <limits>
#include <vector>
#include "moses/FF/StatelessFeatureFunction.h"
#include <boost/unordered_map.hpp>
#include <boost/functional/hash.hpp>
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/PP/CountsPhraseProperty.h"
#include "moses/TranslationOptionList.h"
#include "moses/TranslationOption.h"
@@ -13,6 +17,8 @@
#include "moses/StaticData.h"
#include "moses/Phrase.h"
#include "moses/AlignmentInfo.h"
#include "moses/Word.h"
#include "moses/FactorCollection.h"
#include "Normalizer.h"
#include "Classifier.h"
@@ -20,119 +26,50 @@
#include "TabbedSentence.h"
#include "ThreadLocalByFeatureStorage.h"
#include "TrainingLoss.h"
#include "VWTargetSentence.h"
/*
* VW classifier feature. See vw/README.md for further information.
*
* TODO: say which paper to cite.
*/
namespace Moses
{
const std::string VW_DUMMY_LABEL = "1111"; // VW does not use the actual label, other classifiers might
/**
* Helper class for storing alignment constraints.
*/
class Constraint
{
public:
Constraint() : m_min(std::numeric_limits<int>::max()), m_max(-1) {}
Constraint(int min, int max) : m_min(min), m_max(max) {}
/**
* We are aligned to point => our min cannot be larger, our max cannot be smaller.
*/
void Update(int point) {
if (m_min > point) m_min = point;
if (m_max < point) m_max = point;
}
bool IsSet() const {
return m_max != -1;
}
int GetMin() const {
return m_min;
}
int GetMax() const {
return m_max;
}
private:
int m_min, m_max;
};
/**
* VW thread-specific data about target sentence.
*/
struct VWTargetSentence {
VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
void Clear() {
if (m_sentence) delete m_sentence;
if (m_alignment) delete m_alignment;
}
~VWTargetSentence() {
Clear();
}
void SetConstraints(size_t sourceSize) {
// initialize to unconstrained
m_sourceConstraints.assign(sourceSize, Constraint());
m_targetConstraints.assign(m_sentence->GetSize(), Constraint());
// set constraints according to alignment points
AlignmentInfo::const_iterator it;
for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
int src = it->first;
int tgt = it->second;
if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
}
m_sourceConstraints[src].Update(tgt);
m_targetConstraints[tgt].Update(src);
}
}
Phrase *m_sentence;
AlignmentInfo *m_alignment;
std::vector<Constraint> m_sourceConstraints, m_targetConstraints;
};
// dummy class label; VW does not use the actual label, other classifiers might
const std::string VW_DUMMY_LABEL = "1111";
// thread-specific classifier instance
typedef ThreadLocalByFeatureStorage<Discriminative::Classifier, Discriminative::ClassifierFactory &> TLSClassifier;
// current target sentence, used in VW training (vwtrainer), not in decoding (prediction time)
typedef ThreadLocalByFeatureStorage<VWTargetSentence> TLSTargetSentence;
class VW : public StatelessFeatureFunction, public TLSTargetSentence
// hash table of feature vectors
typedef boost::unordered_map<size_t, Discriminative::FeatureVector> FeatureVectorMap;
// thread-specific feature vector hash
typedef ThreadLocalByFeatureStorage<FeatureVectorMap> TLSFeatureVectorMap;
// hash table of partial scores
typedef boost::unordered_map<size_t, float> FloatHashMap;
// thread-specific score hash table, used for caching
typedef ThreadLocalByFeatureStorage<FloatHashMap> TLSFloatHashMap;
// thread-specific hash table for caching full classifier outputs
typedef ThreadLocalByFeatureStorage<boost::unordered_map<size_t, FloatHashMap> > TLSStateExtensions;
/*
* VW feature function. A discriminative classifier with source and target context features.
*/
class VW : public StatefulFeatureFunction, public TLSTargetSentence
{
public:
VW(const std::string &line)
: StatelessFeatureFunction(1, line)
, TLSTargetSentence(this)
, m_train(false) {
ReadParameters();
Discriminative::ClassifierFactory *classifierFactory = m_train
? new Discriminative::ClassifierFactory(m_modelPath)
: new Discriminative::ClassifierFactory(m_modelPath, m_vwOptions);
VW(const std::string &line);
m_tlsClassifier = new TLSClassifier(this, *classifierFactory);
if (! m_normalizer) {
VERBOSE(1, "VW :: No loss function specified, assuming logistic loss.\n");
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
}
if (! m_trainingLoss) {
VERBOSE(1, "VW :: Using basic 1/0 loss calculation in training.\n");
m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
}
}
virtual ~VW() {
delete m_tlsClassifier;
delete m_normalizer;
}
virtual ~VW();
bool IsUseable(const FactorMask &mask) const {
return true;
@@ -152,335 +89,89 @@ public:
, ScoreComponentCollection *estimatedFutureScore = NULL) const {
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
Discriminative::Classifier &classifier = *m_tlsClassifier->GetStored();
// The behavior of this method depends on whether it is called during VW
// training (feature extraction) by vwtrainer or during decoding (prediction
// time) by Moses.
//
// When predicting, it evaluates all translation options with the VW model;
// if no target-context features are defined, this is the final score and it
// is added directly to the TranslationOption score. If there are target
// context features, the score is a partial score and it is only stored in
// cache; the final score is computed based on target context in
// EvaluateWhenApplied().
//
// This method is also used in training by vwtrainer, in which case features
// are written to a file and no classifier predictions take place. Target-side
// context is constant at training time (we know the true target sentence),
// so target-context features are extracted here as well.
virtual void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const;
if (translationOptionList.size() == 0)
return; // nothing to do
// Evaluate VW during decoding. This is only used at prediction time (not in training).
// When no target-context features are defined, VW predictions were already fully calculated
// in EvaluateTranslationOptionListWithSourceContext() and the scores were already added to
// the translation options.
// If there are target-context features, we compute the context-dependent part of the
// classifier score and combine it with the source-context only partial score which was computed
// in EvaluateTranslationOptionListWithSourceContext(). Various caches are used to make this
// method more efficient.
virtual FFState* EvaluateWhenApplied(
const Hypothesis& curHypo,
const FFState* prevState,
ScoreComponentCollection* accumulator) const;
VERBOSE(2, "VW :: Evaluating translation options\n");
// which feature functions do we use (on the source and target side)
const std::vector<VWFeatureBase*>& sourceFeatures =
VWFeatureBase::GetSourceFeatures(GetScoreProducerDescription());
const std::vector<VWFeatureBase*>& targetFeatures =
VWFeatureBase::GetTargetFeatures(GetScoreProducerDescription());
const Range &sourceRange = translationOptionList.Get(0)->GetSourceWordsRange();
const InputPath &inputPath = translationOptionList.Get(0)->GetInputPath();
if (m_train) {
//
// extract features for training the classifier (only call this when using vwtrainer, not in Moses!)
//
// find which topts are correct
std::vector<bool> correct(translationOptionList.size());
for (size_t i = 0; i < translationOptionList.size(); i++)
correct[i] = IsCorrectTranslationOption(* translationOptionList.Get(i));
// optionally update translation options using leave-one-out
std::vector<bool> keep = (m_leaveOneOut.size() > 0)
? LeaveOneOut(translationOptionList, correct)
: std::vector<bool>(translationOptionList.size(), true);
// check whether we (still) have some correct translation
int firstCorrect = -1;
for (size_t i = 0; i < translationOptionList.size(); i++) {
if (keep[i] && correct[i]) {
firstCorrect = i;
break;
}
}
// do not train if there are no positive examples
if (firstCorrect == -1) {
VERBOSE(2, "VW :: skipping topt collection, no correct translation for span\n");
return;
}
// the first correct topt can be used by some loss functions
const TargetPhrase &correctPhrase = translationOptionList.Get(firstCorrect)->GetTargetPhrase();
// extract source side features
for(size_t i = 0; i < sourceFeatures.size(); ++i)
(*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
// go over topts, extract target side features and train the classifier
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
// this topt was discarded by leaving one out
if (! keep[toptIdx])
continue;
// extract target-side features for each topt
const TargetPhrase &targetPhrase = translationOptionList.Get(toptIdx)->GetTargetPhrase();
for(size_t i = 0; i < targetFeatures.size(); ++i)
(*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
float loss = (*m_trainingLoss)(targetPhrase, correctPhrase, correct[toptIdx]);
// train classifier on current example
classifier.Train(MakeTargetLabel(targetPhrase), loss);
}
} else {
//
// predict using a trained classifier, use this in decoding (=at test time)
//
std::vector<float> losses(translationOptionList.size());
// extract source side features
for(size_t i = 0; i < sourceFeatures.size(); ++i)
(*sourceFeatures[i])(input, inputPath, sourceRange, classifier);
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
const TranslationOption *topt = translationOptionList.Get(toptIdx);
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
// extract target-side features for each topt
for(size_t i = 0; i < targetFeatures.size(); ++i)
(*targetFeatures[i])(input, inputPath, targetPhrase, classifier);
// get classifier score
losses[toptIdx] = classifier.Predict(MakeTargetLabel(targetPhrase));
}
// normalize classifier scores to get a probability distribution
(*m_normalizer)(losses);
// update scores of topts
for (size_t toptIdx = 0; toptIdx < translationOptionList.size(); toptIdx++) {
TranslationOption *topt = *(translationOptionList.begin() + toptIdx);
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = FloorScore(TransformScore(losses[toptIdx]));
ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
scoreBreakDown.PlusEquals(this, newScores);
topt->UpdateScore();
}
}
virtual FFState* EvaluateWhenApplied(
const ChartHypothesis&,
int,
ScoreComponentCollection* accumulator) const {
throw std::logic_error("hierarchical/syntax not supported");
}
void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const {
}
// Initial VW state; contains unaligned BOS symbols.
const FFState* EmptyHypothesisState(const InputType &input) const;
void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const {
}
void SetParameter(const std::string& key, const std::string& value) {
if (key == "train") {
m_train = Scan<bool>(value);
} else if (key == "path") {
m_modelPath = value;
} else if (key == "vw-options") {
m_vwOptions = value;
} else if (key == "leave-one-out-from") {
m_leaveOneOut = value;
} else if (key == "training-loss") {
// which type of loss to use for training
if (value == "basic") {
m_trainingLoss = (TrainingLoss *) new TrainingLossBasic();
} else if (value == "bleu") {
m_trainingLoss = (TrainingLoss *) new TrainingLossBLEU();
} else {
UTIL_THROW2("Unknown training loss type:" << value);
}
} else if (key == "loss") {
// which normalizer to use (theoretically depends on the loss function used for training the
// classifier (squared/logistic/hinge/...), hence the name "loss"
if (value == "logistic") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::LogisticLossNormalizer();
} else if (value == "squared") {
m_normalizer = (Discriminative::Normalizer *) new Discriminative::SquaredLossNormalizer();
} else {
UTIL_THROW2("Unknown loss type:" << value);
}
} else {
StatelessFeatureFunction::SetParameter(key, value);
}
}
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& source = *(ttask->GetSource().get());
// tabbed sentence is assumed only in training
if (! m_train)
return;
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
"This feature function requires the TabbedSentence input type");
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() < 2,
"TabbedSentence must contain target<tab>alignment");
// target sentence represented as a phrase
Phrase *target = new Phrase();
target->CreateFromString(
Output
, StaticData::Instance().options()->output.factor_order
, tabbedSentence.GetColumns()[0]
, NULL);
// word alignment between source and target sentence
// we don't store alignment info in AlignmentInfoCollection because we keep alignments of whole
// sentences, not phrases
AlignmentInfo *alignment = new AlignmentInfo(tabbedSentence.GetColumns()[1]);
VWTargetSentence &targetSent = *GetStored();
targetSent.Clear();
targetSent.m_sentence = target;
targetSent.m_alignment = alignment;
// pre-compute max- and min- aligned points for faster translation option checking
targetSent.SetConstraints(source.GetSize());
}
void SetParameter(const std::string& key, const std::string& value);
// At prediction time, this clears our caches. At training time, we load the next sentence, its
// translation and word alignment.
virtual void InitializeForInput(ttasksptr const& ttask);
private:
std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
return VW_DUMMY_LABEL;
inline std::string MakeTargetLabel(const TargetPhrase &targetPhrase) const {
return VW_DUMMY_LABEL; // VW does not care about class labels in our setting (--csoaa_ldf mc).
}
bool IsCorrectTranslationOption(const TranslationOption &topt) const {
//std::cerr << topt.GetSourceWordsRange() << std::endl;
int sourceStart = topt.GetSourceWordsRange().GetStartPos();
int sourceEnd = topt.GetSourceWordsRange().GetEndPos();
const VWTargetSentence &targetSentence = *GetStored();
// [targetStart, targetEnd] spans aligned target words
int targetStart = targetSentence.m_sentence->GetSize();
int targetEnd = -1;
// get the left-most and right-most alignment point within source span
for(int i = sourceStart; i <= sourceEnd; ++i) {
if(targetSentence.m_sourceConstraints[i].IsSet()) {
if(targetStart > targetSentence.m_sourceConstraints[i].GetMin())
targetStart = targetSentence.m_sourceConstraints[i].GetMin();
if(targetEnd < targetSentence.m_sourceConstraints[i].GetMax())
targetEnd = targetSentence.m_sourceConstraints[i].GetMax();
}
}
// there was no alignment
if(targetEnd == -1)
return false;
//std::cerr << "Shorter: " << targetStart << " " << targetEnd << std::endl;
// [targetStart2, targetEnd2] spans unaligned words left and right of [targetStart, targetEnd]
int targetStart2 = targetStart;
for(int i = targetStart2; i >= 0 && !targetSentence.m_targetConstraints[i].IsSet(); --i)
targetStart2 = i;
int targetEnd2 = targetEnd;
for(int i = targetEnd2;
i < targetSentence.m_sentence->GetSize() && !targetSentence.m_targetConstraints[i].IsSet();
++i)
targetEnd2 = i;
//std::cerr << "Longer: " << targetStart2 << " " << targetEnd2 << std::endl;
const TargetPhrase &tphrase = topt.GetTargetPhrase();
//std::cerr << tphrase << std::endl;
// if target phrase is shorter than inner span return false
if(tphrase.GetSize() < targetEnd - targetStart + 1)
return false;
// if target phrase is longer than outer span return false
if(tphrase.GetSize() > targetEnd2 - targetStart2 + 1)
return false;
// for each possible starting point
for(int tempStart = targetStart2; tempStart <= targetStart; tempStart++) {
bool found = true;
// check if the target phrase is within longer span
for(int i = tempStart; i <= targetEnd2 && i < tphrase.GetSize() + tempStart; ++i) {
if(tphrase.GetWord(i - tempStart) != targetSentence.m_sentence->GetWord(i)) {
found = false;
break;
}
}
// return true if there was a match
if(found) {
//std::cerr << "Found" << std::endl;
return true;
}
}
return false;
inline size_t MakeCacheKey(const FFState *prevState, size_t spanStart, size_t spanEnd) const {
size_t key = 0;
boost::hash_combine(key, prevState);
boost::hash_combine(key, spanStart);
boost::hash_combine(key, spanEnd);
return key;
}
std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const {
UTIL_THROW_IF2(m_leaveOneOut.size() == 0 || ! m_train, "LeaveOneOut called in wrong setting!");
// used in decoding to transform the global word alignment information into
// context-phrase internal alignment information (i.e., with target indices corresponding
// to positions in contextPhrase)
const AlignmentInfo *TransformAlignmentInfo(const Hypothesis &curHypo, size_t contextSize) const;
float sourceRawCount = 0.0;
const float ONE = 1.0001; // I don't understand floating point numbers
// used during training to extract relevant alignment points from the full sentence alignment
// and shift them by target context size
AlignmentInfo TransformAlignmentInfo(const AlignmentInfo &alignInfo, size_t contextSize, int currentStart) const;
std::vector<bool> keepOpt;
// At training time, determine whether a translation option is correct for the current target sentence
// based on word alignment. This is a bit complicated because we need to handle various corner-cases
// where some word(s) on phrase borders are unaligned.
std::pair<bool, int> IsCorrectTranslationOption(const TranslationOption &topt) const;
for (size_t i = 0; i < topts.size(); i++) {
TranslationOption *topt = *(topts.begin() + i);
const TargetPhrase &targetPhrase = topt->GetTargetPhrase();
// extract raw counts from phrase-table property
const CountsPhraseProperty *property =
static_cast<const CountsPhraseProperty *>(targetPhrase.GetProperty("Counts"));
if (! property) {
VERBOSE(1, "VW :: Counts not found for topt! Is this an OOV?\n");
// keep all translation opts without updating, this is either OOV or bad usage...
keepOpt.assign(topts.size(), true);
return keepOpt;
}
if (sourceRawCount == 0.0) {
sourceRawCount = property->GetSourceMarginal() - ONE; // discount one occurrence of the source phrase
if (sourceRawCount <= 0) {
// no translation options survived, source phrase was a singleton
keepOpt.assign(topts.size(), false);
return keepOpt;
}
}
float discount = correct[i] ? ONE : 0.0;
float target = property->GetTargetMarginal() - discount;
float joint = property->GetJointCount() - discount;
if (discount != 0.0) VERBOSE(2, "VW :: leaving one out!\n");
if (joint > 0) {
// topt survived leaving one out, update its scores
const FeatureFunction *feature = &FindFeatureFunction(m_leaveOneOut);
std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(feature);
UTIL_THROW_IF2(scores.size() != 4, "Unexpected number of scores in feature " << m_leaveOneOut);
scores[0] = TransformScore(joint / target); // P(f|e)
scores[2] = TransformScore(joint / sourceRawCount); // P(e|f)
ScoreComponentCollection &scoreBreakDown = topt->GetScoreBreakdown();
scoreBreakDown.Assign(feature, scores);
topt->UpdateScore();
keepOpt.push_back(true);
} else {
// they only occurred together once, discard topt
VERBOSE(2, "VW :: discarded topt when leaving one out\n");
keepOpt.push_back(false);
}
}
return keepOpt;
}
// At training time, optionally discount occurrences of phrase pairs from the current
// sentence; this helps prevent over-fitting.
std::vector<bool> LeaveOneOut(const TranslationOptionList &topts, const std::vector<bool> &correct) const;
bool m_train; // false means predict
std::string m_modelPath;
std::string m_vwOptions;
std::string m_modelPath; // path to the VW model file; at training time, this is where extracted features are stored
std::string m_vwOptions; // options for Vowpal Wabbit
// BOS token, all factors
Word m_sentenceStartWord;
// calculator of training loss
TrainingLoss *m_trainingLoss = NULL;
@@ -488,9 +179,16 @@ private:
// optionally contains feature name of a phrase table where we recompute scores with leaving one out
std::string m_leaveOneOut;
// normalizer, typically this means softmax
Discriminative::Normalizer *m_normalizer = NULL;
// thread-specific classifier instance
TLSClassifier *m_tlsClassifier;
// caches for partial scores and feature vectors
TLSFloatHashMap *m_tlsFutureScores;
TLSStateExtensions *m_tlsComputedStateExtensions;
TLSFeatureVectorMap *m_tlsTranslationOptionFeatures, *m_tlsTargetContextFeatures;
};
}

View File

@@ -2,11 +2,25 @@
#include <string>
#include <algorithm>
#include "VWFeatureBase.h"
#include "VWFeatureContext.h"
namespace Moses
{
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_features;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_sourceFeatures;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetContextFeatures;
std::map<std::string, std::vector<VWFeatureBase*> > VWFeatureBase::s_targetFeatures;
std::map<std::string, size_t> VWFeatureBase::s_targetContextLength;
void VWFeatureBase::UpdateContextSize(const std::string &usedBy) {
// using the standard map behavior here: if the entry does not
// exist, it will be added and initialized to zero
size_t currentSize = s_targetContextLength[usedBy];
size_t newSize = static_cast<VWFeatureContext *>(this)->GetContextSize();
s_targetContextLength[usedBy] = std::max(currentSize, newSize);
}
}
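// Example (sizes made up for illustration): if a classifier named VW0 uses a
// target-context feature with size=2 and another with size=3, both register
// through UpdateRegister() and call UpdateContextSize("VW0"), so
// GetMaximumContextSize("VW0") returns 3 and the decoder keeps three words of
// target context in its VW state.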

View File

@@ -12,11 +12,17 @@
namespace Moses
{
enum VWFeatureType {
vwft_source,
vwft_target,
vwft_targetContext
};
class VWFeatureBase : public StatelessFeatureFunction
{
public:
VWFeatureBase(const std::string &line, bool isSource = true)
: StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_isSource(isSource) {
VWFeatureBase(const std::string &line, VWFeatureType featureType = vwft_source)
: StatelessFeatureFunction(0, line), m_usedBy(1, "VW0"), m_featureType(featureType) {
// defaults
m_sourceFactors.push_back(0);
m_targetFactors.push_back(0);
@@ -71,26 +77,47 @@ public:
return s_sourceFeatures[name];
}
// Return only target-context classifier features
static const std::vector<VWFeatureBase*>& GetTargetContextFeatures(std::string name = "VW0") {
// don't throw an exception when there are no target-context features; this feature type is not mandatory
return s_targetContextFeatures[name];
}
// Return only target-dependent classifier features
static const std::vector<VWFeatureBase*>& GetTargetFeatures(std::string name = "VW0") {
UTIL_THROW_IF2(s_targetFeatures.count(name) == 0, "No target features registered for parent classifier: " + name);
return s_targetFeatures[name];
}
// Required context length (the maximum context size over all defined target-context features)
static size_t GetMaximumContextSize(std::string name = "VW0") {
return s_targetContextLength[name]; // 0 by default
}
// Overload to process source-dependent data, create features once for every
// source sentence word range.
virtual void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const = 0;
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
// Overload to process target-dependent features, create features once for
// every target phrase. One source word range will have at leat one target
// every target phrase. One source word range will have at least one target
// phrase, but may have more.
virtual void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const = 0;
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
// Overload to process target-context dependent features, these features are
// evaluated during decoding. For efficiency, features are not fed directly into
// the classifier object but instead output in the vector "outFeatures" and managed
// separately in VW.h.
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const = 0;
protected:
std::vector<FactorType> m_sourceFactors, m_targetFactors;
@@ -99,10 +126,15 @@ protected:
for(std::vector<std::string>::const_iterator it = m_usedBy.begin();
it != m_usedBy.end(); it++) {
s_features[*it].push_back(this);
if(m_isSource)
if(m_featureType == vwft_source) {
s_sourceFeatures[*it].push_back(this);
else
} else if (m_featureType == vwft_targetContext) {
s_targetContextFeatures[*it].push_back(this);
UpdateContextSize(*it);
} else {
s_targetFeatures[*it].push_back(this);
}
}
}
@@ -112,11 +144,16 @@ private:
Tokenize(m_usedBy, usedBy, ",");
}
void UpdateContextSize(const std::string &usedBy);
std::vector<std::string> m_usedBy;
bool m_isSource;
VWFeatureType m_featureType;
static std::map<std::string, std::vector<VWFeatureBase*> > s_features;
static std::map<std::string, std::vector<VWFeatureBase*> > s_sourceFeatures;
static std::map<std::string, std::vector<VWFeatureBase*> > s_targetContextFeatures;
static std::map<std::string, std::vector<VWFeatureBase*> > s_targetFeatures;
static std::map<std::string, size_t> s_targetContextLength;
};
}

View File

@@ -0,0 +1,116 @@
#pragma once
#include <string>
#include <set>
#include <vector>
#include <boost/foreach.hpp>
#include "VWFeatureBase.h"
#include "moses/InputType.h"
#include "moses/TypeDef.h"
#include "moses/Word.h"
namespace Moses
{
// Inherit from this for target-context classifier features. They will
// automatically register with the classifier class named VW0 or with one or more
// names specified by the used-by=name1,name2,... parameter.
//
// The classifier gets a full list by calling
// VWFeatureBase::GetTargetContextFeatures(GetScoreProducerDescription())
class VWFeatureContext : public VWFeatureBase
{
public:
VWFeatureContext(const std::string &line, size_t contextSize)
: VWFeatureBase(line, vwft_targetContext), m_contextSize(contextSize) {
}
// Gets its pure virtual functions from VWFeatureBase
virtual void operator()(const InputType &input
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void operator()(const InputType &input
, const Range &sourceRange
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void SetParameter(const std::string& key, const std::string& value) {
if (key == "size") {
m_contextSize = Scan<size_t>(value);
} else if (key == "factor-positions") {
// factor positions: assuming a factor such as positional morphological tag, use this
// option to select only certain positions; this assumes that only a single
// target-side factor is defined
Tokenize<size_t>(m_factorPositions, value, ",");
} else {
VWFeatureBase::SetParameter(key, value);
}
}
size_t GetContextSize() {
return m_contextSize;
}
protected:
// Get word with the correct subset of factors as string. Because we're target
// context features, we look at a limited number of words to the left of the
// current translation. posFromEnd is interpreted like this:
// 0 = last word of the hypothesis
// 1 = next to last word
// ...etc.
inline std::string GetWord(const Phrase &phrase, size_t posFromEnd) const {
const Word &word = phrase.GetWord(phrase.GetSize() - posFromEnd - 1);
if (m_factorPositions.empty()) {
return word.GetString(m_targetFactors, false);
} else {
if (m_targetFactors.size() != 1)
UTIL_THROW2("You can only use factor-positions when a single target-side factor is defined.");
const std::string &fullFactor = word.GetFactor(m_targetFactors[0])->GetString().as_string();
// corner cases: at sentence beginning/end, we don't have the correct factors set up
// similarly for UNK
if (fullFactor == BOS_ || fullFactor == EOS_ || fullFactor == UNKNOWN_FACTOR)
return fullFactor;
std::string subFactor(m_factorPositions.size(), 'x'); // initialize string with correct size and placeholder chars
for (size_t i = 0; i < m_factorPositions.size(); i++)
subFactor[i] = fullFactor[m_factorPositions[i]];
return subFactor;
}
}
// some target-context feature functions also look at the source
inline std::string GetSourceWord(const InputType &input, size_t pos) const {
return input.GetWord(pos).GetString(m_sourceFactors, false);
}
// get source words aligned to a particular context word
std::vector<std::string> GetAlignedSourceWords(const Phrase &contextPhrase
, const InputType &input
, const AlignmentInfo &alignInfo
, size_t posFromEnd) const {
size_t idx = contextPhrase.GetSize() - posFromEnd - 1;
std::set<size_t> alignedToTarget = alignInfo.GetAlignmentsForTarget(idx);
std::vector<std::string> out;
out.reserve(alignedToTarget.size());
BOOST_FOREACH(size_t srcIdx, alignedToTarget) {
out.push_back(GetSourceWord(input, srcIdx));
}
return out;
}
// required context size
size_t m_contextSize;
// factor positions: assuming a factor such as positional morphological tag, use this
// option to select only certain positions
std::vector<size_t> m_factorPositions;
};
}
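A small self-contained sketch (illustrative only; the factor string and positions are made up) of the index arithmetic in GetWord and the factor-positions extraction above:

#include <cassert>
#include <string>
#include <vector>
// mirrors the posFromEnd indexing in VWFeatureContext::GetWord
size_t PhrasePos(size_t phraseSize, size_t posFromEnd) {
return phraseSize - posFromEnd - 1;
}
// mirrors the factor-positions sub-string extraction
std::string SubFactor(const std::string &fullFactor, const std::vector<size_t> &positions) {
std::string out(positions.size(), 'x');
for (size_t i = 0; i < positions.size(); i++)
out[i] = fullFactor[positions[i]];
return out;
}
int main() {
assert(PhrasePos(5, 0) == 4); // last word of the context phrase
assert(PhrasePos(5, 1) == 3); // next-to-last word
std::vector<size_t> positions;
positions.push_back(0);
positions.push_back(2);
assert(SubFactor("N4sf", positions) == "Ns"); // made-up positional morphological tag
return 0;
}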

View File

@@ -0,0 +1,40 @@
#pragma once
#include <string>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"
namespace Moses
{
class VWFeatureContextBigrams : public VWFeatureContext
{
public:
VWFeatureContextBigrams(const std::string &line)
: VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 1; i < m_contextSize; i++)
outFeatures.push_back(classifier.AddLabelIndependentFeature("tcbigram^-" + SPrint(i + 1)
+ "^" + GetWord(contextPhrase, i - 1) + "^" + GetWord(contextPhrase, i)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {
VWFeatureContext::SetParameter(key, value);
}
private:
static const int DEFAULT_WINDOW_SIZE = 1;
};
}
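For illustration (context words made up): with size=3 and a target context ending in "the black cat", the loop above emits the label-independent features:

tcbigram^-2^cat^black
tcbigram^-3^black^the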

View File

@@ -0,0 +1,45 @@
#pragma once
#include <string>
#include <boost/foreach.hpp>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"
namespace Moses
{
class VWFeatureContextBilingual : public VWFeatureContext
{
public:
VWFeatureContextBilingual(const std::string &line)
: VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < m_contextSize; i++) {
std::string tgtWord = GetWord(contextPhrase, i);
std::vector<std::string> alignedTo = GetAlignedSourceWords(contextPhrase, input, alignmentInfo, i);
BOOST_FOREACH(const std::string &srcWord, alignedTo) {
outFeatures.push_back(classifier.AddLabelIndependentFeature("tcblng^-" + SPrint(i + 1) + "^" + tgtWord + "^" + srcWord));
}
}
}
virtual void SetParameter(const std::string& key, const std::string& value) {
VWFeatureContext::SetParameter(key, value);
}
private:
static const int DEFAULT_WINDOW_SIZE = 1;
};
}

View File

@@ -0,0 +1,39 @@
#pragma once
#include <string>
#include <algorithm>
#include "VWFeatureContext.h"
#include "moses/Util.h"
namespace Moses
{
class VWFeatureContextWindow : public VWFeatureContext
{
public:
VWFeatureContextWindow(const std::string &line)
: VWFeatureContext(line, DEFAULT_WINDOW_SIZE) {
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < m_contextSize; i++)
outFeatures.push_back(classifier.AddLabelIndependentFeature("tcwin^-" + SPrint(i + 1) + "^" + GetWord(contextPhrase, i)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {
VWFeatureContext::SetParameter(key, value);
}
private:
static const int DEFAULT_WINDOW_SIZE = 1;
};
}

View File

@@ -19,15 +19,22 @@ class VWFeatureSource : public VWFeatureBase
{
public:
VWFeatureSource(const std::string &line)
: VWFeatureBase(line, true) {
: VWFeatureBase(line, vwft_source) {
}
// Gets its pure virtual functions from VWFeatureBase
virtual void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void SetParameter(const std::string& key, const std::string& value) {


@ -18,11 +18,11 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < input.GetSize(); i++) {
classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i));
outFeatures.push_back(classifier.AddLabelIndependentFeature("bow^" + GetWord(input, i)));
}
}


@ -18,11 +18,11 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 1; i < input.GetSize(); i++) {
classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i));
outFeatures.push_back(classifier.AddLabelIndependentFeature("bigram^" + GetWord(input, i - 1) + "^" + GetWord(input, i)));
}
}


@ -23,12 +23,12 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
const Features& features = *m_tls.GetStored();
for (size_t i = 0; i < features.size(); i++) {
classifier.AddLabelIndependentFeature("srcext^" + features[i]);
outFeatures.push_back(classifier.AddLabelIndependentFeature("srcext^" + features[i]));
}
}


@ -20,9 +20,9 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
size_t begin = sourceRange.GetStartPos();
size_t end = sourceRange.GetEndPos() + 1;
@ -31,7 +31,7 @@ public:
for (size_t i = 0; i < end - begin; i++)
words[i] = GetWord(input, begin + i);
classifier.AddLabelIndependentFeature("sind^" + Join(" ", words));
outFeatures.push_back(classifier.AddLabelIndependentFeature("sind^" + Join(" ", words)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {


@ -20,14 +20,14 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
size_t begin = sourceRange.GetStartPos();
size_t end = sourceRange.GetEndPos() + 1;
while (begin < end) {
classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
outFeatures.push_back(classifier.AddLabelIndependentFeature("sin^" + GetWord(input, begin++)));
}
}


@ -51,9 +51,9 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
@ -64,24 +64,24 @@ public:
// before current phrase
for (int i = std::max(0, begin - m_size); i < begin; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob);
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsb^" + forms[i] + sense.m_label, sense.m_prob));
}
}
// within current phrase
for (int i = begin; i < end; i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob);
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsin^" + forms[i] + sense.m_label, sense.m_prob));
}
}
// after current phrase
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
BOOST_FOREACH(const Sense &sense, senses[i]) {
classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob);
classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob);
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + SPrint(i - begin) + "^" + sense.m_label, sense.m_prob));
outFeatures.push_back(classifier.AddLabelIndependentFeature("snsa^" + forms[i] + sense.m_label, sense.m_prob));
}
}
}
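The sense-window extractor emits each sense twice per position, once anchored to the offset from the current phrase and once backed off to the bare form, both weighted by the sense probability, so sparse position-specific evidence can fall back on position-independent evidence. A toy sketch of the pair in VW's textual name:value form, using a hypothetical sense distribution for "bank":

#include <iostream>
#include <string>

int main() {
  // hypothetical disambiguation output: "bank" is river_bank with p = 0.7
  std::string form = "bank", label = "river_bank";
  float prob = 0.7f;
  int offset = -2; // position relative to the start of the current phrase
  // position-specific variant and backed-off variant, both weighted by prob
  std::cout << "snsb^" << form << offset << "^" << label << ":" << prob << "\n";
  std::cout << "snsb^" << form << label << ":" << prob << "\n";
  return 0;
}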


@ -20,19 +20,19 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
for (int i = std::max(0, begin - m_size); i < begin; i++) {
classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i)));
}
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
outFeatures.push_back(classifier.AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i)));
}
}


@ -17,15 +17,22 @@ class VWFeatureTarget : public VWFeatureBase
{
public:
VWFeatureTarget(const std::string &line)
: VWFeatureBase(line, false) {
: VWFeatureBase(line, vwft_target) {
}
// Gets its pure virtual functions from VWFeatureBase
virtual void operator()(const InputType &input
, const InputPath &inputPath
, const Range &sourceRange
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void operator()(const InputType &input
, const Phrase &contextPhrase
, const AlignmentInfo &alignmentInfo
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
}
virtual void SetParameter(const std::string& key, const std::string& value) {


@ -17,11 +17,11 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 1; i < targetPhrase.GetSize(); i++) {
classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i));
outFeatures.push_back(classifier.AddLabelDependentFeature("tbigram^" + GetWord(targetPhrase, i - 1) + "^" + GetWord(targetPhrase, i)));
}
}


@ -17,10 +17,10 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const {
classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors));
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
outFeatures.push_back(classifier.AddLabelDependentFeature("tind^" + targetPhrase.GetStringRep(m_targetFactors)));
}
virtual void SetParameter(const std::string& key, const std::string& value) {


@ -17,11 +17,11 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
outFeatures.push_back(classifier.AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i)));
}
}


@ -20,9 +20,9 @@ public:
}
void operator()(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, Discriminative::Classifier &classifier) const {
, Discriminative::Classifier &classifier
, Discriminative::FeatureVector &outFeatures) const {
std::vector<FeatureFunction*> features = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < features.size(); i++) {
std::string fname = features[i]->GetScoreProducerDescription();
@ -31,7 +31,7 @@ public:
std::vector<float> scores = targetPhrase.GetScoreBreakdown().GetScoresForProducer(features[i]);
for(size_t j = 0; j < scores.size(); ++j)
classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]);
outFeatures.push_back(classifier.AddLabelDependentFeature(fname + "^" + boost::lexical_cast<std::string>(j), scores[j]));
}
}

moses/FF/VW/VWState.cpp Normal file

@ -0,0 +1,70 @@
#include "VWState.h"
#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/Hypothesis.h"
#include "moses/Util.h"
#include "moses/TypeDef.h"
#include "moses/StaticData.h"
#include "moses/TranslationOption.h"
#include <boost/functional/hash.hpp>
namespace Moses {
VWState::VWState() : m_spanStart(0), m_spanEnd(0) {
ComputeHash();
}
VWState::VWState(const Phrase &phrase)
: m_phrase(phrase), m_spanStart(0), m_spanEnd(0) {
ComputeHash();
}
VWState::VWState(const VWState &prevState, const Hypothesis &curHypo) {
VERBOSE(3, "VW :: updating state\n>> previous state: " << prevState << "\n");
// copy phrase from previous state
Phrase phrase = prevState.GetPhrase();
size_t contextSize = phrase.GetSize(); // identical to VWFeatureBase::GetMaximumContextSize()
// add words from current hypothesis
phrase.Append(curHypo.GetCurrTargetPhrase());
VERBOSE(3, ">> current hypo: " << curHypo.GetCurrTargetPhrase() << "\n");
// get a slice of appropriate length
Range range(phrase.GetSize() - contextSize, phrase.GetSize() - 1);
m_phrase = phrase.GetSubString(range);
// set current span start/end
m_spanStart = curHypo.GetTranslationOption().GetStartPos();
m_spanEnd = curHypo.GetTranslationOption().GetEndPos();
// compute our hash
ComputeHash();
VERBOSE(3, ">> updated state: " << *this << "\n");
}
bool VWState::operator==(const FFState& o) const {
const VWState &other = static_cast<const VWState &>(o);
return m_phrase == other.GetPhrase()
&& m_spanStart == other.GetSpanStart()
&& m_spanEnd == other.GetSpanEnd();
}
void VWState::ComputeHash() {
m_hash = 0;
boost::hash_combine(m_hash, m_phrase);
boost::hash_combine(m_hash, m_spanStart);
boost::hash_combine(m_hash, m_spanEnd);
}
std::ostream &operator<<(std::ostream &out, const VWState &state) {
out << state.GetPhrase() << "::" << state.GetSpanStart() << "-" << state.GetSpanEnd();
return out;
}
}
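The state update is effectively a fixed-size sliding window over the growing target string: append the newly translated phrase, then keep only the last contextSize words. A minimal sketch of the same idea over plain strings (not the Moses Phrase API):

#include <iostream>
#include <string>
#include <vector>

// keep only the most recent contextSize words after appending a new phrase
std::vector<std::string> UpdateContext(std::vector<std::string> context,
                                       const std::vector<std::string> &newPhrase,
                                       size_t contextSize) {
  context.insert(context.end(), newPhrase.begin(), newPhrase.end());
  if (context.size() > contextSize)
    context.erase(context.begin(), context.end() - contextSize);
  return context;
}

int main() {
  std::vector<std::string> ctx(3, "<s>"); // initial state: all sentence-start words
  std::vector<std::string> hypo;
  hypo.push_back("the");
  hypo.push_back("green");
  ctx = UpdateContext(ctx, hypo, 3);
  for (size_t i = 0; i < ctx.size(); i++)
    std::cout << ctx[i] << " "; // prints: <s> the green
  std::cout << "\n";
  return 0;
}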

moses/FF/VW/VWState.h Normal file

@ -0,0 +1,54 @@
#pragma once
#include <ostream>
#include "moses/FF/FFState.h"
#include "moses/Phrase.h"
#include "moses/Hypothesis.h"
namespace Moses {
/**
* VW state, used in decoding (when target context is enabled).
*/
class VWState : public FFState {
public:
// empty state, used only when VWState is ignored
VWState();
// used for construction of the initial VW state
VWState(const Phrase &phrase);
// continue from previous VW state with a new hypothesis
VWState(const VWState &prevState, const Hypothesis &curHypo);
virtual bool operator==(const FFState& o) const;
inline virtual size_t hash() const {
return m_hash;
}
inline const Phrase &GetPhrase() const {
return m_phrase;
}
inline size_t GetSpanStart() const {
return m_spanStart;
}
inline size_t GetSpanEnd() const {
return m_spanEnd;
}
private:
void ComputeHash();
Phrase m_phrase;
size_t m_spanStart, m_spanEnd;
size_t m_hash;
};
// how to print a VW state
std::ostream &operator<<(std::ostream &out, const VWState &state);
}


@ -0,0 +1,54 @@
#pragma once
#include <vector>
#include "moses/AlignmentInfo.h"
#include "moses/Phrase.h"
#include "AlignmentConstraint.h"
namespace Moses
{
/**
* VW thread-specific data about target sentence.
*/
class VWTargetSentence {
public:
VWTargetSentence() : m_sentence(NULL), m_alignment(NULL) {}
void Clear() {
  delete m_sentence;
  delete m_alignment;
  // null out so a repeated Clear() (or the destructor) cannot double-delete
  m_sentence = NULL;
  m_alignment = NULL;
}
~VWTargetSentence() {
Clear();
}
void SetConstraints(size_t sourceSize) {
// initialize to unconstrained
m_sourceConstraints.assign(sourceSize, AlignmentConstraint());
m_targetConstraints.assign(m_sentence->GetSize(), AlignmentConstraint());
// set constraints according to alignment points
AlignmentInfo::const_iterator it;
for (it = m_alignment->begin(); it != m_alignment->end(); it++) {
int src = it->first;
int tgt = it->second;
if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
}
m_sourceConstraints[src].Update(tgt);
m_targetConstraints[tgt].Update(src);
}
}
Phrase *m_sentence;
AlignmentInfo *m_alignment;
std::vector<AlignmentConstraint> m_sourceConstraints, m_targetConstraints;
};
}
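SetConstraints compresses the full alignment into one interval per word: every source word records the leftmost and rightmost target positions it touches, and symmetrically for target words. A compact, self-contained sketch with the hypothetical alignment {0-1, 2-1, 2-3}:

#include <algorithm>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

struct Constraint {
  int min, max;
  Constraint() : min(std::numeric_limits<int>::max()), max(-1) {}
  void Update(int p) { min = std::min(min, p); max = std::max(max, p); }
};

int main() {
  std::pair<int, int> points[3] = {
    std::make_pair(0, 1), std::make_pair(2, 1), std::make_pair(2, 3)
  };
  std::vector<Constraint> src(3), tgt(4);
  for (size_t i = 0; i < 3; i++) {
    src[points[i].first].Update(points[i].second);
    tgt[points[i].second].Update(points[i].first);
  }
  std::cout << "source word 2 spans targets [" << src[2].min << "," << src[2].max << "]\n"; // [1,3]
  std::cout << "target word 1 spans sources [" << tgt[1].min << "," << tgt[1].max << "]\n"; // [0,2]
  return 0;
}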


@ -24,6 +24,8 @@ class ezexample;
namespace Discriminative
{
typedef std::pair<uint32_t, float> FeatureType; // feature hash (=ID) and value
typedef std::vector<FeatureType> FeatureVector;
/**
* Abstract class to be implemented by classifiers.
@ -34,12 +36,22 @@ public:
/**
* Add a feature that does not depend on the class (label).
*/
virtual void AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value) = 0;
/**
* Add a feature that is specific for the given class.
*/
virtual void AddLabelDependentFeature(const StringPiece &name, float value) = 0;
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value) = 0;
/**
* Efficient addition of features when their IDs are already computed.
*/
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features) = 0;
/**
* Efficient addition of features when their IDs are already computed.
*/
virtual void AddLabelDependentFeatureVector(const FeatureVector &features) = 0;
/**
* Train using current example. Use loss to distinguish positive and negative training examples.
@ -54,12 +66,12 @@ public:
virtual float Predict(const StringPiece &label) = 0;
// helper methods for indicator features
void AddLabelIndependentFeature(const StringPiece &name) {
AddLabelIndependentFeature(name, 1.0);
FeatureType AddLabelIndependentFeature(const StringPiece &name) {
return AddLabelIndependentFeature(name, 1.0);
}
void AddLabelDependentFeature(const StringPiece &name) {
AddLabelDependentFeature(name, 1.0);
FeatureType AddLabelDependentFeature(const StringPiece &name) {
return AddLabelDependentFeature(name, 1.0);
}
virtual ~Classifier() {}
@ -95,8 +107,10 @@ public:
VWTrainer(const std::string &outputFile);
virtual ~VWTrainer();
virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
virtual void AddLabelDependentFeature(const StringPiece &name, float value);
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
virtual void Train(const StringPiece &label, float loss);
virtual float Predict(const StringPiece &label);
@ -121,15 +135,17 @@ public:
VWPredictor(const std::string &modelFile, const std::string &vwOptions);
virtual ~VWPredictor();
virtual void AddLabelIndependentFeature(const StringPiece &name, float value);
virtual void AddLabelDependentFeature(const StringPiece &name, float value);
virtual FeatureType AddLabelIndependentFeature(const StringPiece &name, float value);
virtual FeatureType AddLabelDependentFeature(const StringPiece &name, float value);
virtual void AddLabelIndependentFeatureVector(const FeatureVector &features);
virtual void AddLabelDependentFeatureVector(const FeatureVector &features);
virtual void Train(const StringPiece &label, float loss);
virtual float Predict(const StringPiece &label);
friend class ClassifierFactory;
protected:
void AddFeature(const StringPiece &name, float values);
FeatureType AddFeature(const StringPiece &name, float values);
::vw *m_VWInstance, *m_VWParser;
::ezexample *m_ex;
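Returning the (hash, value) pair from each Add*Feature call is what enables caching: label-independent context features can be hashed once, stored in a FeatureVector, and replayed cheaply through AddLabelIndependentFeatureVector for every translation option that shares the same context. A sketch of that usage against the interface above (the feature strings and the driving loop are illustrative, not the decoder's actual control flow):

#include <string>
#include <vector>
#include "Classifier.h" // the interface declared above

// sketch only: hash shared context features once, replay them afterwards
void ScoreOptions(Discriminative::Classifier &classifier,
                  const std::vector<std::string> &targetReprs) {
  Discriminative::FeatureVector cached;
  for (size_t i = 0; i < targetReprs.size(); i++) {
    if (i == 0) {
      // first option: hash the context features and remember their IDs
      cached.push_back(classifier.AddLabelIndependentFeature("tcwin^-1^house"));
      cached.push_back(classifier.AddLabelIndependentFeature("tcwin^-2^green"));
    } else {
      // remaining options: replay precomputed IDs, no string hashing
      classifier.AddLabelIndependentFeatureVector(cached);
    }
    classifier.AddLabelDependentFeature("tind^" + targetReprs[i]);
    classifier.Predict(targetReprs[i]);
  }
}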


@ -2,6 +2,7 @@
#define moses_Normalizer_h
#include <vector>
#include <algorithm>
#include "Util.h"
namespace Discriminative
@ -45,16 +46,25 @@ public:
virtual ~SquaredLossNormalizer() {}
};
// safe softmax
class LogisticLossNormalizer : public Normalizer
{
public:
virtual void operator()(std::vector<float> &losses) const {
float sum = 0;
std::vector<float>::iterator it;
float sum = 0;
float max = 0;
for (it = losses.begin(); it != losses.end(); it++) {
*it = exp(-*it);
*it = -*it;
max = std::max(max, *it);
}
for (it = losses.begin(); it != losses.end(); it++) {
*it = exp(*it - max);
sum += *it;
}
for (it = losses.begin(); it != losses.end(); it++) {
*it /= sum;
}
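The rewrite above is the standard max-shift softmax: instead of p_i = exp(-l_i) / sum_j exp(-l_j) it computes p_i = exp(-l_i - m) / sum_j exp(-l_j - m) with m = max_j(-l_j), which is algebraically identical but keeps exp() in a safe range. A standalone sketch of the same computation; note it initializes the running maximum to -infinity, a slight hardening over the committed code, which starts at 0 and can underflow the whole sum when every loss is large and positive:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

// convert losses to probabilities: p[i] = exp(-loss[i]) / sum_j exp(-loss[j]),
// shifted by the maximum to avoid overflow/underflow
void SafeSoftmax(std::vector<float> &losses) {
  float max = -std::numeric_limits<float>::infinity();
  for (size_t i = 0; i < losses.size(); i++) {
    losses[i] = -losses[i];
    max = std::max(max, losses[i]);
  }
  float sum = 0;
  for (size_t i = 0; i < losses.size(); i++) {
    losses[i] = std::exp(losses[i] - max);
    sum += losses[i];
  }
  for (size_t i = 0; i < losses.size(); i++)
    losses[i] /= sum;
}

int main() {
  std::vector<float> losses;
  losses.push_back(1.0f);
  losses.push_back(2.0f);
  losses.push_back(3.0f);
  SafeSoftmax(losses);
  for (size_t i = 0; i < losses.size(); i++)
    std::cout << losses[i] << " "; // ~0.665 0.245 0.090
  std::cout << "\n";
  return 0;
}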


@ -36,7 +36,7 @@ VWPredictor::~VWPredictor()
VW::finish(*m_VWInstance);
}
void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value)
{
// label-independent features are kept in a different feature namespace ('s' = source)
@ -48,10 +48,10 @@ void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float valu
m_ex->addns('s');
if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
}
AddFeature(name, value); // namespace 's' is set up, add the feature
return AddFeature(name, value); // namespace 's' is set up, add the feature
}
void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
{
// VW does not use the label directly, instead, we do a Cartesian product between source and target feature
// namespaces, where the source namespace ('s') contains label-independent features and the target
@ -63,7 +63,37 @@ void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value)
m_ex->addns('t');
if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
}
AddFeature(name, value);
return AddFeature(name, value);
}
void VWPredictor::AddLabelIndependentFeatureVector(const FeatureVector &features)
{
if (m_isFirstSource) {
// the first feature of a new example => create the source namespace for
// label-independent features to live in
m_isFirstSource = false;
m_ex->finish();
m_ex->addns('s');
if (DEBUG) std::cerr << "VW :: Setting source namespace\n";
}
// add each feature index using this "low level" call to VW
for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
m_ex->addf(it->first, it->second);
}
void VWPredictor::AddLabelDependentFeatureVector(const FeatureVector &features)
{
if (m_isFirstTarget) {
// the first target-side feature => create namespace 't'
m_isFirstTarget = false;
m_ex->addns('t');
if (DEBUG) std::cerr << "VW :: Setting target namespace\n";
}
// add each feature index using this "low level" call to VW
for (FeatureVector::const_iterator it = features.begin(); it != features.end(); it++)
m_ex->addf(it->first, it->second);
}
void VWPredictor::Train(const StringPiece &label, float loss)
@ -82,10 +112,10 @@ float VWPredictor::Predict(const StringPiece &label)
return loss;
}
void VWPredictor::AddFeature(const StringPiece &name, float value)
FeatureType VWPredictor::AddFeature(const StringPiece &name, float value)
{
if (DEBUG) std::cerr << "VW :: Adding feature: " << EscapeSpecialChars(name.as_string()) << ":" << value << "\n";
m_ex->addf(EscapeSpecialChars(name.as_string()), value);
return std::make_pair(m_ex->addf(EscapeSpecialChars(name.as_string()), value), value);
}
} // namespace Discriminative
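The Cartesian-product comment is the crux of the VW setup: the model never scores the label string itself; it scores the cross of every source-namespace ('s') feature with every target-namespace ('t') feature, i.e. VW's quadratic interactions between the two namespaces (enabled through the user-supplied vw-options, typically something like -q st). A toy sketch of what that cross looks like:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // toy source ('s') and target ('t') namespace features
  std::vector<std::string> src, tgt;
  src.push_back("bow^house");
  src.push_back("c^-1^green");
  tgt.push_back("tind^Haus");
  // quadratic interaction: every source feature crossed with every target one;
  // the classifier learns one weight per pair, so target features only matter
  // through their combinations with the source side
  for (size_t i = 0; i < src.size(); i++)
    for (size_t j = 0; j < tgt.size(); j++)
      std::cout << "s^" << src[i] << " x t^" << tgt[j] << "\n";
  return 0;
}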


@ -25,7 +25,7 @@ VWTrainer::~VWTrainer()
close(m_bfos);
}
void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
FeatureType VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
{
if (m_isFirstSource) {
if (m_isFirstExample) {
@ -43,9 +43,11 @@ void VWTrainer::AddLabelIndependentFeature(const StringPiece &name, float value)
}
AddFeature(name, value);
return std::make_pair(0, value); // we don't hash features
}
void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
FeatureType VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
{
if (m_isFirstTarget) {
m_isFirstTarget = false;
@ -56,6 +58,18 @@ void VWTrainer::AddLabelDependentFeature(const StringPiece &name, float value)
}
AddFeature(name, value);
return std::make_pair(0, value); // we don't hash features
}
void VWTrainer::AddLabelIndependentFeatureVector(const FeatureVector &features)
{
throw logic_error("VW trainer does not support feature IDs.");
}
void VWTrainer::AddLabelDependentFeatureVector(const FeatureVector &features)
{
throw logic_error("VW trainer does not support feature IDs.");
}
void VWTrainer::Train(const StringPiece &label, float loss)