adding some VW feature functions

This commit is contained in:
Ales Tamchyna 2015-01-08 10:53:36 +01:00
parent 635b4c8181
commit 596d1564c4
9 changed files with 201 additions and 6 deletions

View File

@ -65,8 +65,13 @@
#ifdef HAVE_VW
#include "moses/FF/VW/VW.h"
#include "moses/FF/VW/VWFeatureBagOfWords.h"
#include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
#include "moses/FF/VW/VWFeatureSourceIndicator.h"
#include "moses/FF/VW/VWFeatureSourcePhraseInternal.h"
#include "moses/FF/VW/VWFeatureSourceWindow.h"
#include "moses/FF/VW/VWFeatureTargetIndicator.h"
#include "moses/FF/VW/VWFeatureTargetPhraseInternal.h"
#endif
#ifdef HAVE_CMPH
@ -240,7 +245,11 @@ FeatureRegistry::FeatureRegistry()
#ifdef HAVE_VW
MOSES_FNAME(VW);
MOSES_FNAME(VWFeatureBagOfWords);
MOSES_FNAME(VWFeatureSourceBagOfWords);
MOSES_FNAME(VWFeatureSourceIndicator);
MOSES_FNAME(VWFeatureSourcePhraseInternal);
MOSES_FNAME(VWFeatureSourceWindow);
MOSES_FNAME(VWFeatureTargetPhraseInternal);
MOSES_FNAME(VWFeatureTargetIndicator);
#endif

View File

@ -32,6 +32,11 @@ class VWFeatureSource : public VWFeatureBase
virtual void SetParameter(const std::string& key, const std::string& value) {
VWFeatureBase::SetParameter(key, value);
}
protected:
// Convenience accessor for derived features: returns the string produced by
// GetString(m_sourceFactors, false) for the word at position `pos` of `input`.
inline std::string GetWord(const InputType &input, size_t pos) const
{
  std::string text = input.GetWord(pos).GetString(m_sourceFactors, false);
  return text;
}
};
}
}

View File

@ -6,10 +6,10 @@
namespace Moses
{
class VWFeatureBagOfWords : public VWFeatureSource
class VWFeatureSourceBagOfWords : public VWFeatureSource
{
public:
VWFeatureBagOfWords(const std::string &line)
VWFeatureSourceBagOfWords(const std::string &line)
: VWFeatureSource(line)
{
ReadParameters();
@ -24,7 +24,7 @@ class VWFeatureBagOfWords : public VWFeatureSource
, Discriminative::Classifier *classifier) const
{
for (size_t i = 0; i < input.GetSize(); i++) {
classifier->AddLabelIndependentFeature("bow^" + input.GetWord(i).GetString(m_sourceFactors, false));
classifier->AddLabelIndependentFeature("bow^" + GetWord(input, i));
}
}

View File

@ -0,0 +1,44 @@
#pragma once
#include <string>
#include <algorithm>
#include "VWFeatureSource.h"
#include "moses/Util.h"
namespace Moses
{
/**
 * Source-side VW feature that describes the whole source span with a single
 * label-independent feature: "sind^" followed by the words of the span
 * joined with single spaces.
 */
class VWFeatureSourceIndicator : public VWFeatureSource
{
public:
  VWFeatureSourceIndicator(const std::string &line)
    : VWFeatureSource(line)
  {
    ReadParameters();

    // Registration must remain the final step of construction.
    VWFeatureBase::UpdateRegister();
  }

  /**
   * Collects the words covered by sourceRange (its end position is inclusive,
   * hence the + 1) and hands the joined phrase to the classifier as one
   * label-independent feature.
   */
  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const WordsRange &sourceRange
                  , Discriminative::Classifier *classifier) const
  {
    const size_t first = sourceRange.GetStartPos();
    const size_t length = sourceRange.GetEndPos() + 1 - first;

    std::vector<std::string> tokens;
    tokens.reserve(length);
    for (size_t offset = 0; offset < length; ++offset) {
      tokens.push_back(GetWord(input, first + offset));
    }

    classifier->AddLabelIndependentFeature("sind^" + Join(" ", tokens));
  }

  // This feature defines no keys of its own; everything goes to the parent.
  virtual void SetParameter(const std::string& key, const std::string& value)
  {
    VWFeatureSource::SetParameter(key, value);
  }
};
}

View File

@ -0,0 +1,41 @@
#pragma once
#include <string>
#include <algorithm>
#include "VWFeatureSource.h"
#include "moses/Util.h"
namespace Moses
{
/**
 * Source-side VW feature that emits one label-independent feature per word
 * inside the source span, each of the form "sin^<word>".
 */
class VWFeatureSourcePhraseInternal : public VWFeatureSource
{
public:
  VWFeatureSourcePhraseInternal(const std::string &line)
    : VWFeatureSource(line)
  {
    ReadParameters();

    // Registration must remain the final step of construction.
    VWFeatureBase::UpdateRegister();
  }

  /**
   * Walks the positions covered by sourceRange (end position inclusive) in
   * increasing order and adds a "sin^<word>" feature for each of them.
   */
  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const WordsRange &sourceRange
                  , Discriminative::Classifier *classifier) const
  {
    size_t pos = sourceRange.GetStartPos();
    const size_t stop = sourceRange.GetEndPos() + 1; // one past the last word
    for (; pos < stop; ++pos) {
      classifier->AddLabelIndependentFeature("sin^" + GetWord(input, pos));
    }
  }

  // This feature defines no keys of its own; everything goes to the parent.
  virtual void SetParameter(const std::string& key, const std::string& value)
  {
    VWFeatureSource::SetParameter(key, value);
  }
};
}

View File

@ -0,0 +1,55 @@
#pragma once
#include <string>
#include <algorithm>
#include "VWFeatureSource.h"
#include "moses/Util.h"
namespace Moses
{
class VWFeatureSourceWindow : public VWFeatureSource
{
public:
VWFeatureSourceWindow(const std::string &line)
: VWFeatureSource(line), m_size(DEFAULT_WINDOW_SIZE)
{
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
, Discriminative::Classifier *classifier) const
{
int begin = sourceRange.GetStartPos();
int end = sourceRange.GetEndPos() + 1;
int inputLen = input.GetSize();
for (int i = std::max(0, begin - m_size); i < begin; i++) {
classifier->AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
}
for (int i = end; i < std::min(end + m_size, inputLen); i++) {
classifier->AddLabelIndependentFeature("c^" + SPrint(end - i + 1) + "^" + GetWord(input, i));
}
}
virtual void SetParameter(const std::string& key, const std::string& value) {
if (key == "size") {
m_size = Scan<size_t>(value);
} else {
VWFeatureSource::SetParameter(key, value);
}
}
private:
static const int DEFAULT_WINDOW_SIZE = 3;
int m_size;
};
}

View File

@ -31,6 +31,11 @@ class VWFeatureTarget : public VWFeatureBase
virtual void SetParameter(const std::string& key, const std::string& value) {
VWFeatureBase::SetParameter(key, value);
}
protected:
// Convenience accessor for derived features: returns the string produced by
// GetString(m_targetFactors, false) for the word at position `pos` of `phrase`.
inline std::string GetWord(const TargetPhrase &phrase, size_t pos) const
{
  std::string text = phrase.GetWord(pos).GetString(m_targetFactors, false);
  return text;
}
};
}

View File

@ -0,0 +1,35 @@
#pragma once
#include <string>
#include "VWFeatureTarget.h"
namespace Moses
{
/**
 * Target-side VW feature that emits one label-dependent feature per word of
 * the target phrase, each of the form "tin^<word>".
 */
class VWFeatureTargetPhraseInternal : public VWFeatureTarget
{
public:
  VWFeatureTargetPhraseInternal(const std::string &line)
    : VWFeatureTarget(line)
  {
    ReadParameters();
    VWFeatureBase::UpdateRegister();
  }

  /**
   * Visits the words of targetPhrase from first to last and adds a
   * "tin^<word>" feature for each one. input and inputPath are not used.
   */
  void operator()(const InputType &input
                  , const InputPath &inputPath
                  , const TargetPhrase &targetPhrase
                  , Discriminative::Classifier *classifier) const
  {
    size_t pos = 0;
    while (pos < targetPhrase.GetSize()) {
      classifier->AddLabelDependentFeature("tin^" + GetWord(targetPhrase, pos));
      ++pos;
    }
  }

  // This feature defines no keys of its own; everything goes to the parent.
  virtual void SetParameter(const std::string& key, const std::string& value)
  {
    VWFeatureTarget::SetParameter(key, value);
  }
};
}

View File

@ -41,6 +41,7 @@ class WordsRange
{
friend std::ostream& operator << (std::ostream& out, const WordsRange& range);
// m_endPos is inclusive
size_t m_startPos, m_endPos;
public:
inline WordsRange(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}