From 596d1564c41e4dc4b773479f8d7a281f952181a7 Mon Sep 17 00:00:00 2001 From: Ales Tamchyna Date: Thu, 8 Jan 2015 10:53:36 +0100 Subject: [PATCH] adding some VW feature functions --- moses/FF/Factory.cpp | 13 ++++- moses/FF/VW/VWFeatureSource.h | 7 ++- ...gOfWords.h => VWFeatureSourceBagOfWords.h} | 6 +- moses/FF/VW/VWFeatureSourceIndicator.h | 44 +++++++++++++++ moses/FF/VW/VWFeatureSourcePhraseInternal.h | 41 ++++++++++++++ moses/FF/VW/VWFeatureSourceWindow.h | 55 +++++++++++++++++++ moses/FF/VW/VWFeatureTarget.h | 5 ++ moses/FF/VW/VWFeatureTargetPhraseInternal.h | 35 ++++++++++++ moses/WordsRange.h | 1 + 9 files changed, 201 insertions(+), 6 deletions(-) rename moses/FF/VW/{VWFeatureBagOfWords.h => VWFeatureSourceBagOfWords.h} (75%) create mode 100644 moses/FF/VW/VWFeatureSourceIndicator.h create mode 100644 moses/FF/VW/VWFeatureSourcePhraseInternal.h create mode 100644 moses/FF/VW/VWFeatureSourceWindow.h create mode 100644 moses/FF/VW/VWFeatureTargetPhraseInternal.h diff --git a/moses/FF/Factory.cpp b/moses/FF/Factory.cpp index 8f56f281a..7190025a8 100644 --- a/moses/FF/Factory.cpp +++ b/moses/FF/Factory.cpp @@ -65,8 +65,13 @@ #ifdef HAVE_VW #include "moses/FF/VW/VW.h" -#include "moses/FF/VW/VWFeatureBagOfWords.h" +#include "moses/FF/VW/VWFeatureSourceBagOfWords.h" +#include "moses/FF/VW/VWFeatureSourceIndicator.h" +#include "moses/FF/VW/VWFeatureSourcePhraseInternal.h" +#include "moses/FF/VW/VWFeatureSourceWindow.h" #include "moses/FF/VW/VWFeatureTargetIndicator.h" +#include "moses/FF/VW/VWFeatureTargetPhraseInternal.h" + #endif #ifdef HAVE_CMPH @@ -240,7 +245,11 @@ FeatureRegistry::FeatureRegistry() #ifdef HAVE_VW MOSES_FNAME(VW); - MOSES_FNAME(VWFeatureBagOfWords); + MOSES_FNAME(VWFeatureSourceBagOfWords); + MOSES_FNAME(VWFeatureSourceIndicator); + MOSES_FNAME(VWFeatureSourcePhraseInternal); + MOSES_FNAME(VWFeatureSourceWindow); + MOSES_FNAME(VWFeatureTargetPhraseInternal); MOSES_FNAME(VWFeatureTargetIndicator); #endif diff --git 
a/moses/FF/VW/VWFeatureSource.h b/moses/FF/VW/VWFeatureSource.h
index 939c773f0..dc8d2f5b0 100644
--- a/moses/FF/VW/VWFeatureSource.h
+++ b/moses/FF/VW/VWFeatureSource.h
@@ -32,6 +32,11 @@ class VWFeatureSource : public VWFeatureBase
     virtual void SetParameter(const std::string& key, const std::string& value) {
       VWFeatureBase::SetParameter(key, value);
     }
+  protected:
+    // Helper for subclasses: string form of the input word at pos, via GetString(m_sourceFactors, false).
+    inline std::string GetWord(const InputType &input, size_t pos) const {
+      return input.GetWord(pos).GetString(m_sourceFactors, false);
+    }
 };
 
-}
\ No newline at end of file
+}
diff --git a/moses/FF/VW/VWFeatureBagOfWords.h b/moses/FF/VW/VWFeatureSourceBagOfWords.h
similarity index 75%
rename from moses/FF/VW/VWFeatureBagOfWords.h
rename to moses/FF/VW/VWFeatureSourceBagOfWords.h
index 340f08996..345c949d3 100644
--- a/moses/FF/VW/VWFeatureBagOfWords.h
+++ b/moses/FF/VW/VWFeatureSourceBagOfWords.h
@@ -6,10 +6,10 @@
 namespace Moses
 {
 
-class VWFeatureBagOfWords : public VWFeatureSource
+class VWFeatureSourceBagOfWords : public VWFeatureSource
 {
   public:
-    VWFeatureBagOfWords(const std::string &line)
+    VWFeatureSourceBagOfWords(const std::string &line)
       : VWFeatureSource(line)
     {
       ReadParameters();
@@ -24,7 +24,7 @@ class VWFeatureBagOfWords : public VWFeatureSource
                   , Discriminative::Classifier *classifier) const
     {
       for (size_t i = 0; i < input.GetSize(); i++) {
-        classifier->AddLabelIndependentFeature("bow^" + input.GetWord(i).GetString(m_sourceFactors, false));
+        classifier->AddLabelIndependentFeature("bow^" + GetWord(input, i));
       }
     }
 
diff --git a/moses/FF/VW/VWFeatureSourceIndicator.h b/moses/FF/VW/VWFeatureSourceIndicator.h
new file mode 100644
index 000000000..3a65d68d6
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceIndicator.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <string>
+#include <vector>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+// Emits a single label-independent feature "sind^<w1 w2 ...>" from the source words in sourceRange.
+class VWFeatureSourceIndicator : public VWFeatureSource
+{
+  public:
+    VWFeatureSourceIndicator(const std::string &line)
+      : VWFeatureSource(line)
+    {
+      ReadParameters();
+
+      // Call this last
+      VWFeatureBase::UpdateRegister();
+    }
+
+    void operator()(const InputType &input
+                  , const InputPath &inputPath
+                  , const WordsRange &sourceRange
+                  , Discriminative::Classifier *classifier) const
+    {
+      size_t begin = sourceRange.GetStartPos();
+      size_t end   = sourceRange.GetEndPos() + 1;
+      // GetEndPos() is inclusive, so [begin, end) covers the whole source phrase.
+      std::vector<std::string> words(end - begin);
+
+      for (size_t i = 0; i < end - begin; i++)
+        words[i] = GetWord(input, begin + i);
+
+      classifier->AddLabelIndependentFeature("sind^" + Join(" ", words));
+    }
+
+    virtual void SetParameter(const std::string& key, const std::string& value) {
+      VWFeatureSource::SetParameter(key, value);
+    }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourcePhraseInternal.h b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
new file mode 100644
index 000000000..51968a760
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourcePhraseInternal.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <cstddef>
+#include <string>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+// Emits one label-independent feature "sin^<word>" for every source word in sourceRange.
+class VWFeatureSourcePhraseInternal : public VWFeatureSource
+{
+  public:
+    VWFeatureSourcePhraseInternal(const std::string &line)
+      : VWFeatureSource(line)
+    {
+      ReadParameters();
+
+      // Call this last
+      VWFeatureBase::UpdateRegister();
+    }
+
+    void operator()(const InputType &input
+                  , const InputPath &inputPath
+                  , const WordsRange &sourceRange
+                  , Discriminative::Classifier *classifier) const
+    {
+      size_t begin = sourceRange.GetStartPos();
+      size_t end   = sourceRange.GetEndPos() + 1;
+      // GetEndPos() is inclusive; walk positions begin .. end-1.
+      while (begin < end) {
+        classifier->AddLabelIndependentFeature("sin^" + GetWord(input, begin++));
+      }
+    }
+
+    virtual void SetParameter(const std::string& key, const std::string& value) {
+      VWFeatureSource::SetParameter(key, value);
+    }
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureSourceWindow.h b/moses/FF/VW/VWFeatureSourceWindow.h
new file mode 100644
index 000000000..058f22742
--- /dev/null
+++ b/moses/FF/VW/VWFeatureSourceWindow.h
@@ -0,0 +1,55 @@
+#pragma once
+
+#include <string>
+#include <algorithm>
+#include "VWFeatureSource.h"
+#include "moses/Util.h"
+
+namespace Moses
+{
+// Emits context features "c^<offset>^<word>" for up to m_size source words on each side of sourceRange.
+class VWFeatureSourceWindow : public VWFeatureSource
+{
+  public:
+    VWFeatureSourceWindow(const std::string &line)
+      : VWFeatureSource(line), m_size(DEFAULT_WINDOW_SIZE)
+    {
+      ReadParameters();
+
+      // Call this last
+      VWFeatureBase::UpdateRegister();
+    }
+
+    void operator()(const InputType &input
+                  , const InputPath &inputPath
+                  , const WordsRange &sourceRange
+                  , Discriminative::Classifier *classifier) const
+    {
+      int begin = sourceRange.GetStartPos();
+      int end   = sourceRange.GetEndPos() + 1;
+      int inputLen = input.GetSize();
+      // Left context: offsets -m_size .. -1, counted back from the first word of the phrase.
+      for (int i = std::max(0, begin - m_size); i < begin; i++) {
+        classifier->AddLabelIndependentFeature("c^" + SPrint(i - begin) + "^" + GetWord(input, i));
+      }
+      // Right context: offsets 1 .. m_size (i - end + 1), so they grow with distance and never clash with the left side.
+      for (int i = end; i < std::min(end + m_size, inputLen); i++) {
+        classifier->AddLabelIndependentFeature("c^" + SPrint(i - end + 1) + "^" + GetWord(input, i));
+      }
+    }
+
+    virtual void SetParameter(const std::string& key, const std::string& value) {
+      if (key == "size") {
+        m_size = Scan<int>(value);
+      } else {
+        VWFeatureSource::SetParameter(key, value);
+      }
+    }
+
+  private:
+    static const int DEFAULT_WINDOW_SIZE = 3;
+    // Number of context words taken on each side; set by the "size" parameter.
+    int m_size;
+};
+
+}
diff --git a/moses/FF/VW/VWFeatureTarget.h b/moses/FF/VW/VWFeatureTarget.h
index 46f4cc236..17877eb4a 100644
--- a/moses/FF/VW/VWFeatureTarget.h
+++ b/moses/FF/VW/VWFeatureTarget.h
@@ -31,6 +31,11 @@ class VWFeatureTarget : public VWFeatureBase
     virtual void SetParameter(const std::string& key, const std::string& value) {
       VWFeatureBase::SetParameter(key, value);
     }
+  protected:
+    // Helper for subclasses: string form of the target word at pos, via GetString(m_targetFactors, false).
+    inline std::string GetWord(const TargetPhrase &phrase, size_t pos) const {
+      return phrase.GetWord(pos).GetString(m_targetFactors, false);
+    }
 };
 
 }
diff --git a/moses/FF/VW/VWFeatureTargetPhraseInternal.h b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
new file mode 100644
index 000000000..5cee52b57
--- /dev/null
+++ b/moses/FF/VW/VWFeatureTargetPhraseInternal.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include <string>
+#include "VWFeatureTarget.h"
+
+namespace Moses
+{
+// Emits one label-dependent feature "tin^<word>" for every word of the target phrase.
+class VWFeatureTargetPhraseInternal : public VWFeatureTarget
+{
+  public:
+    VWFeatureTargetPhraseInternal(const std::string &line)
+      : VWFeatureTarget(line)
+    {
+      ReadParameters();
+
+      VWFeatureBase::UpdateRegister();
+    }
+
+    void operator()(const InputType &input
+                  , const InputPath &inputPath
+                  , const TargetPhrase &targetPhrase
+                  , Discriminative::Classifier *classifier) const
+    {
+      for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
+        classifier->AddLabelDependentFeature("tin^" + GetWord(targetPhrase, i));
+      }
+    }
+
+    virtual void SetParameter(const std::string& key, const std::string& value) {
+      VWFeatureTarget::SetParameter(key, value);
+    }
+};
+
+}
diff --git a/moses/WordsRange.h b/moses/WordsRange.h
index a4b41de83..4a38ecde7 100644
--- a/moses/WordsRange.h
+++ b/moses/WordsRange.h
@@ -41,6 +41,7 @@ class WordsRange
 {
   friend std::ostream& operator << (std::ostream& out, const WordsRange& range);
 
+  // m_endPos is inclusive
   size_t m_startPos, m_endPos;
 public:
   inline WordsRange(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}