diff --git a/moses/Jamfile b/moses/Jamfile index 34650f759..1e0f28e55 100644 --- a/moses/Jamfile +++ b/moses/Jamfile @@ -19,7 +19,7 @@ if $(with-oxlm) { local vw = ; if [ option.get "with-vw" ] { - classifier += ..//vw//vw ; + vw += ..//vw//vw ; } alias headers : ../util//kenutil $(vw) : : : $(max-factors) $(dlib) $(oxlm) ; diff --git a/moses/Util.h b/moses/Util.h index a595e9a3f..12038468e 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -331,6 +331,20 @@ std::string Join(const std::string& delimiter, const std::vector& items) return outstr.str(); } +/* + * Convert any container to string + */ +template<typename It> +std::string Join(const std::string &delim, It begin, It end) +{ + std::ostringstream outstr; + if (begin != end) + outstr << *begin++; + for ( ; begin != end; ++begin) + outstr << delim << *begin; + return outstr.str(); +} + //! transform prob to natural log score inline float TransformScore(float prob) { diff --git a/vw/Classifier.h b/vw/Classifier.h index cdeb41232..763adebb5 100644 --- a/vw/Classifier.h +++ b/vw/Classifier.h @@ -41,11 +41,13 @@ public: /** * Train using current example. Use loss to distinguish positive and negative training examples. + * Throws away current label-dependent features (so that features for another label/class can now be set). */ virtual void Train(const StringPiece &label, float loss) = 0; /** * Predict the loss (inverse of score) of current example. + * Throws away current label-dependent features (so that features for another label/class can now be set). */ virtual float Predict(const StringPiece &label) = 0; @@ -76,7 +78,6 @@ public: protected: void AddFeature(const StringPiece &name, float value); - void FinishExample(); void Finish(); bool m_isFirstSource, m_isFirstTarget, m_isFirstExample; @@ -106,7 +107,6 @@ public: protected: void AddFeature(const StringPiece &name, float value); - void FinishExample(); void Finish(); ::vw *m_VWInstance; @@ -139,7 +139,7 @@ public: /** * Release a VWPredictor instance. 
*/ - void Release(VWPredictor * fc); + void Release(VWPredictor *vwpred); ~VWPredictorFactory(); diff --git a/vw/FeatureExtractor.cpp b/vw/FeatureExtractor.cpp index 2e873db2c..63a45ccc4 100644 --- a/vw/FeatureExtractor.cpp +++ b/vw/FeatureExtractor.cpp @@ -1,9 +1,6 @@ #include "FeatureExtractor.h" #include "Util.h" -#include -#include - using namespace std; using namespace Moses; diff --git a/vw/Jamfile b/vw/Jamfile index 32aee6abf..bc3cd42a2 100644 --- a/vw/Jamfile +++ b/vw/Jamfile @@ -7,8 +7,8 @@ local with-vw = [ option.get "with-vw" ] ; if $(with-vw) { lib vwlib : : $(with-vw)/lib ; lib allreduce : : $(with-vw)/lib ; - obj VWLibraryConsumer.o : VWLibraryConsumer.cpp headers : $(with-vw)/library $(with-vw)/vowpalwabbit ; - alias vw_objects : VWLibraryConsumer.o vwlib allreduce : : : boost_program_options ; - lib vw : [ glob *.cpp : VWLibraryConsumer.cpp ] vw_objects headers ; + obj VWPredictor.o : VWPredictor.cpp VWPredictorFactory.cpp headers : $(with-vw)/library $(with-vw)/vowpalwabbit ; + alias vw_objects : VWPredictor.o vwlib allreduce : : : boost_program_options ; + lib vw : [ glob *.cpp : VWPredictor.cpp VWPredictorFactory.cpp ] vw_objects headers ; echo "Linking with Vowpal Wabbit" ; } diff --git a/vw/VWPredictor.cpp b/vw/VWPredictor.cpp index 039296db2..a3b3db8de 100644 --- a/vw/VWPredictor.cpp +++ b/vw/VWPredictor.cpp @@ -1,10 +1,6 @@ #include "Classifier.h" #include "vw.h" -#include "Util.h" #include "ezexample.h" -#include -#include -#include namespace Discriminative { @@ -18,7 +14,7 @@ VWPredictor::VWPredictor(const string &modelFile, const string &vwOptions) m_isFirstSource = m_isFirstTarget = true; } -VWPredictor::VWPredictor(vw * instance, int index) +VWPredictor::VWPredictor(vw *instance, int index) { m_VWInstance = instance; m_sharedVwInstance = true; @@ -29,17 +25,26 @@ VWPredictor::VWPredictor(vw * instance, int index) void VWPredictor::AddLabelIndependentFeature(const StringPiece &name, float value) { + // label-independent features are kept 
in a different feature namespace ('s' = source) + if (m_isFirstSource) { + // the first feature of a new example => create the source namespace for + // label-independent features to live in m_isFirstSource = false; m_ex->clear_features(); // removes all namespaces along with features m_ex->addns('s'); } - AddFeature(name, value); + AddFeature(name, value); // namespace 's' is set up, add the feature } void VWPredictor::AddLabelDependentFeature(const StringPiece &name, float value) { + // VW does not use the label directly, instead, we do a Cartesian product between source and target feature + // namespaces, where the source namespace ('s') contains label-independent features and the target + // namespace ('t') contains label-dependent features + if (m_isFirstTarget) { + // the first target-side feature => create namespace 't' m_isFirstTarget = false; m_ex->addns('t'); } diff --git a/vw/VWPredictorFactory.cpp b/vw/VWPredictorFactory.cpp index d6732d919..556e31ef6 100644 --- a/vw/VWPredictorFactory.cpp +++ b/vw/VWPredictorFactory.cpp @@ -1,10 +1,5 @@ #include "Classifier.h" #include "vw.h" -#include "Util.h" -#include "ezexample.h" -#include -#include -#include using namespace std; @@ -63,7 +58,7 @@ VWPredictorFactory::~VWPredictorFactory() VW::finish(*m_VWInstance); } -VWPredictor * VWPredictorFactory::Acquire() +VWPredictor *VWPredictorFactory::Acquire() { boost::unique_lock lock(m_mutex); while (m_firstFree == EMPTY_LIST) @@ -74,17 +69,17 @@ VWPredictor * VWPredictorFactory::Acquire() return m_predictors[free]; } -void VWPredictorFactory::Release(VWPredictor * fc) +void VWPredictorFactory::Release(VWPredictor *vwpred) { // use scope block to handle the lock { boost::unique_lock lock(m_mutex); - int index = fc->m_index; + int index = vwpred->m_index; if (index < 0 || index >= (int)m_predictors.size()) throw std::runtime_error("bad index at VWPredictorFactory::Release"); - if (fc != m_predictors[index]) + if (vwpred != m_predictors[index]) throw 
std::runtime_error("mismatched pointer at VWPredictorFactory::Release"); m_nextFree[index] = m_firstFree; diff --git a/vw/VWTrainer.cpp b/vw/VWTrainer.cpp index c0654ae43..b78e2b83f 100644 --- a/vw/VWTrainer.cpp +++ b/vw/VWTrainer.cpp @@ -1,8 +1,5 @@ #include "Util.h" #include "Classifier.h" -#include -#include -#include #include using namespace std; @@ -68,7 +65,6 @@ float VWTrainer::Predict(const StringPiece &label) void VWTrainer::AddFeature(const StringPiece &name, float value) { - // TODO take advantage of StringPiece here somehow? m_outputBuffer.push_back(EscapeSpecialChars(name.as_string()) + ":" + SPrint(value)); }