Merge branch 'vw_integration' of github.com:moses-smt/mosesdecoder into vw_integration

This commit is contained in:
Ales Tamchyna 2015-01-08 12:32:34 +01:00
commit cf4608163f
10 changed files with 246 additions and 3 deletions

View File

@ -71,8 +71,8 @@
#include "moses/FF/VW/VWFeatureSourcePhraseInternal.h"
#include "moses/FF/VW/VWFeatureSourceWindow.h"
#include "moses/FF/VW/VWFeatureTargetIndicator.h"
#include "moses/FF/VW/VWFeatureSourceExternalFeatures.h"
#include "moses/FF/VW/VWFeatureTargetPhraseInternal.h"
#endif
#ifdef HAVE_CMPH
@ -253,6 +253,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(VWFeatureSourceWindow);
MOSES_FNAME(VWFeatureTargetPhraseInternal);
MOSES_FNAME(VWFeatureTargetIndicator);
MOSES_FNAME(VWFeatureSourceExternalFeatures);
#endif
#ifdef HAVE_CMPH

View File

@ -1,6 +1,8 @@
#pragma once
#include <string>
#include <boost/thread/tss.hpp>
#include "Classifier.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
@ -9,6 +11,10 @@
namespace Moses
{
typedef std::vector<std::string> Features;
typedef std::map<std::string, Features> NameFeatureMap;
typedef boost::thread_specific_ptr<NameFeatureMap> TSNameFeatureMap;
class VWFeatureBase : public StatelessFeatureFunction
{
public:

View File

@ -0,0 +1,9 @@
#include "VWFeatureSourceExternalFeatures.h"
namespace Moses
{
// Out-of-class definition of the static member declared in
// VWFeatureSourceExternalFeatures. TSNameFeatureMap is a
// boost::thread_specific_ptr<NameFeatureMap>; the pointed-to map is allocated
// in InitializeForInput() and is keyed by each feature function's
// GetScoreProducerDescription().
TSNameFeatureMap VWFeatureSourceExternalFeatures::m_nameMap;
}

View File

@ -0,0 +1,65 @@
#pragma once
#include <string>
#include <cstdlib>
#include "VWFeatureSource.h"
#include "TabbedSentence.h"
namespace Moses
{
class VWFeatureSourceExternalFeatures : public VWFeatureSource
{
public:
VWFeatureSourceExternalFeatures(const std::string &line)
: VWFeatureSource(line), m_column(0)
{
ReadParameters();
// Call this last
VWFeatureBase::UpdateRegister();
}
void operator()(const InputType &input
, const InputPath &inputPath
, const WordsRange &sourceRange
, Discriminative::Classifier *classifier) const
{
Features& features = (*m_nameMap)[GetScoreProducerDescription()];
for (size_t i = 0; i < features.size(); i++) {
classifier->AddLabelIndependentFeature(features[i]);
}
}
virtual void SetParameter(const std::string& key, const std::string& value) {
if(key == "column")
m_column = Scan<size_t>(value);
else
VWFeatureSource::SetParameter(key, value);
}
virtual void InitializeForInput(InputType const& source) {
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput, "This feature function requires the TabbedSentence input type");
const TabbedSentence& tabbedSentence = static_cast<const TabbedSentence&>(source);
UTIL_THROW_IF2(tabbedSentence.GetColumns().size() <= m_column, "There is no column with index: " << m_column);
if(!m_nameMap.get())
m_nameMap.reset(new NameFeatureMap());
(*m_nameMap)[GetScoreProducerDescription()].clear();
Features& features = (*m_nameMap)[GetScoreProducerDescription()];
const std::string &column = tabbedSentence.GetColumns()[m_column];
Tokenize(features, column, " ");
}
private:
size_t m_column;
static TSNameFeatureMap m_nameMap;
};
}

View File

@ -272,6 +272,9 @@ bool IOWrapper::ReadInput(InputTypeEnum inputType, InputType*& source)
case TreeInputType:
source = GetInput(new TreeInput);
break;
case TabbedSentenceInput:
source = GetInput(new TabbedSentence);
break;
default:
TRACE_ERR("Unknown input type: " << inputType << "\n");
}

View File

@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/TypeDef.h"
#include "moses/Sentence.h"
#include "moses/TabbedSentence.h"
#include "moses/FactorTypeSet.h"
#include "moses/FactorCollection.h"
#include "moses/Hypothesis.h"

View File

@ -91,12 +91,12 @@ public:
void GetXmlTranslationOptions(std::vector <TranslationOption*> &list, size_t startPos, size_t endPos) const;
std::vector <ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const;
TranslationOptionCollection* CreateTranslationOptionCollection() const;
void CreateFromString(const std::vector<FactorType> &factorOrder
virtual void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString); // , const std::string &factorDelimiter);
const NonTerminalSet &GetLabelSet(size_t /*startPos*/, size_t /*endPos*/) const {

72
moses/TabbedSentence.cpp Normal file
View File

@ -0,0 +1,72 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <vector>
#include <string>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include "TabbedSentence.h"
namespace Moses
{
/**
 * Builds the sentence from a tab-separated string.
 *
 * The text before the first tab is passed to Sentence::CreateFromString; all
 * remaining tab-separated fields are stored in m_columns. If the string
 * contains no tab, m_columns is emptied so that columns from an earlier call
 * do not linger.
 *
 * \param factorOrder   forwarded unchanged to Sentence::CreateFromString
 * \param tabbedString  input line, optionally followed by tab-separated columns
 */
void TabbedSentence::CreateFromString(const std::vector<FactorType> &factorOrder
                                      , const std::string &tabbedString) {
  TabbedColumns allColumns;
  boost::split(allColumns, tabbedString, boost::is_any_of("\t"));

  if (allColumns.size() < 2) {
    // No extra columns: drop any stale ones and use the whole string.
    m_columns.clear();
    Sentence::CreateFromString(factorOrder, tabbedString);
    return;
  }

  // Everything after the first field becomes the extra columns.
  m_columns.assign(allColumns.begin() + 1, allColumns.end());
  Sentence::CreateFromString(factorOrder, allColumns[0]);
}
/**
 * Reads one line from the stream and parses it as a tabbed sentence.
 *
 * The text before the first tab is handed to Sentence::Read through a
 * temporary string stream; all remaining tab-separated fields are stored in
 * m_columns. If the line contains no tab, m_columns is emptied so that
 * columns from an earlier call do not linger.
 *
 * \param in           stream to read a single '\n'-terminated line from
 * \param factorOrder  forwarded unchanged to Sentence::Read
 * \return 0 if getline() left the stream at end-of-file, otherwise the value
 *         returned by Sentence::Read
 */
int TabbedSentence::Read(std::istream& in, const std::vector<FactorType>& factorOrder) {
  std::string line;
  // NOTE(review): a final line without a trailing newline also sets eofbit and
  // is therefore discarded here; presumably this mirrors Sentence::Read - confirm.
  if (getline(in, line, '\n').eof())
    return 0;

  TabbedColumns allColumns;
  boost::split(allColumns, line, boost::is_any_of("\t"));

  // Sentence::Read consumes a stream, so re-wrap the sentence part of the line.
  std::stringstream dummyStream;
  if (allColumns.size() < 2) {
    // No extra columns: drop any stale ones and pass the whole line on.
    m_columns.clear();
    dummyStream << line << '\n';
  } else {
    m_columns.assign(allColumns.begin() + 1, allColumns.end());
    dummyStream << allColumns[0] << '\n';
  }

  return Sentence::Read(dummyStream, factorOrder);
}
}

85
moses/TabbedSentence.h Normal file
View File

@ -0,0 +1,85 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <vector>
#include <string>
#include "Sentence.h"
namespace Moses
{
/**
* Extends Sentence with a vector of strings that is filled from tab-separated
* input. The first column is treated as the normal input sentence, including
* all the usual XML processing. All remaining columns are stored as a vector
* of strings.
*
* At creation time calls FeatureFunction::ProcessColumns(m_columns) once for all
* feature functions, so any feature function can do anything with any column.
* Ideally, feature functions should keep the parse results for the columns in
* thread-specific storage, e.g. boost::thread_specific_ptr<Something>.
*
* In theory a column can contain anything, even text-serialized parse trees,
* classifier features, etc.
*
*/
typedef std::vector<std::string> TabbedColumns;
/**
 * Sentence input type that additionally carries the extra tab-separated
 * columns of its input line (see GetColumns()).
 */
class TabbedSentence : public Sentence
{
public:
  // No debug output here: an instance is created for every input sentence.
  TabbedSentence() {}
  ~TabbedSentence() {}

  // Identifies this input as TabbedSentenceInput.
  InputTypeEnum GetType() const {
    return TabbedSentenceInput;
  }

  // Splits off the first tab-separated column and passes it to
  // Sentence::CreateFromString(...), the remaining columns are stored in
  // m_columns .
  virtual void CreateFromString(const std::vector<FactorType> &factorOrder
                                , const std::string &tabbedString);

  // Reads one line from the stream; the first tab-separated column is passed
  // to Sentence::Read(...), the remaining columns are stored in m_columns .
  virtual int Read(std::istream& in,const std::vector<FactorType>& factorOrder);

  // Columns that followed the first tab of the input line (may be empty).
  const TabbedColumns& GetColumns() const {
    return m_columns;
  }

private:
  TabbedColumns m_columns;
};
}

View File

@ -123,6 +123,7 @@ enum InputTypeEnum {
,WordLatticeInput = 2
,TreeInputType = 3
,WordLatticeInput2 = 4
, TabbedSentenceInput = 5
};