basic implementation of non-hierarchical sparse features

This commit is contained in:
Barry Haddow 2014-06-09 22:17:05 +01:00
parent 3dec0abf0a
commit a5e5a6590b
5 changed files with 61 additions and 8 deletions

View File

@ -52,7 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
throw "Unknown conditioning option!";
}
m_configuration->ConfigureSparse(sparseArgs);
m_configuration->ConfigureSparse(sparseArgs, this);
}
LexicalReordering::~LexicalReordering()

View File

@ -38,10 +38,11 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
}
}
// NOTE(review): this is a commit view without +/- markers. The two signature
// forms and the two reset() calls below appear to be the pre-change line
// followed by its replacement -- confirm against the real file.
//
// Creates the SparseReordering helper when sparse options were supplied.
//   sparseArgs - option map handed unchanged to the SparseReordering constructor.
//   producer   - (two-argument form) handed on to the SparseReordering constructor.
// When sparseArgs is empty nothing happens and m_sparse is left as it was.
void LexicalReorderingConfiguration::ConfigureSparse(const std::map<std::string,std::string>& sparseArgs)
void LexicalReorderingConfiguration::ConfigureSparse
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
{
// size() is used as a boolean: any entry at all switches sparse features on.
if (sparseArgs.size()) {
m_sparse.reset(new SparseReordering(sparseArgs));
m_sparse.reset(new SparseReordering(sparseArgs, producer));
}
}

View File

@ -34,7 +34,7 @@ public:
LexicalReorderingConfiguration(const std::string &modelType);
void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs);
void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer);
LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const;

View File

@ -1,8 +1,11 @@
#include <fstream>
#include "moses/FactorCollection.h"
#include "moses/InputPath.h"
#include "moses/Util.h"
#include "util/exception.hh"
#include "LexicalReordering.h"
#include "SparseReordering.h"
@ -11,7 +14,8 @@ using namespace std;
namespace Moses
{
SparseReordering::SparseReordering(const map<string,string>& config)
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
: m_producer(producer)
{
static const string kSource= "source";
static const string kTarget = "target";
@ -42,14 +46,32 @@ SparseReordering::SparseReordering(const map<string,string>& config)
// NOTE(review): this is a commit view without +/- markers. The string insert
// and the factor insert inside the loop appear to be the pre-change line and
// its replacement respectively -- confirm against the real file.
//
// Reads a word list from a text file and appends it to *pWordLists.
//   filename   - path of the file to read, one entry per line.
//   id         - identifier stored as the first element of the new WordList.
//   pWordLists - vector that receives the new list as its last element.
// Reports failure through UTIL_THROW_IF (util::Exception) when the file
// cannot be opened; in that case *pWordLists is not modified.
void SparseReordering::ReadWordList(const string& filename, const string& id, vector<WordList>* pWordLists) {
ifstream fh(filename.c_str());
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
string line;
// The list is appended first and then filled in place through back().
pWordLists->push_back(WordList());
pWordLists->back().first = id;
// Each line is used exactly as getline returns it: no trimming is applied
// and blank lines are not filtered out.
while (getline(fh,line)) {
pWordLists->back().second.insert(line);
//TODO: StringPiece
// Register the line with the global FactorCollection and store the
// resulting Factor pointer in the list.
const Factor* factor = FactorCollection::Instance().AddFactor(line);
pWordLists->back().second.insert(factor);
}
}
void SparseReordering::AddFeatures(
const string& type, const Word& word, const string& position, const WordList& words,
LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const {
//TODO: Precalculate all feature names
static string kSep = "-";
const Factor* wordFactor = word.GetFactor(0);
if (words.second.find(wordFactor) == words.second.end()) return;
ostringstream buf;
buf << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType;
scores->PlusEquals(m_producer, buf.str(), 1.0);
}
// Adds the sparse reordering features for one translation option to `scores`.
// NOTE(review): the hunk header below hides part of the parameter list; the
// body uses a `direction` parameter that is not visible in this view.
void SparseReordering::CopyScores(
const TranslationOption& topt,
LexicalReorderingState::ReorderingType reoType,
@ -57,6 +79,29 @@ void SparseReordering::CopyScores(
ScoreComponentCollection* scores) const
{
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
// Feature-name prefixes: "phr" is the default, "stk" is used for Forward.
const string kPhrase = "phr"; //phrase (backward)
const string kStack = "stk"; //stack (forward)
const string* type = &kPhrase;
//TODO: bidirectional?
// Forward features are emitted only when m_useStack is set; Backward
// features only when m_usePhrase is set. Any other direction value falls
// through with the "phr" prefix without consulting either flag.
if (direction == LexicalReorderingConfiguration::Forward) {
if (!m_useStack) return;
type = &kStack;
} else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) {
return;
}
// For every source word list, test the first and last word of the source
// phrase taken from the option's input path.
// NOTE(review): GetSize()-1 assumes the phrase is non-empty -- confirm.
for (vector<WordList>::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) {
const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();
AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores);
AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores);
}
// Same for every target word list, using the option's target phrase.
// NOTE(review): GetSize()-1 assumes the phrase is non-empty -- confirm.
for (vector<WordList>::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) {
const Phrase& targetPhrase = topt.GetTargetPhrase();
AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores);
AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores);
}
}
} //namespace

View File

@ -10,6 +10,8 @@
#include <string>
#include <vector>
#include <boost/unordered_set.hpp>
#include "moses/ScoreComponentCollection.h"
#include "LexicalReorderingState.h"
@ -32,7 +34,7 @@ namespace Moses
// Produces sparse lexical reordering features from configured word lists.
// NOTE(review): this is a commit view without +/- markers. The paired
// constructor declarations and the paired WordList typedefs appear to be the
// pre-change line and its replacement, and the hunk headers hide some lines
// of the class -- confirm against the real header.
class SparseReordering
{
public:
// config: option map; producer (two-argument form) is stored in m_producer.
SparseReordering(const std::map<std::string,std::string>& config);
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
//If direction is backward topt is the current option, otherwise the previous
void CopyScores(const TranslationOption& topt,
@ -41,7 +43,8 @@ public:
ScoreComponentCollection* scores) const ;
private:
typedef std::pair<std::string, std::set<std::string> > WordList; //id and list
// Passed to ScoreComponentCollection::PlusEquals as the owner of each feature.
const LexicalReordering* m_producer;
typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
// Word lists checked against the source phrase and the target phrase.
std::vector<WordList> m_sourceWordLists;
std::vector<WordList> m_targetWordLists;
// Gates feature emission for the Backward direction in CopyScores.
bool m_usePhrase;
@ -49,6 +52,10 @@ private:
// Gates feature emission for the Forward direction in CopyScores.
bool m_useStack;
// Loads one word list from filename (one entry per line) and appends it,
// tagged with id, to *pWordLists.
void ReadWordList(const std::string& filename, const std::string& id, std::vector<WordList>* pWordLists);
// Adds a single feature to scores when word's factor 0 is in words.
void AddFeatures(
const std::string& type, const Word& word, const std::string& position, const WordList& words,
LexicalReorderingState::ReorderingType reoType,
ScoreComponentCollection* scores) const;
};