mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
basic implementation of non-hierarchical sparse features
This commit is contained in:
parent
3dec0abf0a
commit
a5e5a6590b
@ -52,7 +52,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
|
||||
throw "Unknown conditioning option!";
|
||||
}
|
||||
|
||||
m_configuration->ConfigureSparse(sparseArgs);
|
||||
m_configuration->ConfigureSparse(sparseArgs, this);
|
||||
}
|
||||
|
||||
LexicalReordering::~LexicalReordering()
|
||||
|
@ -38,10 +38,11 @@ size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
|
||||
}
|
||||
}
|
||||
|
||||
void LexicalReorderingConfiguration::ConfigureSparse(const std::map<std::string,std::string>& sparseArgs)
|
||||
void LexicalReorderingConfiguration::ConfigureSparse
|
||||
(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer)
|
||||
{
|
||||
if (sparseArgs.size()) {
|
||||
m_sparse.reset(new SparseReordering(sparseArgs));
|
||||
m_sparse.reset(new SparseReordering(sparseArgs, producer));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ public:
|
||||
|
||||
LexicalReorderingConfiguration(const std::string &modelType);
|
||||
|
||||
void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs);
|
||||
void ConfigureSparse(const std::map<std::string,std::string>& sparseArgs, const LexicalReordering* producer);
|
||||
|
||||
LexicalReorderingState *CreateLexicalReorderingState(const InputType &input) const;
|
||||
|
||||
|
@ -1,8 +1,11 @@
|
||||
#include <fstream>
|
||||
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "moses/InputPath.h"
|
||||
#include "moses/Util.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include "LexicalReordering.h"
|
||||
#include "SparseReordering.h"
|
||||
|
||||
|
||||
@ -11,7 +14,8 @@ using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
SparseReordering::SparseReordering(const map<string,string>& config)
|
||||
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
|
||||
: m_producer(producer)
|
||||
{
|
||||
static const string kSource= "source";
|
||||
static const string kTarget = "target";
|
||||
@ -42,14 +46,32 @@ SparseReordering::SparseReordering(const map<string,string>& config)
|
||||
|
||||
void SparseReordering::ReadWordList(const string& filename, const string& id, vector<WordList>* pWordLists) {
|
||||
ifstream fh(filename.c_str());
|
||||
UTIL_THROW_IF(!fh, util::Exception, "Unable to open: " << filename);
|
||||
string line;
|
||||
pWordLists->push_back(WordList());
|
||||
pWordLists->back().first = id;
|
||||
while (getline(fh,line)) {
|
||||
pWordLists->back().second.insert(line);
|
||||
//TODO: StringPiece
|
||||
const Factor* factor = FactorCollection::Instance().AddFactor(line);
|
||||
pWordLists->back().second.insert(factor);
|
||||
}
|
||||
}
|
||||
|
||||
void SparseReordering::AddFeatures(
|
||||
const string& type, const Word& word, const string& position, const WordList& words,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
ScoreComponentCollection* scores) const {
|
||||
|
||||
//TODO: Precalculate all feature names
|
||||
static string kSep = "-";
|
||||
const Factor* wordFactor = word.GetFactor(0);
|
||||
if (words.second.find(wordFactor) == words.second.end()) return;
|
||||
ostringstream buf;
|
||||
buf << type << kSep << position << kSep << words.first << kSep << wordFactor->GetString() << kSep << reoType;
|
||||
scores->PlusEquals(m_producer, buf.str(), 1.0);
|
||||
|
||||
}
|
||||
|
||||
void SparseReordering::CopyScores(
|
||||
const TranslationOption& topt,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
@ -57,6 +79,29 @@ void SparseReordering::CopyScores(
|
||||
ScoreComponentCollection* scores) const
|
||||
{
|
||||
//std::cerr << "SR " << topt << " " << reoType << " " << direction << std::endl;
|
||||
const string kPhrase = "phr"; //phrase (backward)
|
||||
const string kStack = "stk"; //stack (forward)
|
||||
|
||||
const string* type = &kPhrase;
|
||||
//TODO: bidirectional?
|
||||
if (direction == LexicalReorderingConfiguration::Forward) {
|
||||
if (!m_useStack) return;
|
||||
type = &kStack;
|
||||
} else if (direction == LexicalReorderingConfiguration::Backward && !m_usePhrase) {
|
||||
return;
|
||||
}
|
||||
for (vector<WordList>::const_iterator i = m_sourceWordLists.begin(); i != m_sourceWordLists.end(); ++i) {
|
||||
const Phrase& sourcePhrase = topt.GetInputPath().GetPhrase();
|
||||
AddFeatures(*type, sourcePhrase.GetWord(0), "src.first", *i, reoType, scores);
|
||||
AddFeatures(*type, sourcePhrase.GetWord(sourcePhrase.GetSize()-1), "src.last", *i, reoType, scores);
|
||||
}
|
||||
for (vector<WordList>::const_iterator i = m_targetWordLists.begin(); i != m_targetWordLists.end(); ++i) {
|
||||
const Phrase& targetPhrase = topt.GetTargetPhrase();
|
||||
AddFeatures(*type, targetPhrase.GetWord(0), "tgt.first", *i, reoType, scores);
|
||||
AddFeatures(*type, targetPhrase.GetWord(targetPhrase.GetSize()-1), "tgt.last", *i, reoType, scores);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
} //namespace
|
||||
|
@ -10,6 +10,8 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "LexicalReorderingState.h"
|
||||
|
||||
@ -32,7 +34,7 @@ namespace Moses
|
||||
class SparseReordering
|
||||
{
|
||||
public:
|
||||
SparseReordering(const std::map<std::string,std::string>& config);
|
||||
SparseReordering(const std::map<std::string,std::string>& config, const LexicalReordering* producer);
|
||||
|
||||
//If direction is backward topt is the current option, otherwise the previous
|
||||
void CopyScores(const TranslationOption& topt,
|
||||
@ -41,7 +43,8 @@ public:
|
||||
ScoreComponentCollection* scores) const ;
|
||||
|
||||
private:
|
||||
typedef std::pair<std::string, std::set<std::string> > WordList; //id and list
|
||||
const LexicalReordering* m_producer;
|
||||
typedef std::pair<std::string, boost::unordered_set<const Factor*> > WordList; //id and list
|
||||
std::vector<WordList> m_sourceWordLists;
|
||||
std::vector<WordList> m_targetWordLists;
|
||||
bool m_usePhrase;
|
||||
@ -49,6 +52,10 @@ private:
|
||||
bool m_useStack;
|
||||
|
||||
void ReadWordList(const std::string& filename, const std::string& id, std::vector<WordList>* pWordLists);
|
||||
void AddFeatures(
|
||||
const std::string& type, const Word& word, const std::string& position, const WordList& words,
|
||||
LexicalReorderingState::ReorderingType reoType,
|
||||
ScoreComponentCollection* scores) const;
|
||||
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user