mosesdecoder/moses/FF/TargetBigramFeature.cpp
2013-06-19 00:24:21 +01:00

138 lines
3.8 KiB
C++

#include "TargetBigramFeature.h"
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/Hypothesis.h"
#include "moses/ScoreComponentCollection.h"
#include "util/string_piece_hash.hh"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
int TargetBigramState::Compare(const FFState& other) const
{
const TargetBigramState& rhs = dynamic_cast<const TargetBigramState&>(other);
return Word::Compare(m_word,rhs.m_word);
}
/**
 * Build the feature from its configuration line.
 *
 * Every entry of m_args is offered to SetParameter() as a (key, value) pair.
 * Entries that SetParameter() reports as consumed are removed from m_args;
 * after the loop m_args must be empty. Finally the begin-of-sentence word
 * m_bos is set up on the configured factor type.
 *
 * \param line configuration line, forwarded to StatefulFeatureFunction.
 */
TargetBigramFeature::TargetBigramFeature(const std::string &line)
  :StatefulFeatureFunction("TargetBigramFeature", 0, line)
{
  std::cerr << "Initializing target bigram feature.." << std::endl;

  // No increment in the loop header: the position only advances when the
  // current entry is kept, because erasing shifts the next entry into place.
  for (size_t pos = 0; pos < m_args.size(); ) {
    vector<string> &keyValue = m_args[pos];
    if (SetParameter(keyValue[0], keyValue[1])) {
      m_args.erase(m_args.begin() + pos);
    } else {
      ++pos;
    }
  }
  CHECK(m_args.size() == 0);

  // Register the begin-of-sentence marker as an output factor and store it
  // in m_bos, which EmptyHypothesisState() hands out as the initial state.
  const Factor* bosFactor =
    FactorCollection::Instance().AddFactor(Output,m_factorType,BOS_);
  m_bos.SetFactor(m_factorType,bosFactor);
}
/**
 * Apply one key/value pair from the configuration line.
 *
 * Recognised keys:
 *   - "factor": parsed with Scan<FactorType> and stored in m_factorType.
 *   - "path":   stored verbatim in m_filePath (read later by Load()).
 * Any other key is forwarded to StatefulFeatureFunction::SetParameter.
 *
 * \return always true.
 */
bool TargetBigramFeature::SetParameter(const std::string& key, const std::string& value)
{
  if (key == "factor") {
    m_factorType = Scan<FactorType>(value);
    return true;
  }
  if (key == "path") {
    m_filePath = value;
    return true;
  }
  // NOTE(review): whatever the base class reports for this key is discarded
  // and true is returned regardless, so the constructor never sees an
  // "unconsumed" argument. Confirm against the base class whether its result
  // should be propagated instead.
  StatefulFeatureFunction::SetParameter(key, value);
  return true;
}
void TargetBigramFeature::Load()
{
if (m_filePath == "*")
return ; //allow all
ifstream inFile(m_filePath.c_str());
UTIL_THROW_IF(!inFile, util::Exception, "Can't open file " << m_filePath);
std::string line;
m_vocab.insert(BOS_);
m_vocab.insert(BOS_);
while (getline(inFile, line)) {
m_vocab.insert(line);
}
inFile.close();
}
/**
 * Create the state attached to the empty hypothesis.
 *
 * \return a newly allocated TargetBigramState constructed from m_bos, the
 *         begin-of-sentence word set up in the constructor. The input
 *         argument is unused.
 */
const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*input*/) const
{
  const FFState* initialState = new TargetBigramState(m_bos);
  return initialState;
}
/**
 * Fire one sparse feature "w1:w2" for every bigram that the current
 * hypothesis' target phrase introduces.
 *
 * The first bigram pairs the word stored in prev_state with the first word
 * of the phrase; the remaining ones are the adjacent word pairs inside the
 * phrase. When m_vocab is non-empty, a bigram is counted only if both of its
 * words are found in m_vocab. If the hypothesis' words bitmap reports
 * IsComplete(), an additional "<last word>:EOS_" bigram is counted (subject
 * to the last word being in m_vocab when the vocabulary is non-empty).
 *
 * \param cur_hypo    hypothesis whose current target phrase is scored.
 * \param prev_state  state of the previous hypothesis; must be a
 *                    TargetBigramState (enforced with CHECK).
 * \param accumulator receives +1 for each fired feature name.
 * \return a copy of the previous state if the target phrase is empty;
 *         NULL if the words bitmap is complete; otherwise a new state
 *         holding the last word of the target phrase.
 */
FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
{
  const TargetBigramState* tbState = dynamic_cast<const TargetBigramState*>(prev_state);
  CHECK(tbState);

  // Target phrase of the current hypothesis.
  const Phrase& phrase = cur_hypo.GetCurrTargetPhrase();
  const size_t phraseLen = phrase.GetSize();
  if (phraseLen == 0) {
    // Nothing was added, so the bigram history is unchanged.
    return new TargetBigramState(*tbState);
  }

  // An empty vocabulary means "no restriction".
  const bool restrictVocab = !m_vocab.empty();

  // Walk over all bigrams (left, right) ending inside the phrase.
  for (size_t pos = 0; pos < phraseLen; ++pos) {
    // The left word of the first bigram comes from the previous state.
    const Factor* left = (pos == 0)
                         ? tbState->GetWord().GetFactor(m_factorType)
                         : phrase.GetWord(pos - 1).GetFactor(m_factorType);
    const Factor* right = phrase.GetWord(pos).GetFactor(m_factorType);
    const StringPiece leftStr = left->GetString();
    const StringPiece rightStr = right->GetString();

    // With a restricted vocabulary, both words must be known.
    const bool allowed = !restrictVocab ||
                         (FindStringPiece(m_vocab, leftStr) != m_vocab.end() &&
                          FindStringPiece(m_vocab, rightStr) != m_vocab.end());
    if (allowed) {
      string name(leftStr.data(), leftStr.size());
      name += ":";
      name.append(rightStr.data(), rightStr.size());
      accumulator->PlusEquals(this,name,1);
    }
  }

  if (!cur_hypo.GetWordsBitmap().IsComplete()) {
    // More words will follow: remember the last word as the new history.
    return new TargetBigramState(phrase.GetWord(phraseLen - 1));
  }

  // The bitmap is complete: also count the bigram "<last word>:EOS_".
  const StringPiece lastStr =
    phrase.GetWord(phraseLen - 1).GetFactor(m_factorType)->GetString();
  if (m_vocab.empty() || (FindStringPiece(m_vocab, lastStr) != m_vocab.end())) {
    string name(lastStr.data(), lastStr.size());
    name += ":";
    name += EOS_;
    accumulator->PlusEquals(this,name,1);
  }
  return NULL;
}
/**
 * Tell whether this feature can be used with the given factor mask.
 *
 * \param mask factor mask to test.
 * \return the mask entry at index m_factorType.
 */
bool TargetBigramFeature::IsUseable(const FactorMask &mask) const
{
  return mask[m_factorType];
}
}