2010-09-17 16:42:33 +04:00
|
|
|
#include "TargetBigramFeature.h"
|
2013-05-24 21:02:49 +04:00
|
|
|
#include "moses/Phrase.h"
|
|
|
|
#include "moses/TargetPhrase.h"
|
|
|
|
#include "moses/Hypothesis.h"
|
|
|
|
#include "moses/ScoreComponentCollection.h"
|
2013-04-25 22:42:30 +04:00
|
|
|
#include "util/string_piece_hash.hh"
|
2013-06-10 21:11:55 +04:00
|
|
|
#include "util/exception.hh"
|
2010-09-17 16:42:33 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-01-15 18:10:49 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
int TargetBigramState::Compare(const FFState& other) const
|
|
|
|
{
|
2010-10-15 01:52:35 +04:00
|
|
|
const TargetBigramState& rhs = dynamic_cast<const TargetBigramState&>(other);
|
|
|
|
return Word::Compare(m_word,rhs.m_word);
|
|
|
|
}
|
|
|
|
|
2013-01-15 18:10:49 +04:00
|
|
|
/**
 * Build the feature from its configuration line.
 *
 * The base class is constructed with 0 as its first argument and the raw
 * configuration line. ReadParameters() runs before the BOS word is built,
 * because the factor type used below is assigned in SetParameter().
 */
TargetBigramFeature::TargetBigramFeature(const std::string &line)
  : StatefulFeatureFunction(0, line)
{
  std::cerr << "Initializing target bigram feature.." << std::endl;
  ReadParameters();

  // Register the begin-of-sentence marker as an output factor and store it
  // in m_bos; EmptyHypothesisState() later wraps this word.
  m_bos.SetFactor(m_factorType,
                  FactorCollection::Instance().AddFactor(Output, m_factorType, BOS_));
}
|
|
|
|
|
2013-06-20 16:25:02 +04:00
|
|
|
void TargetBigramFeature::SetParameter(const std::string& key, const std::string& value)
|
2010-09-17 17:55:00 +04:00
|
|
|
{
|
2013-06-10 21:11:55 +04:00
|
|
|
if (key == "factor") {
|
|
|
|
m_factorType = Scan<FactorType>(value);
|
|
|
|
} else if (key == "path") {
|
|
|
|
m_filePath = value;
|
|
|
|
} else {
|
2013-06-11 03:05:12 +04:00
|
|
|
StatefulFeatureFunction::SetParameter(key, value);
|
2010-10-15 01:52:35 +04:00
|
|
|
}
|
2013-06-10 21:11:55 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
void TargetBigramFeature::Load()
|
|
|
|
{
|
|
|
|
if (m_filePath == "*")
|
|
|
|
return ; //allow all
|
|
|
|
ifstream inFile(m_filePath.c_str());
|
2013-11-23 00:27:46 +04:00
|
|
|
UTIL_THROW_IF2(!inFile, "Can't open file " << m_filePath);
|
2013-06-10 21:11:55 +04:00
|
|
|
|
2010-09-17 17:55:00 +04:00
|
|
|
std::string line;
|
2010-10-15 01:52:35 +04:00
|
|
|
m_vocab.insert(BOS_);
|
2010-10-15 19:19:17 +04:00
|
|
|
m_vocab.insert(BOS_);
|
2010-09-17 17:55:00 +04:00
|
|
|
while (getline(inFile, line)) {
|
2010-10-15 01:52:35 +04:00
|
|
|
m_vocab.insert(line);
|
2010-09-17 17:55:00 +04:00
|
|
|
}
|
|
|
|
|
2010-10-15 01:52:35 +04:00
|
|
|
inFile.close();
|
2010-09-17 17:55:00 +04:00
|
|
|
}
|
|
|
|
|
2010-09-17 16:42:33 +04:00
|
|
|
|
2010-09-17 19:01:14 +04:00
|
|
|
/**
 * Create the state attached to the empty hypothesis.
 *
 * The input argument is unused. The returned state is heap-allocated and
 * wraps m_bos, the word set up in the constructor, so the first bigram of a
 * translation has BOS as its left word.
 * NOTE(review): the caller presumably takes ownership of the returned
 * pointer — confirm against the decoder's state handling.
 */
const FFState* TargetBigramFeature::EmptyHypothesisState(const InputType &/*input*/) const
{
  const FFState* initialState = new TargetBigramState(m_bos);
  return initialState;
}
|
|
|
|
|
2010-09-17 16:50:29 +04:00
|
|
|
/**
 * Fire one sparse feature per target bigram introduced by this hypothesis.
 *
 * For each word of the current target phrase, the bigram made of the
 * preceding word (the word held in prev_state for the first position) and
 * the word itself is added to the accumulator under the name "w1:w2" with
 * value 1. When m_vocab is non-empty, a bigram is only counted if both of
 * its words are in m_vocab.
 *
 * When the hypothesis' words bitmap reports IsComplete(), an extra bigram
 * "<last word>:EOS_" is added (only the last word is checked against
 * m_vocab) and NULL is returned as the state.
 *
 * @param cur_hypo    hypothesis whose current target phrase is scored
 * @param prev_state  state of the previous hypothesis; must be a
 *                    TargetBigramState (checked with assert)
 * @param accumulator score collection that receives the sparse features
 * @return a newly allocated TargetBigramState holding the last word of the
 *         target phrase; a copy of the previous state if the target phrase
 *         is empty; NULL if the hypothesis is complete
 */
FFState* TargetBigramFeature::Evaluate(const Hypothesis& cur_hypo,
                                       const FFState* prev_state,
                                       ScoreComponentCollection* accumulator) const
{
  const TargetBigramState* previous = dynamic_cast<const TargetBigramState*>(prev_state);
  assert(previous);

  // current hypothesis target phrase
  const Phrase& phrase = cur_hypo.GetCurrTargetPhrase();
  const size_t phraseLength = phrase.GetSize();
  if (phraseLength == 0) {
    // Nothing was produced: carry the previous word forward unchanged.
    return new TargetBigramState(*previous);
  }

  // An empty vocabulary means no restriction at all.
  const bool restricted = !m_vocab.empty();

  // Walk the phrase, keeping the left-hand factor of the next bigram in
  // `left`; it starts as the word remembered from the previous hypothesis.
  const Factor* left = previous->GetWord().GetFactor(m_factorType);
  for (size_t pos = 0; pos < phraseLength; ++pos) {
    const Factor* right = phrase.GetWord(pos).GetFactor(m_factorType);
    const StringPiece leftText = left->GetString();
    const StringPiece rightText = right->GetString();
    left = right;

    // Only count bigrams whose two words belong to the restricted vocabulary.
    const bool allowed = !restricted ||
                         (FindStringPiece(m_vocab, leftText) != m_vocab.end() &&
                          FindStringPiece(m_vocab, rightText) != m_vocab.end());
    if (allowed) {
      std::string bigram(leftText.data(), leftText.size());
      bigram += ":";
      bigram.append(rightText.data(), rightText.size());
      accumulator->PlusEquals(this, bigram, 1);
    }
  }

  const Word& lastWord = phrase.GetWord(phraseLength - 1);
  if (!cur_hypo.GetWordsBitmap().IsComplete()) {
    // More words will follow: remember the last word for the next bigram.
    return new TargetBigramState(lastWord);
  }

  // Complete hypothesis: close the sentence with the "<last word>:EOS_" bigram.
  const StringPiece finalText = lastWord.GetFactor(m_factorType)->GetString();
  if (!restricted || FindStringPiece(m_vocab, finalText) != m_vocab.end()) {
    std::string bigram(finalText.data(), finalText.size());
    bigram += ":";
    bigram += EOS_;
    accumulator->PlusEquals(this, bigram, 1);
  }
  return NULL;
}
|
2013-05-02 15:15:26 +04:00
|
|
|
|
2013-05-30 15:41:08 +04:00
|
|
|
bool TargetBigramFeature::IsUseable(const FactorMask &mask) const
|
|
|
|
{
|
2013-05-30 15:51:40 +04:00
|
|
|
bool ret = mask[m_factorType];
|
|
|
|
return ret;
|
2013-05-30 15:41:08 +04:00
|
|
|
}
|
|
|
|
|
2010-09-17 16:42:33 +04:00
|
|
|
}
|
|
|
|
|