2011-08-13 05:39:35 +04:00
|
|
|
#include <sstream>
|
|
|
|
#include "SourceWordDeletionFeature.h"
|
2013-05-24 21:02:49 +04:00
|
|
|
#include "moses/Phrase.h"
|
|
|
|
#include "moses/TargetPhrase.h"
|
|
|
|
#include "moses/Hypothesis.h"
|
|
|
|
#include "moses/ChartHypothesis.h"
|
|
|
|
#include "moses/ScoreComponentCollection.h"
|
|
|
|
#include "moses/TranslationOption.h"
|
|
|
|
#include "moses/Util.h"
|
|
|
|
|
2013-04-25 22:42:30 +04:00
|
|
|
#include "util/string_piece_hash.hh"
|
2013-06-10 21:11:55 +04:00
|
|
|
#include "util/exception.hh"
|
2011-08-13 05:39:35 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2011-08-13 05:39:35 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2013-01-02 15:31:59 +04:00
|
|
|
/**
 * Construct the feature from its configuration line.
 *
 * The base class receives the literal 0 together with the unmodified
 * configuration line (NOTE(review): the 0 is presumably the number of dense
 * scores -- confirm against StatelessFeatureFunction). The feature starts
 * out unrestricted; Load() clears that flag once a word list has been read.
 *
 * @param line  configuration line, forwarded to the base class unchanged
 */
SourceWordDeletionFeature::SourceWordDeletionFeature(const std::string &line)
  : StatelessFeatureFunction(0, line)
  , m_unrestricted(true)
{
  std::cerr << "Initializing source word deletion feature.." << std::endl;

  // Presumably parses the configuration line and hands each key/value pair
  // to SetParameter() -- verify in the base class.
  ReadParameters();
}
|
|
|
|
|
2013-06-20 16:25:02 +04:00
|
|
|
void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::string& value)
|
2013-06-10 21:11:55 +04:00
|
|
|
{
|
|
|
|
if (key == "factor") {
|
|
|
|
m_factorType = Scan<FactorType>(value);
|
|
|
|
} else if (key == "path") {
|
|
|
|
m_filename = value;
|
|
|
|
} else {
|
2013-06-20 16:25:02 +04:00
|
|
|
StatelessFeatureFunction::SetParameter(key, value);
|
2013-01-02 15:31:59 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-06-10 21:11:55 +04:00
|
|
|
void SourceWordDeletionFeature::Load()
|
2011-08-13 05:39:35 +04:00
|
|
|
{
|
2013-06-10 21:11:55 +04:00
|
|
|
if (m_filename == "") {
|
|
|
|
return;
|
2011-08-13 05:39:35 +04:00
|
|
|
}
|
|
|
|
|
2013-06-10 21:11:55 +04:00
|
|
|
cerr << "loading source word deletion word list from " << m_filename << endl;
|
|
|
|
|
|
|
|
ifstream inFile(m_filename.c_str());
|
2013-11-23 00:27:46 +04:00
|
|
|
UTIL_THROW_IF2(!inFile, "Can't open file " << m_filename);
|
2013-06-10 21:11:55 +04:00
|
|
|
|
2011-08-13 05:39:35 +04:00
|
|
|
std::string line;
|
|
|
|
while (getline(inFile, line)) {
|
|
|
|
m_vocab.insert(line);
|
|
|
|
}
|
|
|
|
|
|
|
|
inFile.close();
|
|
|
|
|
|
|
|
m_unrestricted = false;
|
|
|
|
}
|
|
|
|
|
2013-06-14 22:49:51 +04:00
|
|
|
bool SourceWordDeletionFeature::IsUseable(const FactorMask &mask) const
|
|
|
|
{
|
|
|
|
bool ret = mask[m_factorType];
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2014-07-10 01:35:59 +04:00
|
|
|
void SourceWordDeletionFeature::EvaluateInIsolation(const Phrase &source
|
2013-05-29 21:16:15 +04:00
|
|
|
, const TargetPhrase &targetPhrase
|
|
|
|
, ScoreComponentCollection &scoreBreakdown
|
|
|
|
, ScoreComponentCollection &estimatedFutureScore) const
|
2013-05-02 15:15:26 +04:00
|
|
|
{
|
2013-05-24 14:33:24 +04:00
|
|
|
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
2013-05-28 13:51:28 +04:00
|
|
|
ComputeFeatures(source, targetPhrase, &scoreBreakdown, alignmentInfo);
|
2013-05-02 15:15:26 +04:00
|
|
|
}
|
|
|
|
|
2013-05-28 13:51:28 +04:00
|
|
|
void SourceWordDeletionFeature::ComputeFeatures(const Phrase &source,
|
2013-05-29 21:16:15 +04:00
|
|
|
const TargetPhrase& targetPhrase,
|
|
|
|
ScoreComponentCollection* accumulator,
|
|
|
|
const AlignmentInfo &alignmentInfo) const
|
2011-08-13 05:39:35 +04:00
|
|
|
{
|
|
|
|
// handle special case: unknown words (they have no word alignment)
|
2013-05-29 21:16:15 +04:00
|
|
|
size_t targetLength = targetPhrase.GetSize();
|
|
|
|
size_t sourceLength = source.GetSize();
|
|
|
|
if (targetLength == 1 && sourceLength == 1 && !alignmentInfo.GetSize()) return;
|
2011-08-13 05:39:35 +04:00
|
|
|
|
|
|
|
// flag aligned words
|
2014-10-15 17:20:40 +04:00
|
|
|
std::vector<bool> aligned(sourceLength, false);
|
2012-10-19 19:00:42 +04:00
|
|
|
for (AlignmentInfo::const_iterator alignmentPoint = alignmentInfo.begin(); alignmentPoint != alignmentInfo.end(); alignmentPoint++)
|
2011-08-13 05:39:35 +04:00
|
|
|
aligned[ alignmentPoint->first ] = true;
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-08-13 05:39:35 +04:00
|
|
|
// process unaligned source words
|
|
|
|
for(size_t i=0; i<sourceLength; i++) {
|
|
|
|
if (!aligned[i]) {
|
2013-05-29 21:16:15 +04:00
|
|
|
const Word &w = source.GetWord(i);
|
|
|
|
if (!w.IsNonTerminal()) {
|
|
|
|
const StringPiece word = w.GetFactor(m_factorType)->GetString();
|
|
|
|
if (word != "<s>" && word != "</s>") {
|
|
|
|
if (!m_unrestricted && FindStringPiece(m_vocab, word ) == m_vocab.end()) {
|
|
|
|
accumulator->PlusEquals(this, StringPiece("OTHER"),1);
|
|
|
|
} else {
|
|
|
|
accumulator->PlusEquals(this,word,1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-08-13 05:39:35 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|