// mosesdecoder/moses2/TargetPhrase.h
/*
* TargetPhrase.h
*
* Created on: 26 Apr 2016
* Author: hieu
*/
#pragma once
2016-06-20 16:59:31 +03:00
#include <sstream>
2016-04-27 10:53:51 +03:00
#include "PhraseImplTemplate.h"
2016-04-26 16:47:31 +03:00
#include "System.h"
#include "Scores.h"
2016-08-26 13:50:12 +03:00
#include "AlignmentInfoCollection.h"
2016-08-26 19:19:39 +03:00
#include "TranslationModel/PhraseTable.h"
2016-04-26 13:40:57 +03:00
namespace Moses2
{
2016-08-26 13:50:12 +03:00
class AlignmentInfo;
2016-04-26 13:40:57 +03:00
2016-04-26 22:53:53 +03:00
template<typename WORD>
2016-04-27 10:53:51 +03:00
class TargetPhrase: public PhraseImplTemplate<WORD>
2016-04-26 13:40:57 +03:00
{
public:
2016-08-26 13:50:12 +03:00
typedef PhraseImplTemplate<WORD> Parent;
2016-04-26 13:40:57 +03:00
const PhraseTable &pt;
mutable void **ffData;
SCORE *scoreProperties;
2016-04-27 10:53:51 +03:00
TargetPhrase(MemPool &pool, const PhraseTable &pt, const System &system, size_t size)
2017-02-01 03:27:14 +03:00
: PhraseImplTemplate<WORD>(pool, size)
, pt(pt)
, scoreProperties(NULL)
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo()) {
2016-04-26 16:47:31 +03:00
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
2017-02-01 03:27:14 +03:00
system.featureFunctions.GetNumScores());
2016-04-26 13:40:57 +03:00
}
2017-02-01 03:27:14 +03:00
Scores &GetScores() {
return *m_scores;
}
2016-04-26 16:47:31 +03:00
2017-02-01 03:27:14 +03:00
const Scores &GetScores() const {
return *m_scores;
}
2016-04-26 13:40:57 +03:00
2016-07-15 18:35:38 +03:00
virtual SCORE GetScoreForPruning() const = 0;
2017-02-01 03:27:14 +03:00
SCORE *GetScoresProperty(int propertyInd) const {
return scoreProperties ? scoreProperties + propertyInd : NULL;
}
2016-04-26 13:40:57 +03:00
2016-08-26 13:50:12 +03:00
const AlignmentInfo &GetAlignTerm() const {
return *m_alignTerm;
}
void SetAlignTerm(const AlignmentInfo &alignInfo) {
m_alignTerm = &alignInfo;
}
2016-08-26 13:59:46 +03:00
// ALNREP = alignment representation,
// see AlignmentInfo constructors for supported representations
template<typename ALNREP>
void
SetAlignTerm(const ALNREP &coll) {
m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
}
2017-02-01 03:27:14 +03:00
virtual void SetAlignmentInfo(const std::string &alignString) {
2016-08-26 14:13:35 +03:00
AlignmentInfo::CollType alignTerm;
std::vector<std::string> toks = Tokenize(alignString);
for (size_t i = 0; i < toks.size(); ++i) {
std::vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");
size_t sourcePos = alignPair[0];
size_t targetPos = alignPair[1];
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
}
SetAlignTerm(alignTerm);
// cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
//cerr << "alignTerm=" << alignTerm.size() << endl;
//cerr << "alignNonTerm=" << alignNonTerm.size() << endl;
}
2017-02-01 03:27:14 +03:00
void OutputToStream(const System &system, const Phrase<WORD> &inputPhrase, std::ostream &out) const {
// get placeholders
FactorType placeholderFactor = system.options.input.placeholder_factor;
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(system, inputPhrase);
}
size_t size = PhraseImplTemplate<WORD>::GetSize();
for (size_t i = 0; i < size; ++i) {
// output placeholder, if any
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(i);
if (iter == placeholders.end()) {
const WORD &word = (*this)[i];
word.OutputToStream(system, out);
} else {
const Factor *factor = iter->second;
out << *factor;
}
out << " ";
}
2016-08-26 13:04:22 +03:00
}
2017-02-01 03:27:14 +03:00
std::map<size_t, const Factor*> GetPlaceholders(const System &system, const Phrase<WORD> &inputPhrase) const {
2016-08-26 19:19:39 +03:00
FactorType placeholderFactor = system.options.input.placeholder_factor;
2016-08-26 15:46:12 +03:00
std::map<size_t, const Factor*> ret;
2016-08-26 23:48:10 +03:00
//std::cerr << "inputPhrase=" << inputPhrase.Debug(system) << std::endl;
2016-08-26 15:46:12 +03:00
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
const Factor *factor = inputPhrase[sourcePos][placeholderFactor];
if (factor) {
2017-02-01 03:27:14 +03:00
//std::cerr << "factor=" << *factor << std::endl;
//std::cerr << "tp=" << Debug(system) << std::endl;
2016-08-26 15:46:12 +03:00
std::set<size_t> targetPos = GetAlignTerm().GetAlignmentsForSource(sourcePos);
UTIL_THROW_IF2(targetPos.size() != 1,
2016-08-26 19:19:39 +03:00
"Placeholder should be aligned to 1, and only 1, word:" << targetPos.size() << "!=1");
2016-08-26 15:46:12 +03:00
ret[*targetPos.begin()] = factor;
}
}
return ret;
}
2017-02-01 03:27:14 +03:00
virtual std::string Debug(const System &system) const {
2016-08-26 19:19:39 +03:00
std::stringstream out;
out << Phrase<WORD>::Debug(system);
out << " pt=" << pt.GetName() << " ";
out << " SCORES:" << GetScores().Debug(system);
out << " ALIGN-T:";
out << GetAlignTerm().Debug(system);
return out.str();
}
2016-04-26 13:40:57 +03:00
protected:
Scores *m_scores;
2016-08-26 13:50:12 +03:00
const AlignmentInfo *m_alignTerm;
2016-04-26 13:40:57 +03:00
};
2016-04-26 16:47:31 +03:00
///////////////////////////////////////////////////////////////////////
2016-06-13 15:59:26 +03:00
/**
 * Comparison functor that orders objects by descending pruning score:
 * a comes before b when a's GetScoreForPruning() is strictly greater.
 * Equal scores compare as equivalent (neither is "before" the other).
 *
 * @tparam TP any type with a const member GetScoreForPruning() whose
 *            result supports operator>.
 */
template<typename TP>
struct CompareScoreForPruning {
  /// Pointer form; both pointers must be non-NULL (they are dereferenced).
  bool operator()(const TP *a, const TP *b) const {
    // Delegate so the ordering rule is written in exactly one place.
    return (*this)(*a, *b);
  }

  /// Reference form.
  bool operator()(const TP &a, const TP &b) const {
    return a.GetScoreForPruning() > b.GetScoreForPruning();
  }
};
} /* namespace Moses2 */