2016-04-26 13:40:57 +03:00
|
|
|
/*
|
|
|
|
* TargetPhrase.h
|
|
|
|
*
|
|
|
|
* Created on: 26 Apr 2016
|
|
|
|
* Author: hieu
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
2016-06-20 16:59:31 +03:00
|
|
|
#include <sstream>
|
2016-04-27 10:53:51 +03:00
|
|
|
#include "PhraseImplTemplate.h"
|
2016-04-26 16:47:31 +03:00
|
|
|
#include "System.h"
|
|
|
|
#include "Scores.h"
|
2016-08-26 13:50:12 +03:00
|
|
|
#include "AlignmentInfoCollection.h"
|
2016-08-26 19:19:39 +03:00
|
|
|
#include "TranslationModel/PhraseTable.h"
|
2016-04-26 13:40:57 +03:00
|
|
|
|
|
|
|
namespace Moses2
|
|
|
|
{
|
2016-08-26 13:50:12 +03:00
|
|
|
class AlignmentInfo;
|
2016-04-26 13:40:57 +03:00
|
|
|
|
2016-04-26 22:53:53 +03:00
|
|
|
template<typename WORD>
|
2016-04-27 10:53:51 +03:00
|
|
|
class TargetPhrase: public PhraseImplTemplate<WORD>
|
2016-04-26 13:40:57 +03:00
|
|
|
{
|
|
|
|
public:
|
2016-08-26 13:50:12 +03:00
|
|
|
typedef PhraseImplTemplate<WORD> Parent;
|
2016-04-26 13:40:57 +03:00
|
|
|
const PhraseTable &pt;
|
|
|
|
mutable void **ffData;
|
|
|
|
SCORE *scoreProperties;
|
|
|
|
|
2016-04-27 10:53:51 +03:00
|
|
|
TargetPhrase(MemPool &pool, const PhraseTable &pt, const System &system, size_t size)
|
|
|
|
: PhraseImplTemplate<WORD>(pool, size)
|
|
|
|
, pt(pt)
|
2016-04-26 16:47:31 +03:00
|
|
|
, scoreProperties(NULL)
|
2016-08-26 13:50:12 +03:00
|
|
|
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
2016-04-26 13:40:57 +03:00
|
|
|
{
|
2016-04-26 16:47:31 +03:00
|
|
|
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
|
|
|
|
system.featureFunctions.GetNumScores());
|
2016-04-26 13:40:57 +03:00
|
|
|
}
|
|
|
|
|
2016-04-26 16:47:31 +03:00
|
|
|
Scores &GetScores()
|
|
|
|
{ return *m_scores; }
|
|
|
|
|
2016-04-26 13:40:57 +03:00
|
|
|
const Scores &GetScores() const
|
2016-04-26 16:47:31 +03:00
|
|
|
{ return *m_scores; }
|
2016-04-26 13:40:57 +03:00
|
|
|
|
2016-07-15 18:35:38 +03:00
|
|
|
virtual SCORE GetScoreForPruning() const = 0;
|
|
|
|
|
2016-04-26 16:47:31 +03:00
|
|
|
SCORE *GetScoresProperty(int propertyInd) const
|
|
|
|
{ return scoreProperties ? scoreProperties + propertyInd : NULL; }
|
2016-04-26 13:40:57 +03:00
|
|
|
|
2016-08-26 13:50:12 +03:00
|
|
|
const AlignmentInfo &GetAlignTerm() const {
|
|
|
|
return *m_alignTerm;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetAlignTerm(const AlignmentInfo &alignInfo) {
|
|
|
|
m_alignTerm = &alignInfo;
|
|
|
|
}
|
|
|
|
|
2016-08-26 13:59:46 +03:00
|
|
|
// ALNREP = alignment representation,
|
|
|
|
// see AlignmentInfo constructors for supported representations
|
|
|
|
template<typename ALNREP>
|
|
|
|
void
|
|
|
|
SetAlignTerm(const ALNREP &coll) {
|
|
|
|
m_alignTerm = AlignmentInfoCollection::Instance().Add(coll);
|
|
|
|
}
|
|
|
|
|
2016-08-26 14:13:35 +03:00
|
|
|
virtual void SetAlignmentInfo(const std::string &alignString)
|
|
|
|
{
|
|
|
|
AlignmentInfo::CollType alignTerm;
|
|
|
|
|
|
|
|
std::vector<std::string> toks = Tokenize(alignString);
|
|
|
|
for (size_t i = 0; i < toks.size(); ++i) {
|
|
|
|
std::vector<size_t> alignPair = Tokenize<size_t>(toks[i], "-");
|
|
|
|
UTIL_THROW_IF2(alignPair.size() != 2, "Wrong alignment format");
|
|
|
|
|
|
|
|
size_t sourcePos = alignPair[0];
|
|
|
|
size_t targetPos = alignPair[1];
|
|
|
|
|
|
|
|
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
|
|
|
|
}
|
|
|
|
|
|
|
|
SetAlignTerm(alignTerm);
|
|
|
|
// cerr << "TargetPhrase::SetAlignmentInfo(const StringPiece &alignString) this:|" << *this << "|\n";
|
|
|
|
|
|
|
|
//cerr << "alignTerm=" << alignTerm.size() << endl;
|
|
|
|
//cerr << "alignNonTerm=" << alignNonTerm.size() << endl;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2016-12-05 16:25:00 +03:00
|
|
|
void OutputToStream(const System &system, const Phrase<WORD> &inputPhrase, std::ostream &out) const
|
2016-08-26 13:04:22 +03:00
|
|
|
{
|
2016-08-26 19:19:39 +03:00
|
|
|
// get placeholders
|
|
|
|
FactorType placeholderFactor = system.options.input.placeholder_factor;
|
2016-08-26 15:46:12 +03:00
|
|
|
std::map<size_t, const Factor*> placeholders;
|
|
|
|
if (placeholderFactor != NOT_FOUND) {
|
|
|
|
// creates map of target position -> factor for placeholders
|
2016-08-26 19:19:39 +03:00
|
|
|
placeholders = GetPlaceholders(system, inputPhrase);
|
2016-08-26 15:46:12 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
size_t size = PhraseImplTemplate<WORD>::GetSize();
|
|
|
|
for (size_t i = 0; i < size; ++i) {
|
2016-08-26 19:19:39 +03:00
|
|
|
// output placeholder, if any
|
|
|
|
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(i);
|
|
|
|
if (iter == placeholders.end()) {
|
|
|
|
const WORD &word = (*this)[i];
|
2016-08-29 20:47:41 +03:00
|
|
|
word.OutputToStream(system, out);
|
2016-08-26 19:19:39 +03:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
const Factor *factor = iter->second;
|
|
|
|
out << *factor;
|
|
|
|
}
|
|
|
|
|
|
|
|
out << " ";
|
2016-08-26 13:04:22 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-26 19:19:39 +03:00
|
|
|
std::map<size_t, const Factor*> GetPlaceholders(const System &system, const Phrase<WORD> &inputPhrase) const
|
2016-08-26 15:46:12 +03:00
|
|
|
{
|
2016-08-26 19:19:39 +03:00
|
|
|
FactorType placeholderFactor = system.options.input.placeholder_factor;
|
2016-08-26 15:46:12 +03:00
|
|
|
std::map<size_t, const Factor*> ret;
|
2016-08-26 23:48:10 +03:00
|
|
|
//std::cerr << "inputPhrase=" << inputPhrase.Debug(system) << std::endl;
|
2016-08-26 15:46:12 +03:00
|
|
|
|
|
|
|
for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
|
|
|
|
const Factor *factor = inputPhrase[sourcePos][placeholderFactor];
|
|
|
|
if (factor) {
|
2016-08-26 23:48:10 +03:00
|
|
|
//std::cerr << "factor=" << *factor << std::endl;
|
|
|
|
//std::cerr << "tp=" << Debug(system) << std::endl;
|
2016-08-26 15:46:12 +03:00
|
|
|
std::set<size_t> targetPos = GetAlignTerm().GetAlignmentsForSource(sourcePos);
|
|
|
|
UTIL_THROW_IF2(targetPos.size() != 1,
|
2016-08-26 19:19:39 +03:00
|
|
|
"Placeholder should be aligned to 1, and only 1, word:" << targetPos.size() << "!=1");
|
2016-08-26 15:46:12 +03:00
|
|
|
ret[*targetPos.begin()] = factor;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-08-26 19:19:39 +03:00
|
|
|
virtual std::string Debug(const System &system) const
|
|
|
|
{
|
|
|
|
std::stringstream out;
|
|
|
|
out << Phrase<WORD>::Debug(system);
|
|
|
|
out << " pt=" << pt.GetName() << " ";
|
|
|
|
out << " SCORES:" << GetScores().Debug(system);
|
|
|
|
out << " ALIGN-T:";
|
|
|
|
out << GetAlignTerm().Debug(system);
|
|
|
|
|
|
|
|
return out.str();
|
|
|
|
}
|
|
|
|
|
2016-04-26 13:40:57 +03:00
|
|
|
protected:
|
|
|
|
Scores *m_scores;
|
2016-08-26 13:50:12 +03:00
|
|
|
const AlignmentInfo *m_alignTerm;
|
2016-04-26 13:40:57 +03:00
|
|
|
};
|
|
|
|
|
2016-04-26 16:47:31 +03:00
|
|
|
///////////////////////////////////////////////////////////////////////
|
2016-06-13 15:59:26 +03:00
|
|
|
/**
 * Comparison functor ordering phrases by descending pruning score:
 * returns true when the first argument's GetScoreForPruning() is strictly
 * greater than the second's. Usable with both objects and pointers.
 */
template<typename TP>
struct CompareScoreForPruning
{
  /// Compare two phrases by reference.
  bool operator()(const TP &a, const TP &b) const
  {
    return a.GetScoreForPruning() > b.GetScoreForPruning();
  }

  /// Compare two phrases by pointer; both pointers must be non-NULL.
  bool operator()(const TP *a, const TP *b) const
  {
    // Same comparison as the reference overload, applied to the pointees.
    return (*this)(*a, *b);
  }
};
|
|
|
|
|
|
|
|
} /* namespace Moses2 */
|
|
|
|
|