mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 20:46:59 +03:00
240b88c683
strong them locally where appropriate, so that compontents can become independent of StaticData once instantiated.
230 lines
7.4 KiB
C++
230 lines
7.4 KiB
C++
#include <sstream>
|
|
#include "Manager.h"
|
|
#include "PVertex.h"
|
|
#include "moses/OutputCollector.h"
|
|
#include "moses/Util.h"
|
|
|
|
namespace Moses
|
|
{
|
|
namespace Syntax
|
|
{
|
|
|
|
Manager::Manager(ttasksptr const& ttask)
|
|
: Moses::BaseManager(ttask)
|
|
{ }
|
|
|
|
void Manager::OutputBest(OutputCollector *collector) const
|
|
{
|
|
if (!collector) {
|
|
return;
|
|
}
|
|
std::ostringstream out;
|
|
FixPrecision(out);
|
|
const SHyperedge *best = GetBestSHyperedge();
|
|
if (best == NULL) {
|
|
VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
|
|
if (options()->output.ReportHypoScore) {
|
|
out << "0 ";
|
|
}
|
|
out << '\n';
|
|
} else {
|
|
if (options()->output.ReportHypoScore) {
|
|
out << best->label.futureScore << " ";
|
|
}
|
|
Phrase yield = GetOneBestTargetYield(*best);
|
|
// delete 1st & last
|
|
UTIL_THROW_IF2(yield.GetSize() < 2,
|
|
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
|
yield.RemoveWord(0);
|
|
yield.RemoveWord(yield.GetSize()-1);
|
|
out << yield.GetStringRep(options()->output.factor_order);
|
|
out << '\n';
|
|
}
|
|
collector->Write(m_source.GetTranslationId(), out.str());
|
|
}
|
|
|
|
void Manager::OutputNBest(OutputCollector *collector) const
|
|
{
|
|
if (collector) {
|
|
long translationId = m_source.GetTranslationId();
|
|
KBestExtractor::KBestVec nBestList;
|
|
ExtractKBest(options()->nbest.nbest_size, nBestList,
|
|
options()->nbest.only_distinct);
|
|
OutputNBestList(collector, nBestList, translationId);
|
|
}
|
|
}
|
|
|
|
void Manager::OutputUnknowns(OutputCollector *collector) const
|
|
{
|
|
if (collector) {
|
|
long translationId = m_source.GetTranslationId();
|
|
|
|
std::ostringstream out;
|
|
for (boost::unordered_set<Moses::Word>::const_iterator p = m_oovs.begin();
|
|
p != m_oovs.end(); ++p) {
|
|
out << *p;
|
|
}
|
|
out << std::endl;
|
|
collector->Write(translationId, out.str());
|
|
}
|
|
}
|
|
|
|
void Manager::OutputNBestList(OutputCollector *collector,
|
|
const KBestExtractor::KBestVec &nBestList,
|
|
long translationId) const
|
|
{
|
|
const std::vector<FactorType> &outputFactorOrder = options()->output.factor_order;
|
|
|
|
std::ostringstream out;
|
|
|
|
if (collector->OutputIsCout()) {
|
|
// Set precision only if we're writing the n-best list to cout. This is to
|
|
// preserve existing behaviour, but should probably be done either way.
|
|
FixPrecision(out);
|
|
}
|
|
|
|
bool includeWordAlignment = options()->nbest.include_alignment_info;
|
|
bool PrintNBestTrees = options()->nbest.print_trees; // PrintNBestTrees();
|
|
|
|
for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
|
|
p != nBestList.end(); ++p) {
|
|
const KBestExtractor::Derivation &derivation = **p;
|
|
|
|
// get the derivation's target-side yield
|
|
Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);
|
|
|
|
// delete <s> and </s>
|
|
UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
|
|
"Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
|
|
outputPhrase.RemoveWord(0);
|
|
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
|
|
|
|
// print the translation ID, surface factors, and scores
|
|
out << translationId << " ||| ";
|
|
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
|
|
out << " ||| ";
|
|
bool with_labels = options()->nbest.include_feature_labels;
|
|
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
|
|
out << " ||| " << derivation.score;
|
|
|
|
// optionally, print word alignments
|
|
if (includeWordAlignment) {
|
|
out << " ||| ";
|
|
Alignments align;
|
|
OutputAlignmentNBest(align, derivation, 0);
|
|
for (Alignments::const_iterator q = align.begin(); q != align.end();
|
|
++q) {
|
|
out << q->first << "-" << q->second << " ";
|
|
}
|
|
}
|
|
|
|
// optionally, print tree
|
|
if (PrintNBestTrees) {
|
|
TreePointer tree = KBestExtractor::GetOutputTree(derivation);
|
|
out << " ||| " << tree->GetString();
|
|
}
|
|
|
|
out << std::endl;
|
|
}
|
|
|
|
assert(collector);
|
|
collector->Write(translationId, out.str());
|
|
}
|
|
|
|
std::size_t Manager::OutputAlignmentNBest(
|
|
Alignments &retAlign,
|
|
const KBestExtractor::Derivation &derivation,
|
|
std::size_t startTarget) const
|
|
{
|
|
const SHyperedge ­peredge = derivation.edge->shyperedge;
|
|
|
|
std::size_t totalTargetSize = 0;
|
|
std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
|
|
|
|
const TargetPhrase &tp = *(shyperedge.label.translation);
|
|
|
|
std::size_t thisSourceSize = CalcSourceSize(derivation);
|
|
|
|
// position of each terminal word in translation rule, irrespective of
|
|
// alignment if non-term, number is undefined
|
|
std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
|
|
std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);
|
|
|
|
const AlignmentInfo &aiNonTerm =
|
|
shyperedge.label.translation->GetAlignNonTerm();
|
|
std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
|
|
const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
|
|
aiNonTerm.GetNonTermIndexMap();
|
|
|
|
UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
|
|
"Error");
|
|
|
|
std::size_t targetInd = 0;
|
|
for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
|
|
if (tp.GetWord(targetPos).IsNonTerminal()) {
|
|
UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
|
|
std::size_t sourceInd = targetPos2SourceInd[targetPos];
|
|
std::size_t sourcePos = sourceInd2pos[sourceInd];
|
|
|
|
const KBestExtractor::Derivation &subderivation =
|
|
*derivation.subderivations[sourceInd];
|
|
|
|
// calc source size
|
|
std::size_t sourceSize =
|
|
subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
|
|
sourceOffsets[sourcePos] = sourceSize;
|
|
|
|
// calc target size.
|
|
// Recursively look thru child hypos
|
|
std::size_t currStartTarget = startTarget + totalTargetSize;
|
|
std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
|
|
currStartTarget);
|
|
targetOffsets[targetPos] = targetSize;
|
|
|
|
totalTargetSize += targetSize;
|
|
++targetInd;
|
|
} else {
|
|
++totalTargetSize;
|
|
}
|
|
}
|
|
|
|
// convert position within translation rule to absolute position within
|
|
// source sentence / output sentence
|
|
ShiftOffsets(sourceOffsets, startSource);
|
|
ShiftOffsets(targetOffsets, startTarget);
|
|
|
|
// get alignments from this hypo
|
|
const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();
|
|
|
|
// add to output arg, offsetting by source & target
|
|
AlignmentInfo::const_iterator iter;
|
|
for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
|
|
const std::pair<std::size_t, std::size_t> &align = *iter;
|
|
std::size_t relSource = align.first;
|
|
std::size_t relTarget = align.second;
|
|
std::size_t absSource = sourceOffsets[relSource];
|
|
std::size_t absTarget = targetOffsets[relTarget];
|
|
|
|
std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
|
|
std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
|
|
UTIL_THROW_IF2(!ret.second, "Error");
|
|
}
|
|
|
|
return totalTargetSize;
|
|
}
|
|
|
|
std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
|
|
{
|
|
const SHyperedge ­peredge = d.edge->shyperedge;
|
|
std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
|
|
for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
|
|
std::size_t childSize =
|
|
shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
|
|
ret -= (childSize - 1);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
} // Syntax
|
|
} // Moses
|