added phil's functions

This commit is contained in:
Hieu Hoang 2014-11-07 19:51:18 +00:00
parent e1092c0dad
commit 18d2d56cc7
4 changed files with 236 additions and 1 deletions

View File

@ -36,6 +36,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include <stack>
#include <boost/algorithm/string.hpp>
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SVertex.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Hypothesis.h"
@ -52,6 +58,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/Incremental.h"
#include "moses/ChartManager.h"
#include "util/exception.hh"
#include "IOWrapper.h"
@ -1403,6 +1410,216 @@ IOWrapper *IOWrapper::GetIOWrapper(const StaticData &staticData)
}
////////////////////////////
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/S2T/DerivationWriter.h"
/**
 * Write the detailed derivation report for a sentence's best hyperedge.
 *
 * Returns immediately when \p best is NULL. Otherwise the derivation is
 * rendered with Syntax::S2T::DerivationWriter and passed to the
 * detailed-translation collector. Throws (via UTIL_THROW_IF2) when that
 * collector is NULL.
 *
 * \param best           best hyperedge for the sentence, or NULL
 * \param translationId  id of the sentence being reported
 */
void IOWrapper::OutputDetailedTranslationReport(const Syntax::SHyperedge *best,
    long translationId)
{
  if (!best) {
    return;
  }

  // The report is rendered first; the collector is only checked afterwards.
  std::ostringstream report;
  Syntax::S2T::DerivationWriter::Write(*best, translationId, report);

  UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
                 "No ouput file for detailed reports specified");

  m_detailedTranslationCollector->Write(translationId, report.str());
}
/**
 * Write the single-best translation of one sentence.
 *
 * Does nothing when no single-best collector is configured. The emitted
 * record is always exactly one newline-terminated line:
 *   - no translation (\p best is NULL): "0 " if hypothesis scores are
 *     requested, otherwise an empty line;
 *   - otherwise: the optional score followed by the target yield with its
 *     first and last word removed.
 *
 * Throws (via UTIL_THROW_IF2) if the yield has fewer than two words.
 *
 * \param best           best hyperedge for the sentence, or NULL
 * \param translationId  id of the sentence being written
 */
void IOWrapper::OutputBestHypo(const Syntax::SHyperedge *best,
                               long translationId)
{
  if (!m_singleBestOutputCollector) {
    return;
  }

  std::ostringstream out;
  IOWrapper::FixPrecision(out);

  const StaticData &staticData = StaticData::Instance();

  if (best == NULL) {
    VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
    if (staticData.GetOutputHypoScore()) {
      out << "0 ";
    }
  } else {
    if (staticData.GetOutputHypoScore()) {
      out << best->score << " ";
    }
    Phrase yield = Syntax::GetOneBestTargetYield(*best);
    // delete 1st & last
    UTIL_THROW_IF2(yield.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    yield.RemoveWord(0);
    yield.RemoveWord(yield.GetSize()-1);
    out << yield.GetStringRep(staticData.GetOutputFactorOrder());
  }

  // Terminate the record in both branches. Previously a sentence without a
  // translation produced no newline, so its record ran into the next one.
  out << '\n';

  m_singleBestOutputCollector->Write(translationId, out.str());
}
/**
 * Write the n-best list of one sentence to the n-best collector.
 *
 * Each derivation produces one line of the form
 *   id ||| surface ||| feature scores ||| total score [ ||| alignments ] [ ||| tree ]
 * where the alignment and tree fields are only present when enabled in
 * StaticData. The first and last word of every output phrase are removed
 * before printing.
 *
 * Throws (via UTIL_THROW_IF2) if an output phrase has fewer than two words.
 *
 * \param nBestList      derivations to print, in the order given
 * \param translationId  id of the sentence being written
 */
void IOWrapper::OutputNBestList(
  const Syntax::KBestExtractor::KBestVec &nBestList, long translationId)
{
  // Check the collector before its first use; the original only asserted
  // after it had already been dereferenced.
  assert(m_nBestOutputCollector);

  std::ostringstream out;

  if (m_nBestOutputCollector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapper::FixPrecision(out);
  }

  const StaticData &staticData = StaticData::Instance();
  const bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
  const bool printNBestTrees = staticData.PrintNBestTrees();

  for (Syntax::KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const Syntax::KBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = Syntax::KBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (printNBestTrees) {
      TreePointer tree = Syntax::KBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    // Plain newline: the buffer is a string stream, so a flush per entry
    // would add nothing to the output.
    out << '\n';
  }

  m_nBestOutputCollector->Write(translationId, out.str());
}
/**
 * Compute the source-side size used for a derivation's top rule.
 *
 * Starts from the number of words covered by the head vertex's span and,
 * for every tail vertex, subtracts (words covered by that vertex - 1), so
 * each tail vertex ends up contributing exactly one position.
 *
 * \param d  derivation whose top hyperedge is measured
 * \return   the resulting size
 */
size_t IOWrapper::CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const
{
  using namespace Moses::Syntax;

  const Syntax::SHyperedge &edge = d.edge->shyperedge;
  size_t sourceSize = edge.head->pvertex->span.GetNumWordsCovered();

  const size_t numChildren = edge.tail.size();
  for (size_t child = 0; child != numChildren; ++child) {
    const size_t covered = edge.tail[child]->pvertex->span.GetNumWordsCovered();
    // Collapse the child's covered words into a single position.
    sourceSize -= (covered - 1);
  }

  return sourceSize;
}
/**
 * Collect the word alignment points of a derivation, recursing into its
 * subderivations.
 *
 * For every non-terminal on the target side of the rule, the matching
 * subderivation is processed recursively and its source/target sizes are
 * recorded. The rule's own terminal alignments are then inserted into
 * \p retAlign after being mapped through the shifted offset tables.
 *
 * Throws (via UTIL_THROW_IF2) when the non-terminal alignment does not match
 * the number of subderivations, when a target non-terminal position lies
 * outside the non-terminal index map, or when an alignment point is already
 * present in \p retAlign.
 *
 * \param retAlign     output set receiving (source, target) alignment points
 * \param derivation   derivation to process
 * \param startTarget  target-side position at which this derivation's
 *                     output starts
 * \return             number of target words produced by this derivation
 */
size_t IOWrapper::OutputAlignmentNBest(
  Alignments &retAlign,
  const Syntax::KBestExtractor::Derivation &derivation,
  size_t startTarget)
{
  const Syntax::SHyperedge &shyperedge = derivation.edge->shyperedge;

  size_t totalTargetSize = 0;
  size_t startSource = shyperedge.head->pvertex->span.GetStartPos();

  const TargetPhrase &tp = *(shyperedge.translation);

  size_t thisSourceSize = CalcSourceSize(derivation);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = shyperedge.translation->GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      size_t sourceInd = targetPos2SourceInd[targetPos];
      size_t sourcePos = sourceInd2pos[sourceInd];

      const Moses::Syntax::KBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      size_t sourceSize =
        subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      size_t currStartTarget = startTarget + totalTargetSize;
      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                          currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      // a terminal contributes exactly one target word
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = shyperedge.translation->GetAlignTerm();

  // add to output arg, offsetting by source & target
  // NOTE(review): relSource / relTarget index the offset tables unchecked;
  // presumably the rule's terminal alignments always fall inside them.
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    size_t relSource = align.first;
    size_t relTarget = align.second;
    size_t absSource = sourceOffsets[relSource];
    size_t absTarget = targetOffsets[relTarget];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    // every alignment point is expected to be new
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
/**
 * Write the unknown words of one sentence to the unknowns collector.
 *
 * The words are streamed in the set's iteration order onto a single line,
 * which is then ended with std::endl and passed to the collector.
 *
 * \param unknowns       words to report
 * \param translationId  id of the sentence the words belong to
 */
void IOWrapper::OutputUnknowns(const std::set<Moses::Word> &unknowns,
                               long translationId)
{
  typedef std::set<Moses::Word>::const_iterator WordIter;

  std::ostringstream line;

  WordIter word = unknowns.begin();
  const WordIter stop = unknowns.end();
  while (word != stop) {
    line << *word;
    ++word;
  }
  line << std::endl;

  m_unknownsCollector->Write(translationId, line.str());
}
} // namespace

View File

@ -51,6 +51,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/WordLattice.h"
#include "moses/LatticeMBR.h"
#include "moses/ChartKBestExtractor.h"
#include "moses/Syntax/KBestExtractor.h"
#include "search/applied.hh"
@ -61,6 +62,11 @@ class Hypothesis;
class ChartHypothesis;
class Factor;
// Forward declaration of Syntax::SHyperedge. The IOWrapper declarations
// visible in this header take it by pointer only.
namespace Syntax
{
struct SHyperedge;
}
/** Helper class that holds misc variables to write data out to command line.
*/
class IOWrapper
@ -131,8 +137,10 @@ protected:
size_t OutputAlignmentNBest(Alignments &retAlign,
const Moses::ChartKBestExtractor::Derivation &derivation,
size_t startTarget);
std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::Syntax::KBestExtractor::Derivation &derivation, std::size_t startTarget);
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo);
size_t CalcSourceSize(const Syntax::KBestExtractor::Derivation &d) const;
template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift)
@ -202,15 +210,23 @@ public:
// CHART
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
void OutputBestHypo(search::Applied applied, long translationId);
void OutputBestHypo(const Moses::Syntax::SHyperedge *, long translationId);
void OutputBestNone(long translationId);
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
void OutputNBestList(const Moses::Syntax::KBestExtractor::KBestVec &nBestList, long translationId);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTranslationReport(const Moses::Syntax::SHyperedge *, long translationId);
void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
void OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo);
void OutputUnknowns(const std::vector<Moses::Phrase*> &, long);
void OutputUnknowns(const std::set<Moses::Word> &, long);
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo,
const Moses::Sentence &sentence,

View File

@ -2,6 +2,7 @@
#include "moses/Factor.h"
#include "moses/Syntax/PVertex.h"
#include "moses/Syntax/SHyperedge.h"
namespace Moses
{

View File

@ -3,13 +3,14 @@
#include <ostream>
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/SHyperedge.h"
#include "moses/Word.h"
namespace Moses
{
namespace Syntax
{
struct SHyperedge;
namespace S2T
{