Code cleanup and bug fix in (Base)Manager::OutputSurface:

Mark-up of unknown words in output.
This commit is contained in:
Ulrich Germann 2015-12-02 20:43:56 +00:00
parent 33f4e93915
commit 5a63286447
10 changed files with 117 additions and 81 deletions

View File

@ -94,31 +94,38 @@ OutputSearchGraphAsHypergraph(std::string const& fname, size_t const precision)
/***
* print surface factor only for the given phrase
*/
void BaseManager::OutputSurface(std::ostream &out, const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const
void
BaseManager::
OutputSurface(std::ostream &out, Phrase const& phrase) const
{
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Cannot be empty phrase");
if (reportAllFactors == true) {
out << phrase;
} else {
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
UTIL_THROW_IF2(factor == NULL,
"Empty factor 0 at position " << pos);
std::vector<FactorType> const& factor_order = options().output.factor_order;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor == NULL,
"Empty factor " << i << " at position " << pos);
bool markUnknown = options().unk.mark;
std::string const& fd = options().output.FactorDelimiter;
out << "|" << *factor;
}
out << " ";
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, factor_order[0]);
UTIL_THROW_IF2(factor == NULL, "Empty factor 0 at position " << pos);
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix;
}
out << *factor;
for (size_t i = 1 ; i < factor_order.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, factor_order[i]);
UTIL_THROW_IF2(!factor, "Empty factor " << i << " at position " << pos);
out << fd << *factor;
}
if(markUnknown && word.IsOOV()) {
out << options().unk.suffix;
}
out << " ";
}
}

View File

@ -25,10 +25,8 @@ protected:
typedef std::vector<std::pair<Moses::Word, Moses::Range> > ApplicationContext;
typedef std::set< std::pair<size_t, size_t> > Alignments;
void OutputSurface(std::ostream &out,
const Phrase &phrase,
const std::vector<FactorType> &outputFactorOrder,
bool reportAllFactors) const;
void OutputSurface(std::ostream &out, Phrase const& phrase) const;
void WriteApplicationContext(std::ostream &out,
const ApplicationContext &context) const;

View File

@ -317,15 +317,15 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.options().nbest.nbest_size;
// const StaticData &staticData = StaticData::Instance();
size_t nBestSize = options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
<< options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
CalcNBest(nBestSize, nBestList, options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -338,8 +338,8 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
const ChartKBestExtractor::KBestVec &nBestList,
long translationId) const
{
const StaticData &staticData = StaticData::Instance();
const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
// const StaticData &staticData = StaticData::Instance();
// const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
std::ostringstream out;
@ -349,7 +349,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
NBestOptions const& nbo = StaticData::Instance().options().nbest;
NBestOptions const& nbo = options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
@ -368,7 +368,7 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
boost::shared_ptr<ScoreComponentCollection> scoreBreakdown = ChartKBestExtractor::GetOutputScoreBreakdown(derivation);
bool with_labels = options().nbest.include_feature_labels;

View File

@ -349,7 +349,7 @@ OutputNBestList(OutputCollector *collector,
outputPhrase.RemoveWord(0);
outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
bool with_labels = options().nbest.include_feature_labels;
features.OutputAllFeatureScores(out, with_labels);

View File

@ -131,7 +131,7 @@ void Manager::Decode()
TRACE_ERR("Line "<< m_source.GetTranslationId()
<< ": Collecting options took "
<< GetSentenceStats().GetTimeCollectOpts() << " seconds at "
<< __FILE__ << ":" << __LINE__ << endl);
<< __FILE__ << " Line " << __LINE__ << endl);
}
// search for best translation with the specified algorithm
@ -1666,7 +1666,7 @@ OutputNBest(std::ostream& out,
const std::vector<Moses::FactorType>& outputFactorOrder,
long translationId, char reportSegmentation) const
{
const StaticData &staticData = StaticData::Instance();
// const StaticData &staticData = StaticData::Instance();
NBestOptions const& nbo = options().nbest;
bool reportAllFactors = nbo.include_all_factors;
bool includeSegmentation = nbo.include_segmentation;
@ -1681,8 +1681,7 @@ OutputNBest(std::ostream& out,
out << translationId << " ||| ";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, outputFactorOrder, reportSegmentation,
reportAllFactors);
OutputSurface(out, edge); //, outputFactorOrder, reportSegmentation, reportAllFactors);
}
out << " |||";
@ -1743,57 +1742,59 @@ OutputNBest(std::ostream& out,
*/
void
Manager::
OutputSurface(std::ostream &out, const Hypothesis &edge,
const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const
OutputSurface(std::ostream &out, const Hypothesis &edge) const
{
std::vector<FactorType> outputFactorOrder = options().output.factor_order;
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
"Must specific at least 1 output factor");
const TargetPhrase& phrase = edge.GetCurrTargetPhrase();
bool markUnknown = options().unk.mark;
if (reportAllFactors == true) {
out << phrase;
} else {
FactorType placeholderFactor = options().input.placeholder_factor;
std::map<size_t, const Factor*> placeholders;
FactorType placeholderFactor = options().input.placeholder_factor;
std::map<size_t, const Factor*> placeholders;
if (placeholderFactor != NOT_FOUND) {
// creates map of target position -> factor for placeholders
placeholders = GetPlaceholders(edge, placeholderFactor);
}
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
bool markUnknown = options().unk.mark;
std::string const& fd = options().output.FactorDelimiter;
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
if (placeholders.size()) {
// do placeholders
std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
if (iter != placeholders.end()) {
factor = iter->second;
}
UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix << *factor << options().unk.suffix;
} else {
out << *factor;
}
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos);
out << "|" << *factor;
}
out << " ";
}
UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << options().unk.prefix;
}
out << *factor;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
UTIL_THROW_IF2(factor==NULL,"No factor "<<i<<" at position "<< pos);
out << fd << *factor;
}
if(markUnknown && word.IsOOV()) {
out << options().unk.suffix;
}
out << " ";
}
// trace ("report segmentation") option "-t" / "-tt"
int reportSegmentation = options().output.ReportSegmentation;
if (reportSegmentation > 0 && phrase.GetSize() > 0) {
const Range &sourceRange = edge.GetCurrSourceWordsRange();
const int sourceStart = sourceRange.GetStartPos();
@ -2080,7 +2081,8 @@ OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
const Hypothesis &edge = *edges[currEdge];
OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
OutputSurface(out, edge);
// , StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
}
out << endl;
}

View File

@ -137,8 +137,9 @@ public:
, const std::vector<Moses::FactorType>& outputFactorOrder
, long translationId
, char reportSegmentation) const;
void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
char reportSegmentation, bool reportAllFactors) const;
void OutputSurface(std::ostream &out, const Hypothesis &edge) const;
void OutputAlignment(std::ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset) const;
void OutputInput(std::ostream& os, const Hypothesis* hypo) const;
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const;

View File

@ -107,7 +107,7 @@ void Manager::OutputNBestList(OutputCollector *collector,
// print the translation ID, surface factors, and scores
out << translationId << " ||| ";
OutputSurface(out, outputPhrase, outputFactorOrder, false);
OutputSurface(out, outputPhrase); // , outputFactorOrder, false);
out << " ||| ";
bool with_labels = options().nbest.include_feature_labels;
derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);

View File

@ -67,6 +67,14 @@ namespace Moses {
if (params) factor_order = Scan<FactorType>(*params);
if (factor_order.empty()) factor_order.assign(1,0);
if (ReportAllFactors) {
for (size_t i = 1; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
param.SetParameter(FactorDelimiter, "factor-delimiter", std::string("|"));
param.SetParameter(FactorDelimiter, "output-factor-delimiter", FactorDelimiter);
return true;
}
@ -76,6 +84,24 @@ namespace Moses {
update(std::map<std::string, xmlrpc_c::value>const& param)
{
ReportAllFactors = check(param, "report-all-factors", ReportAllFactors);
std::map<std::string, xmlrpc_c::value>::const_iterator m;
m = param.find("output-factors");
if (m != param.end())
factor_order = Tokenize<FactorType>(xmlrpc_c::value_string(m->second), ",");
if (ReportAllFactors) {
factor_order.clear();
for (size_t i = 0; i < MAX_NUM_FACTORS; ++i)
factor_order.push_back(i);
}
m = param.find("factor-delimiter");
if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
m = param.find("output-factor-delimiter");
if (m != param.end()) FactorDelimiter = Trim(xmlrpc_c::value_string(m->second));
return true;
}
#endif

View File

@ -19,7 +19,8 @@ namespace Moses
bool PrintAlignmentInfo; // m_PrintAlignmentInfo
WordAlignmentSort WA_SortOrder; // 0: no, 1: target order
std::string AlignmentOutputFile;
std::string FactorDelimiter;
bool WordGraph;
std::string SearchGraph;

View File

@ -378,8 +378,9 @@ pack_hypothesis(const Moses::Manager& manager, vector<Hypothesis const* > const&
// target string
ostringstream target;
BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
manager.OutputSurface(target, *e, m_options.output.factor_order,
m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
manager.OutputSurface(target, *e);
// , m_options.output.factor_order,
// m_options.output.ReportSegmentation, m_options.output.ReportAllFactors);
}
XVERBOSE(1, "BEST TRANSLATION: " << *(manager.GetBestHypothesis()) << std::endl);
// XVERBOSE(1,"SERVER TRANSLATION: " << target.str() << std::endl);