mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
Added option to output unknown words with prefix 'UNK'
This commit is contained in:
parent
c002a81c96
commit
091bed03af
@ -196,6 +196,7 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
|
||||
{
|
||||
CHECK(outputFactorOrder.size() > 0);
|
||||
const Phrase& phrase = edge.GetCurrTargetPhrase();
|
||||
bool markUnknown = StaticData::Instance().GetMarkUnknown();
|
||||
if (reportAllFactors == true) {
|
||||
out << phrase;
|
||||
} else {
|
||||
@ -212,8 +213,16 @@ void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<
|
||||
}
|
||||
}
|
||||
CHECK(factor);
|
||||
out << *factor;
|
||||
|
||||
//preface surface form with UNK if marking unknowns
|
||||
const Word &word = phrase.GetWord(pos);
|
||||
if(markUnknown && word.IsOOV()) {
|
||||
out << "UNK" << *factor;
|
||||
}
|
||||
else {
|
||||
out << *factor;
|
||||
}
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
CHECK(factor);
|
||||
|
@ -51,6 +51,7 @@ Parameter::Parameter()
|
||||
AddParam("input-file", "i", "location of the input file to be translated");
|
||||
AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
|
||||
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
|
||||
AddParam("mark-unknown", "mu", "mark unknown words in output");
|
||||
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
|
||||
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
|
||||
AddParam("max-phrase-length", "maximum phrase length (default 20)");
|
||||
|
@ -364,6 +364,7 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
// unknown word processing
|
||||
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
|
||||
SetBooleanParameter( &m_markUnknown, "mark-unknown", false );
|
||||
|
||||
SetBooleanParameter( &m_lmEnableOOVFeature, "lmodel-oov-feature", false);
|
||||
|
||||
|
@ -113,6 +113,7 @@ protected:
|
||||
std::string m_nBestFilePath, m_latticeSamplesFilePath;
|
||||
bool m_labeledNBestList,m_nBestIncludesSegmentation;
|
||||
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
|
||||
bool m_markUnknown; //! false = output unknown (OOV) words unmarked; true = prefix their surface form with "UNK" in the output
|
||||
bool m_wordDeletionEnabled;
|
||||
|
||||
bool m_disableDiscarding;
|
||||
@ -276,6 +277,9 @@ public:
|
||||
//! Returns m_dropUnknown: true = unknown words are dropped (ignored); false = they are translated as themselves.
inline bool GetDropUnknown() const {
|
||||
return m_dropUnknown;
|
||||
}
|
||||
//! Returns m_markUnknown: true = unknown (OOV) words are written to the output with an "UNK" prefix.
inline bool GetMarkUnknown() const {
|
||||
return m_markUnknown;
|
||||
}
|
||||
//! Returns the m_disableDiscarding flag.
inline bool GetDisableDiscarding() const {
|
||||
return m_disableDiscarding;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user