use input path for unknown word processing in phrase-based

This commit is contained in:
Hieu Hoang 2013-08-13 19:44:52 +01:00
parent 0d60b7f2dd
commit 563cdc527a
4 changed files with 16 additions and 6 deletions

View File

@ -201,12 +201,13 @@ void TranslationOptionCollection::ProcessUnknownWord()
* \param length length covered by this word (may be > 1 for lattice input)
* \param inputScores a set of scores associated with unknown word (input scores from lattices/CNs)
*/
void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,size_t sourcePos, size_t length, const Scores *inputScores)
void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPath,size_t sourcePos, size_t length, const Scores *inputScores)
{
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
float unknownScore = FloorScore(TransformScore(0));
const Word &sourceWord = inputPath.GetPhrase().GetWord(0);
// unknown word, add as trans opt
FactorCollection &factorCollection = FactorCollection::Instance();

View File

@ -79,7 +79,7 @@ protected:
//! Force a creation of a translation option where there are none for a particular source position.
void ProcessUnknownWord();
//! special handling of ONE unknown word.
virtual void ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
virtual void ProcessOneUnknownWord(const InputPath &inputPath, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements
void Prune();

View File

@ -123,9 +123,18 @@ void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePo
ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);
ConfusionNet::Column const& coll=source.GetColumn(sourcePos);
const InputPathList &inputPathList = GetInputPathList(sourcePos, sourcePos);
ConfusionNet::Column::const_iterator iterCol;
InputPathList::const_iterator iterInputPath;
size_t j=0;
for(ConfusionNet::Column::const_iterator i=coll.begin(); i!=coll.end(); ++i) {
ProcessOneUnknownWord(i->first ,sourcePos, source.GetColumnIncrement(sourcePos, j++),&(i->second));
for(iterCol = coll.begin(), iterInputPath = inputPathList.begin();
iterCol != coll.end();
++iterCol , ++iterInputPath) {
const InputPath &inputPath = **iterInputPath;
size_t length = source.GetColumnIncrement(sourcePos, j++);
const Scores &inputScores = iterCol->second;
ProcessOneUnknownWord(inputPath ,sourcePos, length, &inputScores);
}
}

View File

@ -66,8 +66,8 @@ TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const
*/
void TranslationOptionCollectionText::ProcessUnknownWord(size_t sourcePos)
{
const Word &sourceWord = m_source.GetWord(sourcePos);
ProcessOneUnknownWord(sourceWord,sourcePos);
const InputPath &inputPath = GetInputPath(sourcePos, sourcePos);
ProcessOneUnknownWord(inputPath,sourcePos);
}
/**