// $Id$
/***********************************************************************
 Moses - factored phrase-based language decoder
 Copyright (C) 2010 Hieu Hoang

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 ***********************************************************************/
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
#include "../../moses/src/StaticData.h"
#include "ChartTranslationOptionList.h"
#include "ChartTranslationOption.h"
#include "WordsRange.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
using namespace Moses;
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
#if defined(USE_HYPO_POOL)
// Definition of the class-wide object pool; only compiled in when
// USE_HYPO_POOL is defined.
// NOTE(review): the second argument (3000) is presumably the pool's initial
// size -- confirm against ObjectPool's constructor.
ObjectPool<ChartTranslationOptionList>
ChartTranslationOptionList::s_objectPool("ChartTranslationOptionList", 3000);
#endif
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
/** Create an empty option list for the given source span.
 *
 * \param range source word range; copied into m_range and later handed to
 *              every ChartTranslationOption created by Add()
 */
ChartTranslationOptionList::ChartTranslationOptionList(const WordsRange &range)
  : m_range(range)
{
  // Start at +infinity so the first score recorded by Add() always becomes
  // the threshold.
  m_scoreThreshold = std::numeric_limits<float>::infinity();

  // Pre-allocate room for 200 options to avoid early reallocations.
  m_collection.reserve(200);
}
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
/** Release the translation options held in m_collection.
 *
 * The options are heap-allocated (see Add()) and stored as raw pointers, so
 * the list is responsible for freeing them.
 * NOTE(review): RemoveAllInColl presumably deletes every element and clears
 * the container -- confirm against its definition.
 */
ChartTranslationOptionList::~ChartTranslationOptionList()
{
  RemoveAllInColl(m_collection);
}
|
|
|
|
|
|
|
|
|
|
|
|
class ChartRuleOrderer
|
|
|
|
{
|
|
|
|
public:
|
2010-09-23 21:39:32 +04:00
|
|
|
bool operator()(const ChartTranslationOption* itemA, const ChartTranslationOption* itemB) const
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
|
|
|
return itemA->GetTargetPhrase().GetFutureScore() > itemB->GetTargetPhrase().GetFutureScore();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
void ChartTranslationOptionList::Add(const TargetPhraseCollection &targetPhraseCollection
|
2010-04-08 21:16:10 +04:00
|
|
|
, const WordConsumed &wordConsumed
|
|
|
|
, bool adhereTableLimit
|
|
|
|
, size_t ruleLimit)
|
|
|
|
{
|
2010-09-26 16:09:29 +04:00
|
|
|
TargetPhraseCollection::const_iterator iter;
|
|
|
|
TargetPhraseCollection::const_iterator iterEnd = targetPhraseCollection.end();
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
for (iter = targetPhraseCollection.begin(); iter != iterEnd; ++iter)
|
|
|
|
{
|
|
|
|
const TargetPhrase &targetPhrase = **iter;
|
|
|
|
float score = targetPhrase.GetFutureScore();
|
|
|
|
|
|
|
|
if (m_collection.size() < ruleLimit)
|
|
|
|
{ // not yet filled out quota. add everything
|
2010-09-23 21:39:32 +04:00
|
|
|
m_collection.push_back(new ChartTranslationOption(targetPhrase, wordConsumed, m_range));
|
2010-04-08 21:16:10 +04:00
|
|
|
m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
|
|
|
|
}
|
|
|
|
else if (score > m_scoreThreshold)
|
|
|
|
{ // full but not bursting. add if better than worst score
|
2010-09-23 21:39:32 +04:00
|
|
|
m_collection.push_back(new ChartTranslationOption(targetPhrase, wordConsumed, m_range));
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
// prune if bursting
|
|
|
|
if (m_collection.size() > ruleLimit * 2)
|
|
|
|
{
|
|
|
|
std::nth_element(m_collection.begin()
|
|
|
|
, m_collection.begin() + ruleLimit
|
|
|
|
, m_collection.end()
|
|
|
|
, ChartRuleOrderer());
|
|
|
|
// delete the bottom half
|
|
|
|
for (size_t ind = ruleLimit; ind < m_collection.size(); ++ind)
|
|
|
|
{
|
|
|
|
// make the best score of bottom half the score threshold
|
|
|
|
const TargetPhrase &targetPhrase = m_collection[ind]->GetTargetPhrase();
|
|
|
|
float score = targetPhrase.GetFutureScore();
|
|
|
|
m_scoreThreshold = (score > m_scoreThreshold) ? score : m_scoreThreshold;
|
|
|
|
delete m_collection[ind];
|
|
|
|
}
|
|
|
|
m_collection.resize(ruleLimit);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
/** Append an already constructed translation option to the list.
 *
 * No score-based filtering or pruning is applied here. The pointer is stored
 * as-is, so it will be deleted by the same code paths that free the other
 * elements of m_collection.
 *
 * \param transOpt option to store; must not be NULL (checked by assert only)
 */
void ChartTranslationOptionList::Add(ChartTranslationOption *transOpt)
{
  assert(transOpt);
  m_collection.push_back(transOpt);
}
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
void ChartTranslationOptionList::CreateChartRules(size_t ruleLimit)
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
|
|
|
if (m_collection.size() > ruleLimit)
|
|
|
|
{
|
|
|
|
std::nth_element(m_collection.begin()
|
|
|
|
, m_collection.begin() + ruleLimit
|
|
|
|
, m_collection.end()
|
|
|
|
, ChartRuleOrderer());
|
|
|
|
|
|
|
|
// delete the bottom half
|
|
|
|
for (size_t ind = ruleLimit; ind < m_collection.size(); ++ind)
|
|
|
|
{
|
|
|
|
delete m_collection[ind];
|
|
|
|
}
|
|
|
|
m_collection.resize(ruleLimit);
|
|
|
|
}
|
|
|
|
|
|
|
|
// finalise creation of chart rules
|
|
|
|
for (size_t ind = 0; ind < m_collection.size(); ++ind)
|
|
|
|
{
|
2010-09-23 21:39:32 +04:00
|
|
|
ChartTranslationOption &rule = *m_collection[ind];
|
2010-04-08 21:16:10 +04:00
|
|
|
rule.CreateNonTermIndex();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-09-23 20:32:44 +04:00
|
|
|
// helper class
|
|
|
|
class ChartTranslationOptionOrderer
|
|
|
|
{
|
|
|
|
public:
|
2010-09-23 21:39:32 +04:00
|
|
|
bool operator()(const ChartTranslationOption* transOptA, const ChartTranslationOption* transOptB) const
|
2010-09-23 20:32:44 +04:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
if (transOptA->GetArity() != transOptB->GetArity())
|
|
|
|
{
|
|
|
|
return transOptA->GetArity() < transOptB->GetArity();
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
return transOptA->GetTotalScore() > transOptB->GetTotalScore();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
void ChartTranslationOptionList::Sort()
|
2010-09-23 20:32:44 +04:00
|
|
|
{
|
|
|
|
// keep only those over best + threshold
|
|
|
|
|
|
|
|
float scoreThreshold = -std::numeric_limits<float>::infinity();
|
|
|
|
CollType::const_iterator iter;
|
|
|
|
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter)
|
|
|
|
{
|
2010-09-23 21:39:32 +04:00
|
|
|
const ChartTranslationOption *transOpt = *iter;
|
2010-09-23 20:32:44 +04:00
|
|
|
float score = transOpt->GetTotalScore();
|
|
|
|
scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold;
|
|
|
|
}
|
|
|
|
|
|
|
|
scoreThreshold += StaticData::Instance().GetTranslationOptionThreshold();
|
|
|
|
|
|
|
|
size_t ind = 0;
|
|
|
|
while (ind < m_collection.size())
|
|
|
|
{
|
2010-09-23 21:39:32 +04:00
|
|
|
const ChartTranslationOption *transOpt = m_collection[ind];
|
2010-09-23 20:32:44 +04:00
|
|
|
if (transOpt->GetTotalScore() < scoreThreshold)
|
|
|
|
{
|
|
|
|
delete transOpt;
|
|
|
|
m_collection.erase(m_collection.begin() + ind);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ind++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::sort(m_collection.begin(), m_collection.end(), ChartTranslationOptionOrderer());
|
|
|
|
}
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
std::ostream& operator<<(std::ostream &out, const ChartTranslationOptionList &coll)
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2010-09-23 21:39:32 +04:00
|
|
|
ChartTranslationOptionList::const_iterator iter;
|
2010-04-08 21:16:10 +04:00
|
|
|
for (iter = coll.begin() ; iter != coll.end() ; ++iter)
|
|
|
|
{
|
2010-09-23 21:39:32 +04:00
|
|
|
const ChartTranslationOption &rule = **iter;
|
2010-04-08 21:16:10 +04:00
|
|
|
out << rule << endl;
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|