2010-07-18 03:23:09 +04:00
|
|
|
// $Id$
|
2010-04-12 14:15:49 +04:00
|
|
|
// vim:tabstop=2
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2010 Hieu Hoang
|
2011-02-24 15:36:50 +03:00
|
|
|
|
2010-04-12 14:15:49 +04:00
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
2011-02-24 15:36:50 +03:00
|
|
|
|
2010-04-12 14:15:49 +04:00
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
2011-02-24 15:36:50 +03:00
|
|
|
|
2010-04-12 14:15:49 +04:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include "ChartCell.h"
|
|
|
|
#include "ChartCellCollection.h"
|
2014-08-08 00:20:10 +04:00
|
|
|
#include "HypergraphOutput.h"
|
2011-03-11 19:28:36 +03:00
|
|
|
#include "RuleCubeQueue.h"
|
|
|
|
#include "RuleCube.h"
|
2011-03-11 16:08:43 +03:00
|
|
|
#include "WordsRange.h"
|
|
|
|
#include "Util.h"
|
|
|
|
#include "StaticData.h"
|
2012-09-25 20:34:43 +04:00
|
|
|
#include "ChartTranslationOptions.h"
|
2011-03-11 16:08:43 +03:00
|
|
|
#include "ChartTranslationOptionList.h"
|
2011-06-29 17:38:11 +04:00
|
|
|
#include "ChartManager.h"
|
2013-11-18 23:03:30 +04:00
|
|
|
#include "util/exception.hh"
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
2013-12-12 00:03:30 +04:00
|
|
|
extern bool g_mosesDebug;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2012-10-02 17:30:55 +04:00
|
|
|
/** Base-class constructor
 * \param startPos endPos range of this cell; both are forwarded to m_coverage
 */
ChartCellBase::ChartCellBase(size_t startPos, size_t endPos)
  : m_coverage(startPos, endPos)
  , m_targetLabelSet(m_coverage) // the label set is constructed from this cell's coverage
{
}
|
|
|
|
|
2012-10-02 19:34:25 +04:00
|
|
|
//! Destructor - performs no explicit work of its own
ChartCellBase::~ChartCellBase()
{
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
/** Constructor
|
|
|
|
* \param startPos endPos range of this cell
|
2013-05-29 21:16:15 +04:00
|
|
|
* \param manager pointer back to the manager
|
2012-07-02 23:01:07 +04:00
|
|
|
*/
|
2012-10-02 17:30:55 +04:00
|
|
|
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
|
2013-05-29 21:16:15 +04:00
|
|
|
ChartCellBase(startPos, endPos), m_manager(manager)
|
|
|
|
{
|
2011-02-24 15:36:50 +03:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
m_nBestIsEnabled = staticData.IsNBestEnabled();
|
2011-06-29 17:38:11 +04:00
|
|
|
}
|
|
|
|
|
2012-10-02 19:34:25 +04:00
|
|
|
//! Destructor - performs no explicit work of its own
ChartCell::~ChartCell()
{
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/** Add the given hypothesis to the cell.
|
2012-07-03 21:11:53 +04:00
|
|
|
* Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
|
2014-08-08 00:42:30 +04:00
|
|
|
* This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
|
2012-07-03 21:11:53 +04:00
|
|
|
* \param hypo Hypothesis to be added
|
|
|
|
*/
|
2011-03-11 16:08:43 +03:00
|
|
|
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2011-02-24 15:36:50 +03:00
|
|
|
const Word &targetLHS = hypo->GetTargetLHS();
|
|
|
|
return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager);
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
/** Prune each collection in this cell to a particular size */
|
2010-04-08 21:16:10 +04:00
|
|
|
void ChartCell::PruneToSize()
|
|
|
|
{
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
2011-03-11 16:08:43 +03:00
|
|
|
ChartHypothesisCollection &coll = iter->second;
|
2011-02-24 15:36:50 +03:00
|
|
|
coll.PruneToSize(m_manager);
|
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2011-03-11 16:08:43 +03:00
|
|
|
/** Decoding at span level: fill chart cell with hypotheses
|
|
|
|
* (implementation of cube pruning)
|
|
|
|
* \param transOptList list of applicable rules to create hypotheses for the cell
|
|
|
|
* \param allChartCells entire chart - needed to look up underlying hypotheses
|
|
|
|
*/
|
2014-12-26 10:41:52 +03:00
|
|
|
void ChartCell::Decode(const ChartTranslationOptionList &transOptList
|
2015-01-14 14:07:42 +03:00
|
|
|
, const ChartCellCollection &allChartCells)
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2011-02-24 15:36:50 +03:00
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
|
2011-03-09 20:55:27 +03:00
|
|
|
// priority queue for applicable rules with selected hypotheses
|
2011-06-27 19:13:15 +04:00
|
|
|
RuleCubeQueue queue(m_manager);
|
2011-02-24 15:36:50 +03:00
|
|
|
|
|
|
|
// add all trans opt into queue. using only 1st child node.
|
2012-02-07 03:54:01 +04:00
|
|
|
for (size_t i = 0; i < transOptList.GetSize(); ++i) {
|
2012-09-25 20:34:43 +04:00
|
|
|
const ChartTranslationOptions &transOpt = transOptList.Get(i);
|
2011-06-27 19:13:15 +04:00
|
|
|
RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
|
2011-03-11 19:28:36 +03:00
|
|
|
queue.Add(ruleCube);
|
2011-02-24 15:36:50 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
// pluck things out of queue and add to hypo collection
|
|
|
|
const size_t popLimit = staticData.GetCubePruningPopLimit();
|
2013-05-29 21:16:15 +04:00
|
|
|
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
|
2011-06-27 19:13:15 +04:00
|
|
|
ChartHypothesis *hypo = queue.Pop();
|
2011-02-24 15:36:50 +03:00
|
|
|
AddHypothesis(hypo);
|
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
//! call SortHypotheses() in each hypo collection in this cell
|
2010-04-08 21:16:10 +04:00
|
|
|
void ChartCell::SortHypotheses()
|
|
|
|
{
|
2013-11-23 00:27:46 +04:00
|
|
|
UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
|
2013-11-18 23:03:30 +04:00
|
|
|
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
2011-03-11 16:08:43 +03:00
|
|
|
ChartHypothesisCollection &coll = iter->second;
|
2014-04-23 20:11:25 +04:00
|
|
|
|
|
|
|
if (coll.GetSize()) {
|
|
|
|
coll.SortHypotheses();
|
|
|
|
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
|
|
|
|
}
|
2011-02-24 15:36:50 +03:00
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-03 21:11:53 +04:00
|
|
|
/** Return the highest scoring hypothesis out of all the hypo collection in this cell */
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesis *ChartCell::GetBestHypothesis() const
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesis *ret = NULL;
|
2011-02-24 15:36:50 +03:00
|
|
|
float bestScore = -std::numeric_limits<float>::infinity();
|
|
|
|
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::const_iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
|
|
|
const HypoList &sortedList = iter->second.GetSortedHypotheses();
|
2013-09-18 17:57:59 +04:00
|
|
|
if (sortedList.size() > 0) {
|
2013-09-27 12:35:24 +04:00
|
|
|
const ChartHypothesis *hypo = sortedList[0];
|
|
|
|
if (hypo->GetTotalScore() > bestScore) {
|
|
|
|
bestScore = hypo->GetTotalScore();
|
|
|
|
ret = hypo;
|
|
|
|
}
|
2013-09-18 17:57:59 +04:00
|
|
|
}
|
2011-02-24 15:36:50 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
//! call CleanupArcList() in each hypo collection in this cell
|
2010-04-08 21:16:10 +04:00
|
|
|
void ChartCell::CleanupArcList()
|
|
|
|
{
|
2011-02-24 15:36:50 +03:00
|
|
|
// only necessary if n-best calculations are enabled
|
|
|
|
if (!m_nBestIsEnabled) return;
|
|
|
|
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
2011-03-11 16:08:43 +03:00
|
|
|
ChartHypothesisCollection &coll = iter->second;
|
2011-02-24 15:36:50 +03:00
|
|
|
coll.CleanupArcList();
|
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
//! debug info - size of each hypo collection in this cell
|
2010-04-08 21:16:10 +04:00
|
|
|
void ChartCell::OutputSizes(std::ostream &out) const
|
|
|
|
{
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::const_iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
2011-03-11 19:28:36 +03:00
|
|
|
const Word &targetLHS = iter->first;
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesisCollection &coll = iter->second;
|
2011-02-24 15:36:50 +03:00
|
|
|
|
|
|
|
out << targetLHS << "=" << coll.GetSize() << " ";
|
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-07-02 23:01:07 +04:00
|
|
|
//! debug info - total number of hypos in all hypo collection in this cell
|
2010-04-08 21:16:10 +04:00
|
|
|
size_t ChartCell::GetSize() const
|
|
|
|
{
|
2011-02-24 15:36:50 +03:00
|
|
|
size_t ret = 0;
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::const_iterator iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesisCollection &coll = iter->second;
|
2011-02-24 15:36:50 +03:00
|
|
|
|
|
|
|
ret += coll.GetSize();
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
2012-11-13 21:43:52 +04:00
|
|
|
/** Gather the sorted hypotheses of every collection in this cell into one list.
 *  Lists are appended collection by collection in m_hypoColl iteration order;
 *  the combined list is not re-sorted across collections.
 *  \return a HypoList allocated with new. This function does not free it, so
 *          the caller is presumably responsible for deleting it - confirm at
 *          the call sites.
 */
const HypoList *ChartCell::GetAllSortedHypotheses() const
{
  HypoList *merged = new HypoList();

  for (MapType::const_iterator it = m_hypoColl.begin(); it != m_hypoColl.end(); ++it) {
    const HypoList &sorted = it->second.GetSortedHypotheses();
    merged->insert(merged->end(), sorted.begin(), sorted.end());
  }

  return merged;
}
|
|
|
|
|
2014-08-08 00:20:10 +04:00
|
|
|
//! call WriteSearchGraph() for each hypo collection
|
|
|
|
void ChartCell::WriteSearchGraph(const ChartSearchGraphWriter& writer, const std::map<unsigned, bool> &reachable) const
|
2010-04-08 21:16:10 +04:00
|
|
|
{
|
2012-01-23 18:19:19 +04:00
|
|
|
MapType::const_iterator iterOutside;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesisCollection &coll = iterOutside->second;
|
2014-08-08 00:20:10 +04:00
|
|
|
coll.WriteSearchGraph(writer, reachable);
|
2011-02-24 15:36:50 +03:00
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
|
|
|
|
{
|
2012-01-23 18:19:19 +04:00
|
|
|
ChartCell::MapType::const_iterator iterOutside;
|
2011-02-24 15:36:50 +03:00
|
|
|
for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
|
2011-03-11 19:28:36 +03:00
|
|
|
const Word &targetLHS = iterOutside->first;
|
2011-02-24 15:36:50 +03:00
|
|
|
cerr << targetLHS << ":" << endl;
|
|
|
|
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesisCollection &coll = iterOutside->second;
|
2011-02-24 15:36:50 +03:00
|
|
|
cerr << coll;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
ChartCell::HCType::const_iterator iter;
|
|
|
|
for (iter = cell.m_hypos.begin(); iter != cell.m_hypos.end(); ++iter)
|
|
|
|
{
|
2011-03-11 16:08:43 +03:00
|
|
|
const ChartHypothesis &hypo = **iter;
|
2011-02-24 15:36:50 +03:00
|
|
|
out << hypo << endl;
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
return out;
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace
|