mosesdecoder/moses/ChartCell.cpp

232 lines
7.3 KiB
C++
Raw Normal View History

// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 Hieu Hoang
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include "ChartCell.h"
#include "ChartCellCollection.h"
#include "RuleCubeQueue.h"
#include "RuleCube.h"
#include "WordsRange.h"
#include "Util.h"
#include "StaticData.h"
2012-09-25 20:34:43 +04:00
#include "ChartTranslationOptions.h"
#include "ChartTranslationOptionList.h"
#include "ChartManager.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
2013-12-12 00:03:30 +04:00
// Declaration only: the flag is defined in another translation unit.
// Nothing in the visible part of this file reads or writes it.
extern bool g_mosesDebug;
/** Base-class constructor.
 * \param startPos start of the range covered by this cell
 * \param endPos end of the range covered by this cell
 *
 * The range is stored in m_coverage, and m_targetLabelSet is constructed
 * from that stored range.
 */
ChartCellBase::ChartCellBase(size_t startPos, size_t endPos)
  : m_coverage(startPos, endPos)
    // NOTE(review): this reads m_coverage, so it relies on m_coverage being
    // declared before m_targetLabelSet in the header -- confirm.
  , m_targetLabelSet(m_coverage)
{
}

//! Nothing is released explicitly here.
ChartCellBase::~ChartCellBase()
{
}
2012-07-02 23:01:07 +04:00
/** Constructor
 * \param startPos start of the range covered by this cell
 * \param endPos end of the range covered by this cell
 * \param manager reference back to the manager; kept in m_manager
 */
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager)
  : ChartCellBase(startPos, endPos)
  , m_manager(manager)
{
  // Cache the n-best setting once; CleanupArcList() consults it later.
  m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
}

//! Nothing is released explicitly here.
ChartCell::~ChartCell()
{
}
2013-05-29 21:16:15 +04:00
/** Add the given hypothesis to the cell.
 * The hypothesis is filed under its target LHS label, and the actual work is
 * delegated to the corresponding ChartHypothesisCollection::AddHypothesis().
 * \param hypo Hypothesis to be added
 * \return whatever the collection's AddHypothesis() returns: true if added,
 *         false if not (e.g. it may already exist in the collection, or its
 *         score may fall below a threshold).
 */
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
{
  // operator[] looks the label up in m_hypoColl; presumably (map semantics)
  // it creates an empty collection the first time a label is seen.
  ChartHypothesisCollection &bucket = m_hypoColl[hypo->GetTargetLHS()];
  return bucket.AddHypothesis(hypo, m_manager);
}
2012-07-02 23:01:07 +04:00
/** Prune each collection in this cell to a particular size */
void ChartCell::PruneToSize()
{
MapType::iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
ChartHypothesisCollection &coll = iter->second;
coll.PruneToSize(m_manager);
}
}
/** Decoding at span level: fill chart cell with hypotheses
* (implementation of cube pruning)
* \param transOptList list of applicable rules to create hypotheses for the cell
* \param allChartCells entire chart - needed to look up underlying hypotheses
*/
void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
, const ChartCellCollection &allChartCells)
{
const StaticData &staticData = StaticData::Instance();
// priority queue for applicable rules with selected hypotheses
RuleCubeQueue queue(m_manager);
// add all trans opt into queue. using only 1st child node.
for (size_t i = 0; i < transOptList.GetSize(); ++i) {
2012-09-25 20:34:43 +04:00
const ChartTranslationOptions &transOpt = transOptList.Get(i);
RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
queue.Add(ruleCube);
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
2013-05-29 21:16:15 +04:00
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);
}
}
2012-07-02 23:01:07 +04:00
//! call SortHypotheses() in each hypo collection in this cell
void ChartCell::SortHypotheses()
{
2013-11-23 00:27:46 +04:00
UTIL_THROW_IF2(!m_targetLabelSet.Empty(), "Already sorted");
MapType::iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
ChartHypothesisCollection &coll = iter->second;
if (coll.GetSize()) {
coll.SortHypotheses();
m_targetLabelSet.AddConstituent(iter->first, &coll.GetSortedHypotheses());
}
}
}
2012-07-03 21:11:53 +04:00
/** Return the highest scoring hypothesis out of all the hypo collections in
 *  this cell.
 *
 * Only the first entry of each collection's sorted list is inspected, so the
 * result is correct only if those lists are ordered best-first
 * (NOTE(review): presumably SortHypotheses() must have run -- confirm).
 *
 * \return the best hypothesis, or NULL when no collection offers a
 *         hypothesis whose total score is greater than -infinity.
 */
const ChartHypothesis *ChartCell::GetBestHypothesis() const
{
  const ChartHypothesis *best = NULL;
  float bestScore = -std::numeric_limits<float>::infinity();

  for (MapType::const_iterator it = m_hypoColl.begin(); it != m_hypoColl.end(); ++it) {
    const HypoList &sorted = it->second.GetSortedHypotheses();
    if (sorted.size() == 0) {
      continue;  // nothing to offer for this label
    }

    const ChartHypothesis *candidate = sorted[0];
    if (candidate->GetTotalScore() > bestScore) {
      bestScore = candidate->GetTotalScore();
      best = candidate;
    }
  }

  return best;
}
2012-07-02 23:01:07 +04:00
//! call CleanupArcList() in each hypo collection in this cell
void ChartCell::CleanupArcList()
{
// only necessary if n-best calculations are enabled
if (!m_nBestIsEnabled) return;
MapType::iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
ChartHypothesisCollection &coll = iter->second;
coll.CleanupArcList();
}
}
2012-07-02 23:01:07 +04:00
//! debug info - size of each hypo collection in this cell
void ChartCell::OutputSizes(std::ostream &out) const
{
MapType::const_iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
const Word &targetLHS = iter->first;
const ChartHypothesisCollection &coll = iter->second;
out << targetLHS << "=" << coll.GetSize() << " ";
}
}
2012-07-02 23:01:07 +04:00
//! debug info - total number of hypos in all hypo collection in this cell
size_t ChartCell::GetSize() const
{
size_t ret = 0;
MapType::const_iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
const ChartHypothesisCollection &coll = iter->second;
ret += coll.GetSize();
}
return ret;
}
/** Collect the sorted hypothesis lists of all collections into one list.
 *
 * The per-collection lists are appended one after another in the iteration
 * order of m_hypoColl; no sorting or merging across collections happens
 * here.
 *
 * \return a newly allocated list. The cell keeps no pointer to it, so the
 *         caller is responsible for deleting it.
 */
const HypoList *ChartCell::GetAllSortedHypotheses() const
{
  HypoList *combined = new HypoList();

  for (MapType::const_iterator it = m_hypoColl.begin(); it != m_hypoColl.end(); ++it) {
    const HypoList &sorted = it->second.GetSortedHypotheses();
    combined->insert(combined->end(), sorted.begin(), sorted.end());
  }

  return combined;
}
2012-07-02 23:01:07 +04:00
//! call GetSearchGraph() for each hypo collection
void ChartCell::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned, bool> &reachable) const
{
MapType::const_iterator iterOutside;
for (iterOutside = m_hypoColl.begin(); iterOutside != m_hypoColl.end(); ++iterOutside) {
const ChartHypothesisCollection &coll = iterOutside->second;
coll.GetSearchGraph(translationId, outputSearchGraphStream, reachable);
}
}
/** Debug output of a whole cell.
 * For every hypo collection, writes "<label>:" on its own line followed by
 * the collection itself (via the collection's own operator<<).
 *
 * All output goes to the stream passed in. The previous implementation wrote
 * to cerr and ignored \p out, so streaming a cell anywhere else printed
 * nothing on the requested stream.
 *
 * \param out stream to write to
 * \param cell cell to print
 * \return \p out, to allow chaining
 */
std::ostream& operator<<(std::ostream &out, const ChartCell &cell)
{
  ChartCell::MapType::const_iterator iterOutside;
  for (iterOutside = cell.m_hypoColl.begin(); iterOutside != cell.m_hypoColl.end(); ++iterOutside) {
    const Word &targetLHS = iterOutside->first;
    out << targetLHS << ":" << std::endl;

    const ChartHypothesisCollection &coll = iterOutside->second;
    out << coll;
  }

  return out;
}
} // namespace