// $Id$ // vim:tabstop=2 /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2010 Hieu Hoang This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include #include "StaticData.h" #include "ChartHypothesisCollection.h" #include "ChartHypothesis.h" #include "ChartManager.h" #include "util/exception.hh" using namespace std; using namespace Moses; namespace Moses { ChartHypothesisCollection::ChartHypothesisCollection() { const StaticData &staticData = StaticData::Instance(); m_beamWidth = staticData.GetBeamWidth(); m_maxHypoStackSize = staticData.GetMaxHypoStackSize(); m_nBestIsEnabled = staticData.IsNBestEnabled(); m_bestScore = -std::numeric_limits::infinity(); } ChartHypothesisCollection::~ChartHypothesisCollection() { HCType::iterator iter; for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { ChartHypothesis *hypo = *iter; ChartHypothesis::Delete(hypo); } //RemoveAllInColl(m_hypos); } /** public function to add hypothesis to this collection. * Returns false if equiv hypo exists in collection, otherwise returns true. * Takes care of update arc list for n-best list creation. * Will delete hypo is it exist - once this function is call don't delete hypothesis. * \param hypo hypothesis to add * \param manager pointer back to manager */ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManager &manager) { if (hypo->GetTotalScore() == - std::numeric_limits::infinity()) { manager.GetSentenceStats().AddDiscarded(); VERBOSE(3,"discarded, -inf score" << std::endl); ChartHypothesis::Delete(hypo); return false; } if (hypo->GetTotalScore() < m_bestScore + m_beamWidth) { // really bad score. don't bother adding hypo into collection manager.GetSentenceStats().AddDiscarded(); VERBOSE(3,"discarded, too bad for stack" << std::endl); ChartHypothesis::Delete(hypo); return false; } // over threshold, try to add to collection std::pair addRet = Add(hypo, manager); // does it have the same state as an existing hypothesis? if (addRet.second) { // nothing found. add to collection return true; } // equiv hypo exists, recombine with other hypo HCType::iterator &iterExisting = addRet.first; ChartHypothesis *hypoExisting = *iterExisting; UTIL_THROW_IF2(iterExisting == m_hypos.end(), "Adding a hypothesis should have returned a valid iterator"); //StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting); // found existing hypo with same target ending. // keep the best 1 if (hypo->GetTotalScore() > hypoExisting->GetTotalScore()) { // incoming hypo is better than the one we have VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, "); if (m_nBestIsEnabled) { hypo->AddArc(hypoExisting); Detach(iterExisting); } else { Remove(iterExisting); } bool added = Add(hypo, manager).second; if (!added) { iterExisting = m_hypos.find(hypo); UTIL_THROW2("Offending hypo = " << **iterExisting); } return false; } else { // already storing the best hypo. discard current hypo VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl) if (m_nBestIsEnabled) { hypoExisting->AddArc(hypo); } else { ChartHypothesis::Delete(hypo); } return false; } } /** add hypothesis to stack. Prune if necessary. * Returns false if equiv hypo exists in collection, otherwise returns true, and the iterator that points to the place where the hypo was added * \param hypo hypothesis to add * \param manager pointer back to manager */ pair ChartHypothesisCollection::Add(ChartHypothesis *hypo, ChartManager &manager) { std::pair ret = m_hypos.insert(hypo); if (ret.second) { // equiv hypo doesn't exists VERBOSE(3,"added hyp to stack"); // Update best score, if this hypothesis is new best if (hypo->GetTotalScore() > m_bestScore) { VERBOSE(3,", best on stack"); m_bestScore = hypo->GetTotalScore(); } // Prune only if stack is twice as big as needed (lazy pruning) VERBOSE(3,", now size " << m_hypos.size()); if (m_hypos.size() > 2*m_maxHypoStackSize-1) { PruneToSize(manager); } else { VERBOSE(3,std::endl); } } return ret; } /** Remove hypothesis pointed to by iterator but DOES NOT delete the object. * \param iter iterator to delete */ void ChartHypothesisCollection::Detach(const HCType::iterator &iter) { m_hypos.erase(iter); } /** destroy iterator AND hypothesis pointed to by iterator. If in an object pool, takes care of that too */ void ChartHypothesisCollection::Remove(const HCType::iterator &iter) { ChartHypothesis *h = *iter; /* stringstream strme(""); strme << h->GetOutputPhrase(); string toFind = "the goal of gene scientists is "; size_t pos = toFind.find(strme.str()); if (pos == 0) { cerr << pos << " " << strme.str() << *h << endl; cerr << *this << endl; } */ Detach(iter); ChartHypothesis::Delete(h); } /** prune number of hypo to a particular number of hypos, specified by m_maxHypoStackSize, according to score * Don't prune of hypos have identical scores on the boundary, so occasionally number of hypo can remain above m_maxHypoStackSize. * \param manager reference back to manager. Used for collecting stats */ void ChartHypothesisCollection::PruneToSize(ChartManager &manager) { if (m_maxHypoStackSize == 0) return; // no limit if (GetSize() > m_maxHypoStackSize) { // ok, if not over the limit priority_queue bestScores; // push all scores to a heap // (but never push scores below m_bestScore+m_beamWidth) HCType::iterator iter = m_hypos.begin(); float score = 0; while (iter != m_hypos.end()) { ChartHypothesis *hypo = *iter; score = hypo->GetTotalScore(); if (score > m_bestScore+m_beamWidth) { bestScores.push(score); } ++iter; } // pop the top newSize scores (and ignore them, these are the scores of hyps that will remain) // ensure to never pop beyond heap size size_t minNewSizeHeapSize = m_maxHypoStackSize > bestScores.size() ? bestScores.size() : m_maxHypoStackSize; for (size_t i = 1 ; i < minNewSizeHeapSize ; i++) bestScores.pop(); // and remember the threshold float scoreThreshold = bestScores.top(); // delete all hypos under score threshold iter = m_hypos.begin(); while (iter != m_hypos.end()) { ChartHypothesis *hypo = *iter; float score = hypo->GetTotalScore(); if (score < scoreThreshold) { HCType::iterator iterRemove = iter++; Remove(iterRemove); manager.GetSentenceStats().AddPruning(); } else { ++iter; } } VERBOSE(3,", pruned to size " << m_hypos.size() << endl); IFVERBOSE(3) { TRACE_ERR("stack now contains: "); for(iter = m_hypos.begin(); iter != m_hypos.end(); iter++) { ChartHypothesis *hypo = *iter; TRACE_ERR( hypo->GetId() << " (" << hypo->GetTotalScore() << ") "); } TRACE_ERR( endl); } // desperation pruning if (m_hypos.size() > m_maxHypoStackSize * 2) { std::vector hyposOrdered; // sort hypos std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(hyposOrdered, hyposOrdered.end())); std::sort(hyposOrdered.begin(), hyposOrdered.end(), ChartHypothesisScoreOrderer()); //keep only |size|. delete the rest std::vector::iterator iter; for (iter = hyposOrdered.begin() + (m_maxHypoStackSize * 2); iter != hyposOrdered.end(); ++iter) { ChartHypothesis *hypo = *iter; HCType::iterator iterFindHypo = m_hypos.find(hypo); UTIL_THROW_IF2(iterFindHypo == m_hypos.end(), "Adding a hypothesis should have returned a valid iterator"); Remove(iterFindHypo); } } } } //! sort hypothses by descending score. Put these hypos into a vector m_hyposOrdered to be returned by function GetSortedHypotheses() void ChartHypothesisCollection::SortHypotheses() { UTIL_THROW_IF2(!m_hyposOrdered.empty(), "Hypotheses already sorted"); if (!m_hypos.empty()) { // done everything for this cell. // sort // put into vec m_hyposOrdered.reserve(m_hypos.size()); std::copy(m_hypos.begin(), m_hypos.end(), back_inserter(m_hyposOrdered)); std::sort(m_hyposOrdered.begin(), m_hyposOrdered.end(), ChartHypothesisScoreOrderer()); } } //! Call CleanupArcList() for each main hypo in collection void ChartHypothesisCollection::CleanupArcList() { HCType::iterator iter; for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { ChartHypothesis *mainHypo = *iter; mainHypo->CleanupArcList(); } } /** Return all hypos, and all hypos in the arclist, in order to create the output searchgraph, ie. the hypergraph. The output is the debug hypo information. * @todo this is a useful function. Make sure it outputs everything required, especially scores. * \param translationId unique, contiguous id for the input sentence * \param outputSearchGraphStream stream to output the info to * \param reachable @todo don't know */ void ChartHypothesisCollection::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map &reachable) const { HCType::const_iterator iter; for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) { ChartHypothesis &mainHypo = **iter; if (StaticData::Instance().GetUnprunedSearchGraph() || reachable.find(mainHypo.GetId()) != reachable.end()) { outputSearchGraphStream << translationId << " " << mainHypo << endl; } const ChartArcList *arcList = mainHypo.GetArcList(); if (arcList) { ChartArcList::const_iterator iterArc; for (iterArc = arcList->begin(); iterArc != arcList->end(); ++iterArc) { const ChartHypothesis &arc = **iterArc; if (reachable.find(arc.GetId()) != reachable.end()) { outputSearchGraphStream << translationId << " " << arc << endl; } } } } } std::ostream& operator<<(std::ostream &out, const ChartHypothesisCollection &coll) { HypoList::const_iterator iterInside; for (iterInside = coll.m_hyposOrdered.begin(); iterInside != coll.m_hyposOrdered.end(); ++iterInside) { const ChartHypothesis &hypo = **iterInside; out << hypo << endl; } return out; } } // namespace