mosesdecoder/contrib/other-builds/moses2/SCFG/Manager.cpp

248 lines
6.3 KiB
C++
Raw Normal View History

2016-02-26 15:26:32 +03:00
/*
* Manager.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
2016-05-06 13:09:52 +03:00
#include <cstdlib>
2016-02-26 15:26:32 +03:00
#include <vector>
#include <sstream>
#include "Manager.h"
2016-03-03 16:04:27 +03:00
#include "InputPath.h"
2016-04-17 09:16:58 +03:00
#include "Hypothesis.h"
2016-04-30 13:47:51 +03:00
#include "TargetPhraseImpl.h"
2016-05-06 13:09:52 +03:00
#include "ActiveChart.h"
2016-04-27 21:59:03 +03:00
#include "Sentence.h"
2016-03-03 16:04:27 +03:00
#include "../System.h"
#include "../TranslationModel/PhraseTable.h"
2016-02-26 15:26:32 +03:00
using namespace std;
namespace Moses2
{
namespace SCFG
{
2016-03-31 23:00:16 +03:00
/**
 * Construct an SCFG manager for a single input sentence.
 *
 * All arguments are forwarded unchanged to ManagerBase; this class adds no
 * construction-time work of its own.
 */
Manager::Manager(System &sys,
                 const TranslationTask &task,
                 const std::string &inputStr,
                 long translationId)
  : ManagerBase(sys, task, inputStr, translationId)
{}
2016-02-26 15:26:32 +03:00
2016-02-26 15:51:50 +03:00
/** Destructor. The body is intentionally empty. */
Manager::~Manager() {}
void Manager::Decode()
{
2016-03-31 23:00:16 +03:00
// init pools etc
2016-04-16 16:56:15 +03:00
//cerr << "START InitPools()" << endl;
2016-03-31 23:00:16 +03:00
InitPools();
2016-04-16 16:56:15 +03:00
//cerr << "START ParseInput()" << endl;
2016-02-26 15:35:24 +03:00
2016-04-27 12:36:15 +03:00
FactorCollection &vocab = system.GetVocab();
m_input = Sentence::CreateFromString(GetPool(), vocab, system, m_inputStr,
m_translationId);
const Sentence &sentence = static_cast<const Sentence&>(GetInput());
size_t inputSize = sentence.GetSize();
2016-05-25 18:31:48 +03:00
//cerr << "inputSize=" << inputSize << endl;
2016-03-02 00:41:32 +03:00
2016-04-27 12:36:15 +03:00
m_inputPaths.Init(sentence, *this);
2016-04-16 16:56:15 +03:00
//cerr << "CREATED m_inputPaths" << endl;
2016-03-02 00:41:32 +03:00
2016-04-16 20:59:15 +03:00
m_stacks.Init(*this, inputSize);
2016-04-16 16:56:15 +03:00
//cerr << "CREATED m_stacks" << endl;
2016-03-01 02:28:24 +03:00
2016-04-16 20:59:15 +03:00
for (int startPos = inputSize - 1; startPos >= 0; --startPos) {
2016-03-31 23:00:16 +03:00
InitActiveChart(startPos);
2016-03-02 00:41:32 +03:00
int maxPhraseSize = inputSize - startPos + 1;
for (int phraseSize = 1; phraseSize < maxPhraseSize; ++phraseSize) {
2016-04-29 01:45:23 +03:00
InputPath &path = *m_inputPaths.GetMatrix().GetValue(startPos, phraseSize);
2016-05-25 18:31:48 +03:00
//cerr << endl << "path=" << path << endl;
2016-04-29 01:45:23 +03:00
Stack &stack = m_stacks.GetStack(startPos, phraseSize);
Lookup(path);
Decode(path, stack);
LookupUnary(path);
2016-04-29 02:09:02 +03:00
2016-04-29 14:23:43 +03:00
//cerr << "#rules=" << path.GetNumRules() << endl;
2016-03-31 23:00:16 +03:00
}
}
2016-04-17 21:33:21 +03:00
m_stacks.OutputStacks();
2016-02-26 15:26:32 +03:00
}
2016-03-02 00:41:32 +03:00
void Manager::InitActiveChart(size_t pos)
{
2016-04-15 15:38:01 +03:00
InputPath &path = *m_inputPaths.GetMatrix().GetValue(pos, 0);
2016-04-16 16:56:15 +03:00
//cerr << "pos=" << pos << " path=" << path << endl;
2016-03-31 23:00:16 +03:00
size_t numPt = system.mappings.size();
2016-04-16 16:56:15 +03:00
//cerr << "numPt=" << numPt << endl;
2016-03-31 23:00:16 +03:00
for (size_t i = 0; i < numPt; ++i) {
2016-04-14 17:55:13 +03:00
const PhraseTable &pt = *system.mappings[i];
2016-04-16 16:56:15 +03:00
//cerr << "START InitActiveChart" << endl;
2016-05-25 23:02:34 +03:00
pt.InitActiveChart(GetPool(), path);
2016-04-16 16:56:15 +03:00
//cerr << "FINISHED InitActiveChart" << endl;
2016-03-31 23:00:16 +03:00
}
2016-04-15 15:38:01 +03:00
}
2016-04-29 01:45:23 +03:00
/**
 * Run the rule lookup of every phrase table on the given input path.
 *
 * Each table is called with its own entry from system.maxChartSpans.
 *
 * @param path input path (span) to look rules up for
 */
void Manager::Lookup(InputPath &path)
{
  const size_t tableCount = system.mappings.size();

  for (size_t tableInd = 0; tableInd < tableCount; ++tableInd) {
    const PhraseTable &table = *system.mappings[tableInd];
    const size_t spanLimit = system.maxChartSpans[tableInd];
    table.Lookup(GetPool(), *this, spanLimit, m_stacks, path);
  }
}
2016-04-29 01:45:23 +03:00
/**
 * Run the unary-rule lookup of every phrase table on the given input path.
 *
 * @param path input path (span) to look unary rules up for
 */
void Manager::LookupUnary(InputPath &path)
{
  const size_t tableCount = system.mappings.size();

  for (size_t tableInd = 0; tableInd < tableCount; ++tableInd) {
    const PhraseTable &table = *system.mappings[tableInd];
    table.LookupUnary(GetPool(), *this, m_stacks, path);
  }
}
2016-04-29 01:45:23 +03:00
/**
 * Expand hypotheses for every rule attached to an input path.
 *
 * Iterates over path.targetPhrases (symbol binding -> target phrases) and
 * calls ExpandHypo once per (symbol binding, target phrase) pair.
 *
 * @param path  input path whose target phrases are applied
 * @param stack stack passed on to ExpandHypo
 */
void Manager::Decode(InputPath &path, Stack &stack)
{
  typedef boost::unordered_map<SCFG::SymbolBind, SCFG::TargetPhrases*> RuleMap;

  for (RuleMap::const_iterator bindIter = path.targetPhrases->begin();
       bindIter != path.targetPhrases->end(); ++bindIter) {
    const SCFG::SymbolBind &symbolBind = bindIter->first;
    const SCFG::TargetPhrases &phrases = *bindIter->second;

    for (SCFG::TargetPhrases::const_iterator tpIter = phrases.begin();
         tpIter != phrases.end(); ++tpIter) {
      const SCFG::TargetPhraseImpl &targetPhrase = **tpIter;
      ExpandHypo(path, symbolBind, targetPhrase, stack);
    }
  }
}
2016-05-31 20:34:26 +03:00
/**
 * Create one hypothesis per combination of previous hypotheses for a rule.
 *
 * A running counter is handed to IncrPrevHypoIndices, which fills in one
 * previous-hypothesis index per non-terminal and reports whether that
 * combination exists. For each existing combination a Hypothesis is
 * placement-constructed in the manager's pool, initialised, evaluated and
 * added to the stack.
 *
 * @param path       input path the rule applies to
 * @param symbolBind symbol binding of the rule
 * @param tp         target phrase of the rule
 * @param stack      stack that receives the new hypotheses
 */
void Manager::ExpandHypo(
  const SCFG::InputPath &path,
  const SCFG::SymbolBind &symbolBind,
  const SCFG::TargetPhraseImpl &tp,
  Stack &stack)
{
  Recycler<HypothesisBase*> &recycler = GetHypoRecycle();

  std::vector<const SymbolBindElement*> ntEles = symbolBind.GetNTElements();
  std::vector<size_t> prevHyposIndices(symbolBind.numNT);
  assert(ntEles.size() == symbolBind.numNT);

  for (size_t comboInd = 0;
       IncrPrevHypoIndices(prevHyposIndices, comboInd, ntEles);
       ++comboInd) {
    void *mem = GetPool().Allocate<SCFG::Hypothesis>();
    SCFG::Hypothesis *hypo = new (mem) SCFG::Hypothesis(GetPool(), system);

    hypo->Init(*this, path, symbolBind, tp, prevHyposIndices);
    hypo->EvaluateWhenApplied();

    stack.Add(hypo, recycler, arcLists);
  }
}
2016-05-06 13:09:52 +03:00
bool Manager::IncrPrevHypoIndices(
vector<size_t> &prevHyposIndices,
size_t ind,
const std::vector<const SymbolBindElement*> ntEles)
{
if (ntEles.size() == 0) {
// no nt. Do the 1st
return ind ? false : true;
}
size_t numHypos = 0;
2016-05-25 01:35:43 +03:00
//cerr << "IncrPrevHypoIndices:" << ind << " " << ntEles.size() << " ";
2016-05-06 13:09:52 +03:00
for (size_t i = 0; i < ntEles.size() - 1; ++i) {
const SymbolBindElement &ele = *ntEles[i];
Hypotheses &hypos = ele.hypos->GetSortedAndPruneHypos(*this, arcLists);
numHypos = hypos.size();
std::div_t divRet = std::div((int)ind, (int)numHypos);
ind = divRet.quot;
size_t hypoInd = divRet.rem;
prevHyposIndices[i] = hypoInd;
2016-05-25 01:35:43 +03:00
//cerr << "(" << i << "," << ind << "," << numHypos << "," << hypoInd << ")";
2016-05-06 13:09:52 +03:00
}
// last
prevHyposIndices.back() = ind;
// check if last is over limit
const SymbolBindElement &ele = *ntEles.back();
Hypotheses &hypos = ele.hypos->GetSortedAndPruneHypos(*this, arcLists);
numHypos = hypos.size();
2016-05-25 01:35:43 +03:00
//cerr << "(" << (ntEles.size() - 1) << "," << ind << "," << numHypos << "," << ind << ")";
//cerr << endl;
2016-05-06 13:09:52 +03:00
if (ind >= numHypos) {
return false;
}
else {
return true;
}
}
2016-05-06 17:41:50 +03:00
std::string Manager::OutputBest() const
{
stringstream out;
const Stack &lastStack = m_stacks.GetLastStack();
const Hypothesis *bestHypo = lastStack.GetBestHypo(*this, const_cast<ArcLists&>(arcLists));
if (bestHypo) {
bestHypo->OutputToStream(out);
2016-05-25 18:31:48 +03:00
cerr << "BEST TRANSLATION: " << *bestHypo << " " << out.str() << endl;
2016-05-06 17:41:50 +03:00
}
else {
cerr << "NO TRANSLATION " << m_input->GetTranslationId() << endl;
}
2016-05-25 18:31:48 +03:00
out << endl;
2016-05-06 17:41:50 +03:00
return out.str();
2016-02-26 15:26:32 +03:00
}
2016-05-06 17:41:50 +03:00
} // namespace
2016-02-26 15:26:32 +03:00
}