2013-05-27 18:54:50 +04:00
|
|
|
#include <stdexcept>
|
|
|
|
|
2012-11-15 22:24:21 +04:00
|
|
|
#include "moses/Incremental.h"
|
2012-10-11 19:38:39 +04:00
|
|
|
|
2012-11-12 23:56:18 +04:00
|
|
|
#include "moses/ChartCell.h"
|
2012-11-15 22:04:07 +04:00
|
|
|
#include "moses/ChartParserCallback.h"
|
2012-11-16 18:46:10 +04:00
|
|
|
#include "moses/FeatureVector.h"
|
2012-11-12 23:56:18 +04:00
|
|
|
#include "moses/StaticData.h"
|
2012-11-16 18:46:10 +04:00
|
|
|
#include "moses/Util.h"
|
2013-05-27 18:54:50 +04:00
|
|
|
#include "moses/LM/Base.h"
|
2014-12-02 22:09:10 +03:00
|
|
|
#include "moses/OutputCollector.h"
|
2012-10-11 19:38:39 +04:00
|
|
|
|
2012-11-15 22:04:07 +04:00
|
|
|
#include "lm/model.hh"
|
|
|
|
#include "search/applied.hh"
|
2012-10-11 19:38:39 +04:00
|
|
|
#include "search/config.hh"
|
2012-11-15 22:04:07 +04:00
|
|
|
#include "search/context.hh"
|
|
|
|
#include "search/edge_generator.hh"
|
|
|
|
#include "search/rule.hh"
|
|
|
|
#include "search/vertex_generator.hh"
|
2012-10-11 19:38:39 +04:00
|
|
|
|
2012-10-12 17:38:07 +04:00
|
|
|
#include <boost/lexical_cast.hpp>
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
namespace Incremental
|
|
|
|
{
|
|
|
|
namespace
|
|
|
|
{
|
2012-11-15 22:04:07 +04:00
|
|
|
|
|
|
|
// This is called by EdgeGenerator. Route hypotheses to separate vertices for
|
2013-05-29 21:16:15 +04:00
|
|
|
// each left hand side label, populating ChartCellLabelSet out.
|
|
|
|
template <class Best> class HypothesisCallback
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
typedef search::VertexGenerator<Best> Gen;
|
|
|
|
public:
|
|
|
|
HypothesisCallback(search::ContextBase &context, Best &best, ChartCellLabelSet &out, boost::object_pool<search::Vertex> &vertex_pool)
|
|
|
|
: context_(context), best_(best), out_(out), vertex_pool_(vertex_pool) {}
|
|
|
|
|
|
|
|
void NewHypothesis(search::PartialEdge partial) {
|
|
|
|
// Get the LHS, look it up in the output ChartCellLabel, and upcast it.
|
|
|
|
// It's not part of the union because it would have been ugly to expose template types in ChartCellLabel.
|
|
|
|
ChartCellLabel::Stack &stack = out_.FindOrInsert(static_cast<const TargetPhrase *>(partial.GetNote().vp)->GetTargetLHS());
|
|
|
|
Gen *entry = static_cast<Gen*>(stack.incr_generator);
|
|
|
|
if (!entry) {
|
2014-06-03 19:48:10 +04:00
|
|
|
entry = generator_pool_.construct(boost::ref(context_), boost::ref(*vertex_pool_.construct()), boost::ref(best_));
|
2013-05-29 21:16:15 +04:00
|
|
|
stack.incr_generator = entry;
|
2012-11-15 22:04:07 +04:00
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
entry->NewHypothesis(partial);
|
|
|
|
}
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void FinishedSearch() {
|
|
|
|
for (ChartCellLabelSet::iterator i(out_.mutable_begin()); i != out_.mutable_end(); ++i) {
|
2014-03-21 14:53:15 +04:00
|
|
|
if ((*i) == NULL) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
ChartCellLabel::Stack &stack = (*i)->MutableStack();
|
2013-05-29 21:16:15 +04:00
|
|
|
Gen *gen = static_cast<Gen*>(stack.incr_generator);
|
|
|
|
gen->FinishedSearch();
|
|
|
|
stack.incr = &gen->Generating();
|
2012-11-15 22:04:07 +04:00
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
}
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
private:
|
|
|
|
search::ContextBase &context_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
Best &best_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
ChartCellLabelSet &out_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
boost::object_pool<search::Vertex> &vertex_pool_;
|
|
|
|
boost::object_pool<Gen> generator_pool_;
|
2012-11-15 22:04:07 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
// This is called by the moses parser to collect hypotheses. It converts to my
|
2013-05-29 21:16:15 +04:00
|
|
|
// edges (search::PartialEdge).
|
|
|
|
template <class Model> class Fill : public ChartParserCallback
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Fill(search::Context<Model> &context, const std::vector<lm::WordIndex> &vocab_mapping, search::Score oov_weight)
|
|
|
|
: context_(context), vocab_mapping_(vocab_mapping), oov_weight_(oov_weight) {}
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &ignored);
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &waste_memory, const WordsRange &range);
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2014-03-26 15:23:23 +04:00
|
|
|
float GetBestScore(const ChartCellLabel *chartCell) const;
|
2014-03-21 14:53:15 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
bool Empty() const {
|
|
|
|
return edges_.Empty();
|
|
|
|
}
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
template <class Best> void Search(Best &best, ChartCellLabelSet &out, boost::object_pool<search::Vertex> &vertex_pool) {
|
|
|
|
HypothesisCallback<Best> callback(context_, best, out, vertex_pool);
|
|
|
|
edges_.Search(context_, callback);
|
|
|
|
}
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Root: everything into one vertex.
|
|
|
|
template <class Best> search::History RootSearch(Best &best) {
|
|
|
|
search::Vertex vertex;
|
|
|
|
search::RootVertexGenerator<Best> gen(vertex, best);
|
|
|
|
edges_.Search(context_, gen);
|
|
|
|
return vertex.BestChild();
|
|
|
|
}
|
2012-11-16 18:46:10 +04:00
|
|
|
|
2014-08-08 21:11:30 +04:00
|
|
|
void EvaluateWithSourceContext(const InputType &input, const InputPath &inputPath) {
|
2013-08-16 00:14:04 +04:00
|
|
|
// TODO for input lattice
|
2013-08-13 15:12:58 +04:00
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
private:
|
|
|
|
lm::WordIndex Convert(const Word &word) const;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
search::Context<Model> &context_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
const std::vector<lm::WordIndex> &vocab_mapping_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
search::EdgeGenerator edges_;
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
const search::Score oov_weight_;
|
2012-11-15 22:04:07 +04:00
|
|
|
};
|
|
|
|
|
2014-03-21 14:53:15 +04:00
|
|
|
// Create one search edge per target phrase in `targets`. All of them share the
// non-terminals in `nts`; `range` is recorded on every edge.
template <class Model> void Fill<Model>::Add(const TargetPhraseCollection &targets, const StackVec &nts, const WordsRange &range)
{
  // Collect the root vertex of every non-terminal and sum up their best scores.
  std::vector<search::PartialVertex> child_vertices;
  child_vertices.reserve(nts.size());
  float children_score = 0.0;
  for (StackVec::const_iterator child(nts.begin()); child != nts.end(); ++child) {
    child_vertices.push_back((*child)->GetStack().incr->RootAlternate());
    children_score += (*child)->GetBestScore(this);
  }

  // Scratch buffer for the LM view of a rule; cleared for every target phrase.
  std::vector<lm::WordIndex> lm_words;
  for (TargetPhraseCollection::const_iterator rule(targets.begin()); rule != targets.end(); ++rule) {
    lm_words.clear();
    const TargetPhrase &phrase = **rule;
    const AlignmentInfo::NonTermIndexMap &nt_index = phrase.GetAlignNonTerm().GetNonTermIndexMap();
    search::PartialEdge edge(edges_.AllocateEdge(nts.size()));

    // Walk the target side: non-terminals fill the edge's vertex slots in
    // target order, terminals are converted to LM word indices.
    search::PartialVertex *slot = edge.NT();
    const size_t phrase_length = phrase.GetSize();
    for (size_t pos = 0; pos < phrase_length; ++pos) {
      const Word &word = phrase.GetWord(pos);
      if (!word.IsNonTerminal()) {
        lm_words.push_back(Convert(word));
        continue;
      }
      *slot = child_vertices[nt_index[pos]];
      ++slot;
      lm_words.push_back(search::kNonTerminal);
    }

    edge.SetScore(phrase.GetFutureScore() + children_score);
    // prob and oov were already accounted for.
    search::ScoreRule(context_.LanguageModel(), lm_words, edge.Between());

    // Remember which target phrase produced this edge.
    search::Note origin;
    origin.vp = &phrase;
    edge.SetNote(origin);
    edge.SetRange(range);

    edges_.AddEdge(edge);
  }
}
|
|
|
|
|
2014-04-04 18:54:48 +04:00
|
|
|
// Create an edge without non-terminals for an out-of-vocabulary phrase of at
// most one word. Throws (via UTIL_THROW_IF2) when the phrase is longer.
template <class Model> void Fill<Model>::AddPhraseOOV(TargetPhrase &phrase, std::list<TargetPhraseCollection*> &, const WordsRange &range)
{
  UTIL_THROW_IF2(phrase.GetSize() > 1,
                 "OOV target phrase should be 0 or 1 word in length");

  std::vector<lm::WordIndex> lm_words;
  if (phrase.GetSize() != 0) {
    lm_words.push_back(Convert(phrase.GetWord(0)));
  }

  search::PartialEdge edge(edges_.AllocateEdge(0));
  // Appears to be a bug that FutureScore does not already include language model.
  // So the LM probability and the OOV penalty are added to the edge score here.
  search::ScoreRuleRet lm_result(search::ScoreRule(context_.LanguageModel(), lm_words, edge.Between()));
  edge.SetScore(phrase.GetFutureScore() + lm_result.prob * context_.LMWeight() + static_cast<search::Score>(lm_result.oov) * oov_weight_);

  // Remember which target phrase produced this edge.
  search::Note origin;
  origin.vp = &phrase;
  edge.SetNote(origin);
  edge.SetRange(range);

  edges_.AddEdge(edge);
}
|
|
|
|
|
2014-03-26 15:23:23 +04:00
|
|
|
// for pruning
|
|
|
|
template <class Model> float Fill<Model>::GetBestScore(const ChartCellLabel *chartCell) const
|
2014-03-21 14:53:15 +04:00
|
|
|
{
|
2014-05-19 17:34:27 +04:00
|
|
|
search::PartialVertex vertex = chartCell->GetStack().incr->RootAlternate();
|
|
|
|
UTIL_THROW_IF2(vertex.Empty(), "hypothesis with empty stack");
|
|
|
|
return vertex.Bound();
|
2014-03-21 14:53:15 +04:00
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// TODO: factors (but chart doesn't seem to support factors anyway).
|
|
|
|
template <class Model> lm::WordIndex Fill<Model>::Convert(const Word &word) const
|
|
|
|
{
|
2012-11-15 22:04:07 +04:00
|
|
|
std::size_t factor = word.GetFactor(0)->GetId();
|
|
|
|
return (factor >= vocab_mapping_.size() ? 0 : vocab_mapping_[factor]);
|
|
|
|
}
|
|
|
|
|
2012-10-11 19:38:39 +04:00
|
|
|
struct ChartCellBaseFactory {
|
|
|
|
ChartCellBase *operator()(size_t startPos, size_t endPos) const {
|
|
|
|
return new ChartCellBase(startPos, endPos);
|
|
|
|
}
|
|
|
|
};
|
2012-11-15 22:04:07 +04:00
|
|
|
|
2012-10-11 19:38:39 +04:00
|
|
|
} // namespace
|
|
|
|
|
2013-05-11 17:13:26 +04:00
|
|
|
// Builds the chart cells and the parser for `source`, and configures the n-best
// collector with the globally configured n-best size.
Manager::Manager(const InputType &source)
  : BaseManager(source)
  , cells_(source, ChartCellBaseFactory())
  , parser_(source, cells_)
  , n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
{
}
|
2012-10-11 19:38:39 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Nothing to do explicitly; members clean up after themselves.
Manager::~Manager() {}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Runs the search over the whole chart. Every span except the full sentence is
// searched with Fill::Search, visiting start positions from the last one down
// to 0 and widths from 1 upwards; the full-sentence span is searched last with
// Fill::RootSearch, whose result is returned.
template <class Model, class Best> search::History Manager::PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
  const LanguageModel &abstract = LanguageModel::GetFirstLM();
  float oov_weight = 0.0;
  if (abstract.OOVFeatureEnabled()) {
    oov_weight = abstract.GetOOVWeight();
  }

  const StaticData &data = StaticData::Instance();
  search::Config config(abstract.GetWeight() * M_LN10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
  search::Context<Model> context(config, model);

  const size_t size = m_source.GetSize();
  boost::object_pool<search::Vertex> vertex_pool(std::max<size_t>(size * size / 2, 32));

  for (int startPos = size - 1; startPos >= 0; --startPos) {
    // startPos is non-negative inside the loop, so this conversion is exact.
    const size_t begin = startPos;
    const size_t widest = size - begin;
    for (size_t width = 1; width <= widest; ++width) {
      const size_t past_end = begin + width;
      // full range uses RootSearch
      if (begin == 0 && past_end == size) {
        break;
      }
      WordsRange span(begin, past_end - 1);
      // A fresh Fill per span: it starts with no edges.
      Fill<Model> cell_filler(context, words, oov_weight);
      parser_.Create(span, cell_filler);
      cell_filler.Search(out, cells_.MutableBase(span).MutableTargetLabelSet(), vertex_pool);
    }
  }

  WordsRange whole(0, size - 1);
  Fill<Model> root_filler(context, words, oov_weight);
  parser_.Create(whole, root_filler);
  return root_filler.RootSearch(out);
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words)
|
|
|
|
{
|
2012-11-16 18:46:10 +04:00
|
|
|
std::size_t nbest = StaticData::Instance().GetNBestSize();
|
|
|
|
if (nbest <= 1) {
|
|
|
|
search::History ret = PopulateBest(model, words, single_best_);
|
|
|
|
if (ret) {
|
|
|
|
backing_for_single_.resize(1);
|
|
|
|
backing_for_single_[0] = search::Applied(ret);
|
|
|
|
} else {
|
|
|
|
backing_for_single_.clear();
|
|
|
|
}
|
|
|
|
completed_nbest_ = &backing_for_single_;
|
|
|
|
} else {
|
|
|
|
search::History ret = PopulateBest(model, words, n_best_);
|
|
|
|
if (ret) {
|
|
|
|
completed_nbest_ = &n_best_.Extract(ret);
|
|
|
|
} else {
|
|
|
|
backing_for_single_.clear();
|
|
|
|
completed_nbest_ = &backing_for_single_;
|
2012-10-11 19:38:39 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-12 16:53:08 +04:00
|
|
|
// Explicit instantiations of Manager::LMCallback, one for each lm::ngram model
// type named below.
template void Manager::LMCallback<lm::ngram::ProbingModel>(
  const lm::ngram::ProbingModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::RestProbingModel>(
  const lm::ngram::RestProbingModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::TrieModel>(
  const lm::ngram::TrieModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::QuantTrieModel>(
  const lm::ngram::QuantTrieModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::ArrayTrieModel>(
  const lm::ngram::ArrayTrieModel &model, const std::vector<lm::WordIndex> &words);
template void Manager::LMCallback<lm::ngram::QuantArrayTrieModel>(
  const lm::ngram::QuantArrayTrieModel &model, const std::vector<lm::WordIndex> &words);
|
|
|
|
|
2014-12-05 20:59:53 +03:00
|
|
|
void Manager::Decode()
|
2013-05-29 21:16:15 +04:00
|
|
|
{
|
2013-05-28 20:35:06 +04:00
|
|
|
LanguageModel::GetFirstLM().IncrementalCallback(*this);
|
2014-12-05 20:59:53 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
const std::vector<search::Applied> &Manager::GetNBest() const
|
|
|
|
{
|
2012-11-16 18:46:10 +04:00
|
|
|
return *completed_nbest_;
|
|
|
|
}
|
|
|
|
|
2014-12-10 14:28:47 +03:00
|
|
|
// Writes the first entry of the n-best list as the best translation, or the
// "no translation" output when the list is empty.
void Manager::OutputBest(OutputCollector *collector) const
{
  const long translationId = m_source.GetTranslationId();
  const std::vector<search::Applied> &results = GetNBest();

  if (results.empty()) {
    OutputBestNone(collector, translationId);
    return;
  }
  OutputBestHypo(collector, results[0], translationId);
}
|
|
|
|
|
|
|
|
|
2014-12-02 22:09:10 +03:00
|
|
|
// Writes the completed n-best list to `collector`; does nothing when no
// collector is given.
void Manager::OutputNBest(OutputCollector *collector) const
{
  if (collector != NULL) {
    OutputNBestList(collector, *completed_nbest_, m_source.GetTranslationId());
  }
}
|
|
|
|
|
|
|
|
// Formats every entry of `nbest` as
//   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
// (one per line) and writes the whole text to `collector` in one call.
//
// collector     destination; when NULL the function returns without output
// nbest         derivations to print, in the order given
// translationId id printed on every line and passed to collector->Write
//
// Throws (via UTIL_THROW_IF2) when a derivation yields fewer than two words,
// because the first and last word of each phrase are stripped.
void Manager::OutputNBestList(OutputCollector *collector, const std::vector<search::Applied> &nbest, long translationId) const
{
  // Check before the first use of the pointer; sibling output methods treat a
  // missing collector the same way.
  if (collector == NULL) {
    return;
  }

  const StaticData &staticData = StaticData::Instance();
  const std::vector<Moses::FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();

  std::ostringstream out;
  // Precision is only adjusted when the collector reports that it writes to
  // cout (behaviour carried over from the original OutputNBestList).
  if (collector->OutputIsCout()) {
    FixPrecision(out);
  }

  // Reused for every entry; PhraseAndFeatures fills both from scratch.
  Phrase outputPhrase;
  ScoreComponentCollection features;
  for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
    Incremental::PhraseAndFeatures(*i, outputPhrase, features);
    // <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");

    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(features, out);
    out << " ||| " << i->GetScore() << '\n';
  }

  collector->Write(translationId, out.str());
}
|
|
|
|
|
2014-12-03 20:04:10 +03:00
|
|
|
// Writes the detailed report for the first entry of the completed n-best list.
// Does nothing without a collector or when the list is empty.
void Manager::OutputDetailedTranslationReport(OutputCollector *collector) const
{
  if (!collector || completed_nbest_->empty()) {
    return;
  }

  const search::Applied &best = completed_nbest_->at(0);
  // m_source is treated as a Sentence here (unchecked static_cast).
  const Sentence &sentence = static_cast<const Sentence&>(m_source);
  OutputDetailedTranslationReport(collector, &best, sentence, m_source.GetTranslationId());
}
|
|
|
|
|
|
|
|
// Builds the detailed translation report for the derivation rooted at `applied`
// and writes it to `collector` under `translationId`.
//
// collector     destination; when NULL nothing is produced
// applied       root of the derivation; when NULL nothing is produced
// sentence      source sentence, used to reconstruct the source side of rules
// translationId id passed through to the report lines and to collector->Write
void Manager::OutputDetailedTranslationReport(
  OutputCollector *collector,
  const search::Applied *applied,
  const Sentence &sentence,
  long translationId) const
{
  // Both pointers are dereferenced below, so both are checked up front.
  if (collector == NULL || applied == NULL) {
    return;
  }

  std::ostringstream out;
  ApplicationContext applicationContext;

  OutputTranslationOptions(out, applicationContext, applied, sentence, translationId);
  collector->Write(translationId, out.str());
}
|
|
|
|
|
|
|
|
void Manager::OutputTranslationOptions(std::ostream &out,
|
|
|
|
ApplicationContext &applicationContext,
|
|
|
|
const search::Applied *applied,
|
|
|
|
const Sentence &sentence, long translationId) const
|
|
|
|
{
|
|
|
|
if (applied != NULL) {
|
|
|
|
OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
|
|
|
|
out << std::endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
// recursive
|
|
|
|
const search::Applied *child = applied->Children();
|
|
|
|
for (size_t i = 0; i < applied->GetArity(); i++) {
|
|
|
|
OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Manager::OutputTranslationOption(std::ostream &out,
|
|
|
|
ApplicationContext &applicationContext,
|
|
|
|
const search::Applied *applied,
|
|
|
|
const Sentence &sentence,
|
|
|
|
long translationId) const
|
|
|
|
{
|
|
|
|
ReconstructApplicationContext(applied, sentence, applicationContext);
|
|
|
|
const TargetPhrase &phrase = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
|
|
|
|
out << "Trans Opt " << translationId
|
|
|
|
<< " " << applied->GetRange()
|
|
|
|
<< ": ";
|
|
|
|
WriteApplicationContext(out, applicationContext);
|
|
|
|
out << ": " << phrase.GetTargetLHS()
|
|
|
|
<< "->" << phrase
|
|
|
|
<< " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given a hypothesis and sentence, reconstructs the 'application context' --
|
|
|
|
// the source RHS symbols of the SCFG rule that was applied, plus their spans.
|
|
|
|
void Manager::ReconstructApplicationContext(const search::Applied *applied,
|
|
|
|
const Sentence &sentence,
|
|
|
|
ApplicationContext &context) const
|
|
|
|
{
|
|
|
|
context.clear();
|
|
|
|
const WordsRange &span = applied->GetRange();
|
|
|
|
const search::Applied *child = applied->Children();
|
|
|
|
size_t i = span.GetStartPos();
|
|
|
|
size_t j = 0;
|
|
|
|
|
|
|
|
while (i <= span.GetEndPos()) {
|
|
|
|
if (j == applied->GetArity() || i < child->GetRange().GetStartPos()) {
|
|
|
|
// Symbol is a terminal.
|
|
|
|
const Word &symbol = sentence.GetWord(i);
|
|
|
|
context.push_back(std::make_pair(symbol, WordsRange(i, i)));
|
|
|
|
++i;
|
|
|
|
} else {
|
|
|
|
// Symbol is a non-terminal.
|
|
|
|
const Word &symbol = static_cast<const TargetPhrase*>(child->GetNote().vp)->GetTargetLHS();
|
|
|
|
const WordsRange &range = child->GetRange();
|
|
|
|
context.push_back(std::make_pair(symbol, range));
|
|
|
|
i = range.GetEndPos()+1;
|
|
|
|
++child;
|
|
|
|
++j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-12-02 22:09:10 +03:00
|
|
|
|
2014-12-04 21:35:19 +03:00
|
|
|
// Writes the tree-fragment report for the first completed derivation to
// `collector`. Does nothing without a collector or when Completed() is empty.
// The source is converted with dynamic_cast to a Sentence reference, which
// throws std::bad_cast if the source is of another type.
void Manager::OutputDetailedTreeFragmentsTranslationReport(OutputCollector *collector) const
{
  if (collector == NULL || Completed().empty()) {
    return;
  }

  const search::Applied *applied = &Completed()[0];
  const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
  // Kept as long: that is the type the report helpers take and the other
  // output methods in this file use.
  const long translationId = m_source.GetTranslationId();

  std::ostringstream out;
  ApplicationContext applicationContext;

  OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);

  //Tree of full sentence
  //TODO: incremental search doesn't support stateful features

  collector->Write(translationId, out.str());
}
|
|
|
|
|
|
|
|
void Manager::OutputTreeFragmentsTranslationOptions(std::ostream &out,
|
|
|
|
ApplicationContext &applicationContext,
|
|
|
|
const search::Applied *applied,
|
|
|
|
const Sentence &sentence,
|
|
|
|
long translationId) const
|
|
|
|
{
|
|
|
|
|
|
|
|
if (applied != NULL) {
|
|
|
|
OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
|
|
|
|
|
|
|
|
const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
|
|
|
|
|
|
|
|
out << " ||| ";
|
|
|
|
if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
|
|
|
|
out << " " << *property->GetValueString();
|
|
|
|
} else {
|
|
|
|
out << " " << "noTreeInfo";
|
|
|
|
}
|
|
|
|
out << std::endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
// recursive
|
|
|
|
const search::Applied *child = applied->Children();
|
|
|
|
for (size_t i = 0; i < applied->GetArity(); i++) {
|
|
|
|
OutputTreeFragmentsTranslationOptions(out, applicationContext, child++, sentence, translationId);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-12-10 14:28:47 +03:00
|
|
|
// Writes the surface string of `applied` (optionally preceded by its score) as
// one line to `collector`, and logs it at verbosity level 1. Does nothing
// without a collector. Throws (via UTIL_THROW_IF2) when the derivation yields
// fewer than two words, because the first and last word are stripped.
void Manager::OutputBestHypo(OutputCollector *collector, search::Applied applied, long translationId) const
{
  if (collector == NULL) return;

  const StaticData &staticData = StaticData::Instance();

  std::ostringstream line;
  FixPrecision(line);
  if (staticData.GetOutputHypoScore()) {
    line << applied.GetScore() << ' ';
  }

  Phrase outPhrase;
  Incremental::ToPhrase(applied, outPhrase);
  // delete 1st & last
  UTIL_THROW_IF2(outPhrase.GetSize() < 2,
                 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
  outPhrase.RemoveWord(0);
  outPhrase.RemoveWord(outPhrase.GetSize() - 1);

  line << outPhrase.GetStringRep(staticData.GetOutputFactorOrder()) << '\n';
  collector->Write(translationId, line.str());

  VERBOSE(1,"BEST TRANSLATION: " << outPhrase << "[total=" << applied.GetScore() << "]" << std::endl);
}
|
|
|
|
|
|
|
|
// Output used when there is no translation: an empty line, preceded by a "0 "
// score when hypothesis scores are printed. Does nothing without a collector.
void Manager::OutputBestNone(OutputCollector *collector, long translationId) const
{
  if (collector == NULL) return;

  const bool with_score = StaticData::Instance().GetOutputHypoScore();
  if (!with_score) {
    collector->Write(translationId, "\n");
    return;
  }
  collector->Write(translationId, "0 \n");
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace
|
|
|
|
{
|
2012-11-16 18:46:10 +04:00
|
|
|
|
|
|
|
struct NoOp {
|
|
|
|
void operator()(const TargetPhrase &) const {}
|
|
|
|
};
|
|
|
|
struct AccumScore {
|
|
|
|
AccumScore(ScoreComponentCollection &out) : out_(&out) {}
|
|
|
|
void operator()(const TargetPhrase &phrase) {
|
|
|
|
out_->PlusEquals(phrase.GetScoreBreakdown());
|
|
|
|
}
|
|
|
|
ScoreComponentCollection *out_;
|
|
|
|
};
|
2013-05-29 21:16:15 +04:00
|
|
|
// Appends the yield of the derivation rooted at `final` to `out`. `action` is
// invoked once for every rule, before that rule's words are processed.
// Terminals are appended directly; each non-terminal is expanded by recursing
// into the next child, taken in target-side order.
template <class Action> void AppendToPhrase(const search::Applied final, Phrase &out, Action action)
{
  assert(final.Valid());

  // The note attached to the node points at the rule's TargetPhrase.
  const TargetPhrase &rule = *static_cast<const TargetPhrase*>(final.GetNote().vp);
  action(rule);

  const search::Applied *next_child = final.Children();
  const std::size_t rule_length = rule.GetSize();
  for (std::size_t pos = 0; pos < rule_length; ++pos) {
    const Word &symbol = rule.GetWord(pos);
    if (!symbol.IsNonTerminal()) {
      out.AddWord(symbol);
      continue;
    }
    AppendToPhrase(*next_child, out, action);
    ++next_child;
  }
}
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Replaces the contents of `out` with the yield of the derivation `final`.
void ToPhrase(const search::Applied final, Phrase &out)
{
  out.Clear();
  NoOp ignore_rules;
  AppendToPhrase(final, out, ignore_rules);
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
// Fills `phrase` with the yield of the derivation `final` and `features` with
// the sum of the score breakdowns of all rules used. The entry of the first
// language model is then overwritten with the score CalcScore gives for the
// complete phrase.
void PhraseAndFeatures(const search::Applied final, Phrase &phrase, ScoreComponentCollection &features)
{
  phrase.Clear();
  features.ZeroAll();
  AccumScore accumulate(features);
  AppendToPhrase(final, phrase, accumulate);

  // If we made it this far, there is only one language model.
  const LanguageModel &model = LanguageModel::GetFirstLM();

  // Only the full score is used; the other two outputs are discarded.
  float full;
  float ignored_ngram;
  std::size_t ignored_oov;
  model.CalcScore(phrase, full, ignored_ngram, ignored_oov);

  // CalcScore transforms, but EvaluateWhenApplied doesn't.
  features.Assign(&model, full);
}
|
|
|
|
|
|
|
|
} // namespace Incremental
|
|
|
|
} // namespace Moses
|