mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
5240c430ce
This adds a new string-to-tree decoder, which can be enabled with the -s2t option. It's intended to be faster and simpler than the generic chart decoder, and is designed to support lattice input (still WIP). For an en-de system trained on WMT14 data, it's approximately 40% faster in practice. For background information, see the decoding section of the EMNLP tutorial on syntax-based MT: http://www.emnlp2014.org/tutorials/5_notes.pdf Some features are not implemented yet, including support for internal tree structure and soft source-syntactic constraints.
60 lines
1.8 KiB
C++
60 lines
1.8 KiB
C++
#include "SHyperedge.h"
|
|
|
|
#include "moses/StaticData.h"
|
|
|
|
#include "SVertex.h"
|
|
|
|
namespace Moses
|
|
{
|
|
namespace Syntax
|
|
{
|
|
|
|
// Builds the target-side yield of the one-best derivation headed by
// hyperedge h.
//
// The symbols of h.translation are visited left to right.  A terminal is
// copied to the output unchanged.  A non-terminal is replaced by the yield
// obtained by recursing into the `best` hyperedge of the tail vertex that the
// rule's non-terminal alignment (GetNonTermIndexMap2) associates with that
// target position.
//
// Placeholder substitution is not implemented here: if StaticData reports a
// placeholder factor (anything other than NOT_FOUND), the function hits
// assert(false) on the first terminal.  When assertions are compiled out the
// terminal is simply left as it was added.
Phrase GetOneBestTargetYield(const SHyperedge &h)
{
  const FactorType placeholderFactor =
    StaticData::Instance().GetPlaceholderFactor();

  Phrase targetYield(ARRAY_SIZE_INCR);

  // Indexed by target position; the value is used as an index into h.tail.
  const AlignmentInfo::NonTermIndexMap &tailIndexByTargetPos =
    h.translation->GetAlignNonTerm().GetNonTermIndexMap2();

  for (std::size_t i = 0; i < h.translation->GetSize(); ++i) {
    const Word &symbol = h.translation->GetWord(i);

    if (!symbol.IsNonTerminal()) {
      // Terminal: emit it verbatim.
      targetYield.AddWord(symbol);

      if (placeholderFactor != NOT_FOUND) {
        // Placeholders are requested but unsupported for SHyperedge.
        assert(false);
        // FIXME Modify this chunk of code to work for SHyperedge.
        /*
        std::set<std::size_t> sourcePosSet =
          h.translation->GetAlignTerm().GetAlignmentsForTarget(pos);
        if (sourcePosSet.size() == 1) {
          const std::vector<const Word*> *ruleSourceFromInputPath =
            hypo.GetTranslationOption().GetSourceRuleFromInputPath();
          UTIL_THROW_IF2(ruleSourceFromInputPath == NULL,
                         "Source Words in of the rules hasn't been filled out");
          std::size_t sourcePos = *sourcePosSet.begin();
          const Word *sourceWord = ruleSourceFromInputPath->at(sourcePos);
          UTIL_THROW_IF2(sourceWord == NULL,
                         "Null source word at position " << sourcePos);
          const Factor *factor = sourceWord->GetFactor(placeholderFactor);
          if (factor) {
            ret.Back()[0] = factor;
          }
        }
        */
      }
      continue;
    }

    // Non-terminal: splice in the yield of the best sub-derivation.
    const std::size_t tailIndex = tailIndexByTargetPos[i];
    const SHyperedge &bestIncoming = *h.tail[tailIndex]->best;
    Phrase subYield = GetOneBestTargetYield(bestIncoming);
    targetYield.Append(subYield);
  }

  return targetYield;
}
|
|
|
|
} // Syntax
|
|
} // Moses
|