mosesdecoder/moses/DecodeStepTranslation.cpp

277 lines
10 KiB
C++
Raw Normal View History

// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "DecodeStepTranslation.h"
#include "TranslationOption.h"
#include "TranslationOptionCollection.h"
#include "PartialTranslOptColl.h"
#include "FactorCollection.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
/**
 * Construct a translation decode step backed by a phrase dictionary.
 *
 * @param pdf       phrase dictionary for this step; it is dereferenced here,
 *                  so it must not be NULL
 * @param prev      previous decode step, forwarded to the DecodeStep base
 * @param features  feature functions, forwarded to the DecodeStep base
 */
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* pdf,
    const DecodeStep* prev,
    const std::vector<FeatureFunction*> &features)
  : DecodeStep(pdf, prev, features)
{
  // Do not apply feature functions that come from the current phrase table:
  // that should already have been done by the phrase table itself.
  const std::vector<FeatureFunction*> &tableFeatures = pdf->GetFeaturesToApply();
  size_t idx = 0;
  while (idx < tableFeatures.size()) {
    FeatureFunction *tableFeature = tableFeatures[idx];
    RemoveFeature(tableFeature);
    ++idx;
  }
}
2013-06-28 21:15:12 +04:00
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
2013-07-05 02:38:18 +04:00
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit
, const TargetPhraseCollection *phraseColl) const
2013-06-28 21:15:12 +04:00
{
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0) {
2013-07-05 02:38:18 +04:00
// word deletion
outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
return;
2013-06-28 21:15:12 +04:00
}
// normal trans step
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const InputPath &inputPath = inputPartialTranslOpt.GetInputPath();
2013-06-28 21:15:12 +04:00
const PhraseDictionary* phraseDictionary =
2013-07-05 02:38:18 +04:00
decodeStep.GetPhraseDictionaryFeature();
2013-06-28 21:15:12 +04:00
const TargetPhrase &inPhrase = inputPartialTranslOpt.GetTargetPhrase();
const size_t currSize = inPhrase.GetSize();
const size_t tableLimit = phraseDictionary->GetTableLimit();
if (phraseColl != NULL) {
2013-07-05 02:38:18 +04:00
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
for (iterTargetPhrase = phraseColl->begin(); iterTargetPhrase != iterEnd; ++iterTargetPhrase) {
const TargetPhrase& targetPhrase = **iterTargetPhrase;
2013-09-25 03:47:55 +04:00
// const ScoreComponentCollection &transScores = targetPhrase.GetScoreBreakdown();
2013-07-05 02:38:18 +04:00
// skip if the
if (targetPhrase.GetSize() != currSize) continue;
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
TargetPhrase outPhrase(inPhrase);
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
if (IsFilteringStep()) {
if (!inputPartialTranslOpt.IsCompatible(targetPhrase, m_conflictFactors))
continue;
}
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
outPhrase.Merge(targetPhrase, m_newOutputFactors);
2014-08-08 18:59:34 +04:00
outPhrase.EvaluateInIsolation(inputPath.GetPhrase(), m_featuresToApply); // need to do this as all non-transcores would be screwed up
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
TranslationOption *newTransOpt = new TranslationOption(sourceWordsRange, outPhrase);
assert(newTransOpt != NULL);
2013-06-28 21:15:12 +04:00
newTransOpt->SetInputPath(inputPath);
2013-07-05 02:38:18 +04:00
outputPartialTranslOptColl.Add(newTransOpt );
2013-06-28 21:15:12 +04:00
2013-07-05 02:38:18 +04:00
}
2013-06-28 21:15:12 +04:00
} else if (sourceWordsRange.GetNumWordsCovered() == 1) {
2013-07-05 02:38:18 +04:00
// unknown handler
//toc->ProcessUnknownWord(sourceWordsRange.GetStartPos(), factorCollection);
2013-06-28 21:15:12 +04:00
}
}
void DecodeStepTranslation::ProcessInitialTranslation(
2013-05-29 21:16:15 +04:00
const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPath &inputPath
, const TargetPhraseCollection *phraseColl) const
{
2013-02-22 23:17:57 +04:00
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
if (phraseColl != NULL) {
IFVERBOSE(3) {
if(StaticData::Instance().GetInputType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");
else
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
}
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase) {
const TargetPhrase &targetPhrase = **iterTargetPhrase;
TranslationOption *transOpt = new TranslationOption(wordsRange, targetPhrase);
transOpt->SetInputPath(inputPath);
2013-05-11 17:13:26 +04:00
outputPartialTranslOptColl.Add (transOpt);
VERBOSE(3,"\t" << targetPhrase << "\n");
}
VERBOSE(3,std::endl);
}
}
2013-08-24 00:34:10 +04:00
void DecodeStepTranslation::ProcessInitialTranslationLEGACY(
2013-06-28 21:05:49 +04:00
const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit
, const InputPathList &inputPathList) const
2013-06-28 21:05:49 +04:00
{
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature();
const size_t tableLimit = phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
2013-08-24 00:34:10 +04:00
const TargetPhraseCollectionWithSourcePhrase *phraseColl = phraseDictionary->GetTargetPhraseCollectionLEGACY(source,wordsRange);
2013-06-28 21:05:49 +04:00
if (phraseColl != NULL) {
2013-06-28 21:06:36 +04:00
IFVERBOSE(3) {
if(StaticData::Instance().GetInputType() == SentenceInput)
TRACE_ERR("[" << source.GetSubString(wordsRange) << "; " << startPos << "-" << endPos << "]\n");
else
TRACE_ERR("[" << startPos << "-" << endPos << "]" << std::endl);
}
2013-06-28 21:05:49 +04:00
const std::vector<Phrase> &sourcePhrases = phraseColl->GetSourcePhrases();
2013-06-28 21:06:36 +04:00
TargetPhraseCollection::const_iterator iterTargetPhrase, iterEnd;
std::vector<Phrase>::const_iterator iterSourcePhrase;
2013-06-28 21:06:36 +04:00
iterEnd = (!adhereTableLimit || tableLimit == 0 || phraseColl->GetSize() < tableLimit) ? phraseColl->end() : phraseColl->begin() + tableLimit;
2013-06-28 21:05:49 +04:00
for (iterTargetPhrase = phraseColl->begin(), iterSourcePhrase = sourcePhrases.begin()
2013-08-09 21:17:18 +04:00
; iterTargetPhrase != iterEnd
; ++iterTargetPhrase, ++iterSourcePhrase) {
assert(iterSourcePhrase != sourcePhrases.end());
2013-06-28 21:06:36 +04:00
const TargetPhrase &targetPhrase = **iterTargetPhrase;
const Phrase &sourcePhrase = *iterSourcePhrase;
2013-08-24 00:34:10 +04:00
const InputPath &inputPath = GetInputPathLEGACY(targetPhrase, sourcePhrase, inputPathList);
2013-06-28 21:06:36 +04:00
TranslationOption *transOpt = new TranslationOption(wordsRange, targetPhrase);
transOpt->SetInputPath(inputPath);
2013-06-28 21:05:49 +04:00
2013-06-28 21:06:36 +04:00
outputPartialTranslOptColl.Add (transOpt);
2013-06-28 21:05:49 +04:00
2013-06-28 21:06:36 +04:00
VERBOSE(3,"\t" << targetPhrase << "\n");
}
VERBOSE(3,std::endl);
2013-06-28 21:05:49 +04:00
}
}
2013-08-24 00:34:10 +04:00
/**
 * Find the input path whose first non-epsilon word equals the first word of
 * the given source phrase.
 *
 * @param targetPhrase   not used by this function
 * @param sourcePhrase   source phrase from the phrase table; its word at index 0
 *                       is read unconditionally
 * @param inputPathList  candidate paths, searched in order
 * @return the first matching input path
 * @throws util::Exception ("Input path not found") when no path matches
 */
const InputPath &DecodeStepTranslation::GetInputPathLEGACY(
  const TargetPhrase targetPhrase,
  const Phrase sourcePhrase,
  const InputPathList &inputPathList) const
{
  const Word &firstTableWord = sourcePhrase.GetWord(0);

  for (InputPathList::const_iterator pathIt = inputPathList.begin();
       pathIt != inputPathList.end(); ++pathIt) {
    const InputPath &candidatePath = **pathIt;
    const Phrase &pathPhrase = candidatePath.GetPhrase();

    // Advance past leading epsilon words; a path made up only of epsilons
    // (or an empty one) can never match.
    const size_t pathLength = pathPhrase.GetSize();
    size_t pos = 0;
    while (pos < pathLength && pathPhrase.GetWord(pos).IsEpsilon()) {
      ++pos;
    }
    if (pos == pathLength) {
      continue;
    }

    if (pathPhrase.GetWord(pos) == firstTableWord) {
      return candidatePath;
    }
  }

  UTIL_THROW(util::Exception, "Input path not found");
}
2015-03-09 03:34:18 +03:00
/**
 * Legacy variant of Process(): expands one partial translation option with
 * target phrases that are looked up in the phrase dictionary here.
 *
 * @param in                partial option produced by the preceding steps
 * @param decodeStep        step whose phrase dictionary is queried
 * @param out               receives the newly allocated options
 * @param toc               its source is passed to the dictionary lookup; it is
 *                          dereferenced unconditionally once `in` has a
 *                          non-empty target phrase
 * @param adhereTableLimit  when true and the table limit is non-zero, only the
 *                          first `table limit` phrases are considered
 */
void
DecodeStepTranslation::
ProcessLEGACY(TranslationOption const& in,
              DecodeStep const& decodeStep,
              PartialTranslOptColl &out,
              TranslationOptionCollection *toc,
              bool adhereTableLimit) const
{
  if (in.GetTargetPhrase().GetSize() == 0) {
    // word deletion
    out.Add(new TranslationOption(in));
    return;
  }

  // normal trans step
  WordsRange const& coveredRange = in.GetSourceWordsRange();
  InputPath const& path = in.GetInputPath();
  PhraseDictionary const* dictionary = decodeStep.GetPhraseDictionaryFeature();
  TargetPhrase const& partialPhrase = in.GetTargetPhrase();
  size_t const partialSize = partialPhrase.GetSize();
  size_t const limit = dictionary->GetTableLimit();

  TargetPhraseCollectionWithSourcePhrase const* candidates =
    dictionary->GetTargetPhraseCollectionLEGACY(toc->GetSource(),coveredRange);
  if (candidates == NULL) {
    return;
  }

  // Truncate to the table limit only if requested, the limit is non-zero and
  // the collection holds at least that many phrases.
  TargetPhraseCollection::const_iterator stopAt;
  if (adhereTableLimit && limit && candidates->GetSize() >= limit) {
    stopAt = candidates->begin() + limit;
  } else {
    stopAt = candidates->end();
  }

  TargetPhraseCollection::const_iterator cursor = candidates->begin();
  for (; cursor != stopAt; ++cursor) {
    TargetPhrase const& candidate = **cursor;

    // candidates must have the same length as the partial phrase
    if (candidate.GetSize() != partialSize) {
      continue;
    }
    // a filtering step additionally drops incompatible candidates
    if (IsFilteringStep() && !in.IsCompatible(candidate, m_conflictFactors)) {
      continue;
    }

    TargetPhrase merged(partialPhrase);
    merged.Merge(candidate, m_newOutputFactors);
    // need to do this as all non-transcores would be screwed up
    merged.EvaluateInIsolation(path.GetPhrase(), m_featuresToApply);

    TranslationOption *option = new TranslationOption(coveredRange, merged);
    assert(option != NULL);
    option->SetInputPath(path);
    out.Add(option);
  }
}
}