Implemented -r (-recover-input-path) to recover the actual path through the CN/word lattice that was used in the best translation. Also cleaned up the handling of source Phrases in the context of a particular hypothesis, which may improve performance when translating with lexicalized reordering models.

Removed PhraseReference since the concept was broken for CN/lattice inputs.



git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1468 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
redpony 2007-09-28 16:43:33 +00:00
parent d2928a0653
commit 7dbcef925d
13 changed files with 51 additions and 90 deletions

View File

@ -194,6 +194,24 @@ void IOStream::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, lo
}
}
// Follows the hypothesis chain backwards through GetPrevHypo() and, for every
// hypothesis that has a predecessor, stores its source phrase in `map` at the
// start position of its source words range. A hypothesis without a predecessor
// (presumably the initial, empty hypothesis) contributes nothing.
// Entries are written oldest hypothesis first. `map` is indexed without a
// bounds check, so it must already be large enough for every start position.
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis* const previous = hypo->GetPrevHypo();
  if (!previous) {
    return;
  }

  // Fill in the earlier part of the path before recording this step.
  OutputInput(map, previous);

  const size_t startPos = hypo->GetCurrSourceWordsRange().GetStartPos();
  map[startPos] = hypo->GetSourcePhrase();
}
void OutputInput(std::ostream& os, const Hypothesis* hypo)
{
size_t len = StaticData::Instance().GetInput()->GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
OutputInput(inp_phrases, hypo);
for (size_t i=0; i<len; ++i)
if (inp_phrases[i]) os << *inp_phrases[i];
}
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
{
if (hypo != NULL)
@ -205,6 +223,10 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
if (!m_surpressSingleBestOutput)
{
if (StaticData::Instance().IsPathRecoveryEnabled()) {
OutputInput(cout, hypo);
cout << "||| ";
}
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
cout << endl;
}

View File

@ -156,9 +156,9 @@ public:
return m_currTargetWordsRange.GetEndPos() + 1;
}
inline const InputType &GetSourcePhrase() const
inline const Phrase* GetSourcePhrase() const
{
return m_sourceInput;
return m_sourcePhrase;
}
std::string GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const;

View File

@ -41,7 +41,6 @@ libmoses_a_SOURCES = \
PhraseDictionaryNode.cpp \
PhraseDictionaryTree.cpp \
PhraseDictionaryTreeAdaptor.cpp \
PhraseReference.cpp \
PrefixTreeMap.cpp \
ScoreComponentCollection.cpp \
ScoreIndexManager.cpp \

View File

@ -428,7 +428,7 @@ void Manager::CalcDecoderStatistics() const
string buff;
string buff2;
TRACE_ERR( "Source and Target Units:"
<< hypo->GetSourcePhrase());
<< *StaticData::Instance().GetInput());
buff2.insert(0,"] ");
buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
buff2.insert(0,":");

View File

@ -78,6 +78,7 @@ Parameter::Parameter()
AddParam("mbr-scale", "scaling factor to convert log linear score into a probability.");
AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)");
AddParam("use-persistent-cache", "cache translation options across sentences (default=true)");
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
}
Parameter::~Parameter()

View File

@ -1,28 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseReference.h"
using std::ostream;
ostream& operator << (ostream& out, const PhraseReference& phrase)
{
return out << phrase.GetSubphrase();
}

View File

@ -1,48 +0,0 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef MOSES_PHRASE_REFERENCE_H
#define MOSES_PHRASE_REFERENCE_H
#include <iostream>
#include "InputType.h"
#include "WordsRange.h"
/***
 * Refers to a subphrase of an input: stores a pointer to the full input and
 * the word range selecting the subphrase. The full input is not owned here
 * and may be separately memory-managed.
 *
 * A default-constructed reference holds a NULL input pointer; both accessors
 * dereference that pointer, so they must not be called on such an object.
 */
class PhraseReference
{
public:
  // Empty reference: no input, range (0, 0).
  PhraseReference()
    : fullPhrase(NULL)
    , range(0, 0)
  {
  }

  // Refers to the words of `phrase` covered by `r`; only the address of
  // `phrase` is stored.
  PhraseReference(const InputType& phrase, const WordsRange& r)
    : fullPhrase(&phrase)
    , range(r)
  {
  }

  // The full input this reference points into.
  const InputType& GetFullPhrase() const
  {
    return *fullPhrase;
  }

  // The referenced part of the input, returned by value from the input's
  // GetSubString() for the stored range.
  Phrase GetSubphrase() const
  {
    return fullPhrase->GetSubString(range);
  }

protected:
  const InputType* fullPhrase;  // not owned
  WordsRange range;
};
std::ostream& operator << (std::ostream& out, const PhraseReference& phrase);
#endif //MOSES_PHRASE_REFERENCE_H

View File

@ -41,6 +41,6 @@ void SentenceStats::AddDeletedWords(const Hypothesis& hypo)
if(hypo.GetPrevHypo() != NULL && hypo.GetPrevHypo()->GetCurrSourceWordsRange().GetNumWordsCovered() > 0) AddDeletedWords(*hypo.GetPrevHypo());
if(hypo.GetCurrTargetWordsRange().GetNumWordsCovered() == 0)
{
m_deletedWords.push_back(PhraseReference(hypo.GetSourcePhrase(), hypo.GetCurrSourceWordsRange()));
m_deletedWords.push_back(hypo.GetSourcePhrase());
}
}

View File

@ -29,7 +29,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h" //FactorArray
#include "InputType.h"
#include "Util.h" //Join()
#include "PhraseReference.h"
struct RecombinationInfo
{
@ -74,7 +73,7 @@ class SentenceStats
size_t GetTotalSourceWords() const {return m_totalSourceWords;}
size_t GetNumWordsDeleted() const {return m_deletedWords.size();}
size_t GetNumWordsInserted() const {return m_insertedWords.size();}
const std::vector<PhraseReference>& GetDeletedWords() const {return m_deletedWords;}
const std::vector<const Phrase*>& GetDeletedWords() const {return m_deletedWords;}
const std::vector<std::string>& GetInsertedWords() const {return m_insertedWords;}
void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo)
@ -99,7 +98,7 @@ class SentenceStats
//words
size_t m_totalSourceWords;
std::vector<PhraseReference> m_deletedWords; //count deleted words/phrases in the final hypothesis
std::vector<const Phrase*> m_deletedWords; //count deleted words/phrases in the final hypothesis
std::vector<std::string> m_insertedWords; //count inserted words in the final hypothesis
};

View File

@ -101,6 +101,14 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_inputType == 2) { s_it = "word lattice"; }
VERBOSE(2,"input type is: "<<s_it<<"\n");
if(m_parameter->GetParam("recover-input-path").size()) {
m_recoverPath = Scan<bool>(m_parameter->GetParam("recover-input-path")[0]);
if (m_recoverPath && m_inputType == SentenceInput) {
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
m_recoverPath = false;
}
}
// factor delimiter
if (m_parameter->GetParam("factor-delimiter").size() > 0) {
m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];

View File

@ -90,6 +90,7 @@ protected:
bool m_wordDeletionEnabled;
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
InputTypeEnum m_inputType;
size_t m_numInputScores;
@ -230,6 +231,10 @@ public:
{
return m_maxHypoStackSize;
}
size_t IsPathRecoveryEnabled() const
{
return m_recoverPath;
}
int GetMaxDistortion() const
{
return m_maxDistortion;

View File

@ -55,6 +55,9 @@ T 0
[verbose]
2
[recover-input-path]
1
[n-best-list]
nbest
10

View File

@ -1,9 +1,9 @@
TRANSLATION_1=1 2
TRANSLATION_2=1 2 3
TRANSLATION_3=1 2 3
TRANSLATION_4=1 2 3
TRANSLATION_5=4 1234 5
TRANSLATION_6=1
TRANSLATION_1=A B ||| 1 2
TRANSLATION_2=A B C ||| 1 2 3
TRANSLATION_3=A B C ||| 1 2 3
TRANSLATION_4=A B C ||| 1 2 3
TRANSLATION_5=D E X ||| 4 1234 5
TRANSLATION_6=A ||| 1
LMLOAD_TIME ~ 0.000
PTLOAD_TIME ~ 0.000
SCORE_1 = 1.090