mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
Implemented -r (-recover-input-path) to recover the actual path through the CN/word lattice that was used in the best translation. Also cleaned up handling of source Phrases in the context of a particular hypothesis, which may improve performance translating with lexicalized reordering models.
Removed PhraseReference since the concept was broken for CN/lattice inputs. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1468 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
d2928a0653
commit
7dbcef925d
@ -194,6 +194,24 @@ void IOStream::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, lo
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Walk the hypothesis chain backwards from hypo and record, for every
 * hypothesis that has a predecessor, the source phrase it reports via
 * GetSourcePhrase(). Each phrase is stored at the index given by the start
 * position of that hypothesis' current source word range.
 *
 * \param map  slots indexed by source start position; it is indexed directly
 *             and never resized here, so it must already be large enough
 * \param hypo hypothesis to start from (dereferenced without a null check)
 */
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis* const predecessor = hypo->GetPrevHypo();
  if (!predecessor)
    return; // a hypothesis without a predecessor contributes no phrase

  // Fill in the earlier part of the chain first, then this hypothesis, so
  // that later hypotheses are written after earlier ones.
  OutputInput(map, predecessor);
  map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
}
|
||||
|
||||
void OutputInput(std::ostream& os, const Hypothesis* hypo)
|
||||
{
|
||||
size_t len = StaticData::Instance().GetInput()->GetSize();
|
||||
std::vector<const Phrase*> inp_phrases(len, 0);
|
||||
OutputInput(inp_phrases, hypo);
|
||||
for (size_t i=0; i<len; ++i)
|
||||
if (inp_phrases[i]) os << *inp_phrases[i];
|
||||
}
|
||||
|
||||
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if (hypo != NULL)
|
||||
@ -205,6 +223,10 @@ void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bo
|
||||
|
||||
if (!m_surpressSingleBestOutput)
|
||||
{
|
||||
if (StaticData::Instance().IsPathRecoveryEnabled()) {
|
||||
OutputInput(cout, hypo);
|
||||
cout << "||| ";
|
||||
}
|
||||
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
cout << endl;
|
||||
}
|
||||
|
@ -156,9 +156,9 @@ public:
|
||||
return m_currTargetWordsRange.GetEndPos() + 1;
|
||||
}
|
||||
|
||||
inline const InputType &GetSourcePhrase() const
|
||||
inline const Phrase* GetSourcePhrase() const
|
||||
{
|
||||
return m_sourceInput;
|
||||
return m_sourcePhrase;
|
||||
}
|
||||
|
||||
std::string GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const;
|
||||
|
@ -41,7 +41,6 @@ libmoses_a_SOURCES = \
|
||||
PhraseDictionaryNode.cpp \
|
||||
PhraseDictionaryTree.cpp \
|
||||
PhraseDictionaryTreeAdaptor.cpp \
|
||||
PhraseReference.cpp \
|
||||
PrefixTreeMap.cpp \
|
||||
ScoreComponentCollection.cpp \
|
||||
ScoreIndexManager.cpp \
|
||||
|
@ -428,7 +428,7 @@ void Manager::CalcDecoderStatistics() const
|
||||
string buff;
|
||||
string buff2;
|
||||
TRACE_ERR( "Source and Target Units:"
|
||||
<< hypo->GetSourcePhrase());
|
||||
<< *StaticData::Instance().GetInput());
|
||||
buff2.insert(0,"] ");
|
||||
buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
|
||||
buff2.insert(0,":");
|
||||
|
@ -78,6 +78,7 @@ Parameter::Parameter()
|
||||
AddParam("mbr-scale", "scaling factor to convert log linear score into a probability.");
|
||||
AddParam("decoder-type", "MAP/MBR decoder (default=MAP=0)");
|
||||
AddParam("use-persistent-cache", "cache translation options across sentences (default=true)");
|
||||
AddParam("recover-input-path", "r", "(conf net/word lattice only) - recover input path corresponding to the best translation");
|
||||
}
|
||||
|
||||
Parameter::~Parameter()
|
||||
|
@ -1,28 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "PhraseReference.h"
|
||||
using std::ostream;
|
||||
|
||||
/** Stream the subphrase that the given PhraseReference designates. */
ostream& operator << (ostream& out, const PhraseReference& phrase)
{
  // GetSubphrase() returns by value, so hold the result while it is printed.
  const Phrase subphrase = phrase.GetSubphrase();
  return out << subphrase;
}
|
@ -1,48 +0,0 @@
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef MOSES_PHRASE_REFERENCE_H
|
||||
#define MOSES_PHRASE_REFERENCE_H
|
||||
|
||||
#include <iostream>
|
||||
#include "InputType.h"
|
||||
#include "WordsRange.h"
|
||||
|
||||
/***
|
||||
* hold a reference to a subphrase, the parent Phrase of which may be separately memory-managed
|
||||
*/
|
||||
class PhraseReference
|
||||
{
|
||||
public:
|
||||
|
||||
PhraseReference() : fullPhrase(NULL), range(0, 0) {}
|
||||
PhraseReference(const InputType& phrase, const WordsRange& r) : fullPhrase(&phrase), range(r) {}
|
||||
|
||||
const InputType& GetFullPhrase() const {return *fullPhrase;}
|
||||
Phrase GetSubphrase() const {return fullPhrase->GetSubString(range);}
|
||||
|
||||
protected:
|
||||
|
||||
const InputType* fullPhrase;
|
||||
WordsRange range;
|
||||
};
|
||||
|
||||
std::ostream& operator << (std::ostream& out, const PhraseReference& phrase);
|
||||
|
||||
#endif //MOSES_PHRASE_REFERENCE_H
|
@ -41,6 +41,6 @@ void SentenceStats::AddDeletedWords(const Hypothesis& hypo)
|
||||
if(hypo.GetPrevHypo() != NULL && hypo.GetPrevHypo()->GetCurrSourceWordsRange().GetNumWordsCovered() > 0) AddDeletedWords(*hypo.GetPrevHypo());
|
||||
if(hypo.GetCurrTargetWordsRange().GetNumWordsCovered() == 0)
|
||||
{
|
||||
m_deletedWords.push_back(PhraseReference(hypo.GetSourcePhrase(), hypo.GetCurrSourceWordsRange()));
|
||||
m_deletedWords.push_back(hypo.GetSourcePhrase());
|
||||
}
|
||||
}
|
||||
|
@ -29,7 +29,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "TypeDef.h" //FactorArray
|
||||
#include "InputType.h"
|
||||
#include "Util.h" //Join()
|
||||
#include "PhraseReference.h"
|
||||
|
||||
struct RecombinationInfo
|
||||
{
|
||||
@ -74,7 +73,7 @@ class SentenceStats
|
||||
size_t GetTotalSourceWords() const {return m_totalSourceWords;}
|
||||
size_t GetNumWordsDeleted() const {return m_deletedWords.size();}
|
||||
size_t GetNumWordsInserted() const {return m_insertedWords.size();}
|
||||
const std::vector<PhraseReference>& GetDeletedWords() const {return m_deletedWords;}
|
||||
const std::vector<const Phrase*>& GetDeletedWords() const {return m_deletedWords;}
|
||||
const std::vector<std::string>& GetInsertedWords() const {return m_insertedWords;}
|
||||
|
||||
void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo)
|
||||
@ -99,7 +98,7 @@ class SentenceStats
|
||||
|
||||
//words
|
||||
size_t m_totalSourceWords;
|
||||
std::vector<PhraseReference> m_deletedWords; //count deleted words/phrases in the final hypothesis
|
||||
std::vector<const Phrase*> m_deletedWords; //count deleted words/phrases in the final hypothesis
|
||||
std::vector<std::string> m_insertedWords; //count inserted words in the final hypothesis
|
||||
};
|
||||
|
||||
|
@ -101,6 +101,14 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (m_inputType == 2) { s_it = "word lattice"; }
|
||||
VERBOSE(2,"input type is: "<<s_it<<"\n");
|
||||
|
||||
if(m_parameter->GetParam("recover-input-path").size()) {
|
||||
m_recoverPath = Scan<bool>(m_parameter->GetParam("recover-input-path")[0]);
|
||||
if (m_recoverPath && m_inputType == SentenceInput) {
|
||||
TRACE_ERR("--recover-input-path should only be used with confusion net or word lattice input!\n");
|
||||
m_recoverPath = false;
|
||||
}
|
||||
}
|
||||
|
||||
// factor delimiter
|
||||
if (m_parameter->GetParam("factor-delimiter").size() > 0) {
|
||||
m_factorDelimiter = m_parameter->GetParam("factor-delimiter")[0];
|
||||
|
@ -90,6 +90,7 @@ protected:
|
||||
bool m_wordDeletionEnabled;
|
||||
|
||||
bool m_sourceStartPosMattersForRecombination;
|
||||
bool m_recoverPath;
|
||||
|
||||
InputTypeEnum m_inputType;
|
||||
size_t m_numInputScores;
|
||||
@ -230,6 +231,10 @@ public:
|
||||
{
|
||||
return m_maxHypoStackSize;
|
||||
}
|
||||
size_t IsPathRecoveryEnabled() const
|
||||
{
|
||||
return m_recoverPath;
|
||||
}
|
||||
int GetMaxDistortion() const
|
||||
{
|
||||
return m_maxDistortion;
|
||||
|
@ -55,6 +55,9 @@ T 0
|
||||
[verbose]
|
||||
2
|
||||
|
||||
[recover-input-path]
|
||||
1
|
||||
|
||||
[n-best-list]
|
||||
nbest
|
||||
10
|
||||
|
@ -1,9 +1,9 @@
|
||||
TRANSLATION_1=1 2
|
||||
TRANSLATION_2=1 2 3
|
||||
TRANSLATION_3=1 2 3
|
||||
TRANSLATION_4=1 2 3
|
||||
TRANSLATION_5=4 1234 5
|
||||
TRANSLATION_6=1
|
||||
TRANSLATION_1=A B ||| 1 2
|
||||
TRANSLATION_2=A B C ||| 1 2 3
|
||||
TRANSLATION_3=A B C ||| 1 2 3
|
||||
TRANSLATION_4=A B C ||| 1 2 3
|
||||
TRANSLATION_5=D E X ||| 4 1234 5
|
||||
TRANSLATION_6=A ||| 1
|
||||
LMLOAD_TIME ~ 0.000
|
||||
PTLOAD_TIME ~ 0.000
|
||||
SCORE_1 = 1.090
|
||||
|
Loading…
Reference in New Issue
Block a user