Merge in the multiple models branch. These changes allow the Moses server
to support multiple translation, language and generation models within the
same process. The main design change is the introduction of a TranslationSystem
object to manage the models, which have been moved out of StaticData.
The changes should have no effect on existing systems.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3394 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2010-08-10 13:12:00 +00:00
parent d31b030bc5
commit 904133fcb7
109 changed files with 1371 additions and 616 deletions

View File

@ -11,6 +11,7 @@
#include "../../moses/src/Util.h"
#include "../../moses/src/TargetPhrase.h"
#include "../../moses/src/PhraseDictionary.h"
#include "../../moses/src/DummyScoreProducers.h"
#include "TargetPhrase.h"
#include "OnDiskWrapper.h"
@ -186,7 +187,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
, const Vocab &vocab
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, float weightWP
, const Moses::WordPenaltyProducer* wpProducer
, const Moses::LMList &lmList
, const Moses::Phrase &sourcePhrase) const
{
@ -208,7 +209,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
}
// scores
ret->SetScoreChart(phraseDict.GetFeature(), m_scores, weightT, lmList);
ret->SetScoreChart(phraseDict.GetFeature(), m_scores, weightT, lmList, wpProducer);
// alignments
std::list<std::pair<size_t, size_t> > alignmentInfo;

View File

@ -19,6 +19,7 @@ namespace Moses
class TargetPhrase;
class LMList;
class Phrase;
class WordPenaltyProducer;
}
namespace OnDiskPt
@ -70,7 +71,7 @@ public:
, const Vocab &vocab
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, float weightWP
, const Moses::WordPenaltyProducer* wpProducer
, const Moses::LMList &lmList
, const Moses::Phrase &sourcePhrase) const;
UINT64 ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPColl);

View File

@ -101,7 +101,7 @@ Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std:
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, float weightWP
, const Moses::WordPenaltyProducer* wpProducer
, const Moses::LMList &lmList
, const Moses::Phrase &sourcePhrase
, const std::string &filePath
@ -117,7 +117,7 @@ Moses::TargetPhraseCollection *TargetPhraseCollection::ConvertToMoses(const std:
, vocab
, phraseDict
, weightT
, weightWP
, wpProducer
, lmList
, sourcePhrase);

View File

@ -8,6 +8,7 @@ namespace Moses
class TargetPhraseCollection;
class PhraseDictionary;
class LMList;
class WordPenaltyProducer;
}
namespace OnDiskPt
@ -50,7 +51,7 @@ public:
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, float weightWP
, const Moses::WordPenaltyProducer* wpProducer
, const Moses::LMList &lmList
, const Moses::Phrase &sourcePhrase
, const std::string &filePath

View File

@ -54,10 +54,6 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Name of package */
#undef PACKAGE
@ -73,9 +69,6 @@
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION

View File

@ -2,7 +2,7 @@ AC_DEFUN([AX_XMLRPC_C], [
AC_MSG_CHECKING(for XMLRPC-C)
AC_ARG_WITH(xmlrpc-c,
[ --with-xmlrpc-c=PATH Enable XMLRPC-C support.],
[ --with-xmlrpc-c=PATH Enable XMLRPC-C support. Setting the PATH to yes will search for xmlrpc-c-config on the shell PATH,],
[
if test "$withval" = "no"; then
AC_MSG_RESULT(no)

View File

@ -304,7 +304,7 @@ void IOWrapper::OutputBestHypo(const MosesChart::Hypothesis *hypo, long translat
}
}
void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, long translationId)
void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId)
{
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
//bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
@ -332,7 +332,7 @@ void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, lo
// MERT script relies on this
// lm
const LMList& lml = StaticData::Instance().GetAllLM();
const LMList& lml = system->GetLanguageModels();
if (lml.size() > 0) {
if (labeledOutput)
*m_nBestStream << "lm: ";
@ -345,7 +345,7 @@ void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, lo
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput){
// translation components for text input
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "tm: ";
@ -361,7 +361,7 @@ void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, lo
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionaryFeature*>::iterator iter;
@ -398,14 +398,14 @@ void IOWrapper::OutputNBestList(const MosesChart::TrellisPathList &nBestList, lo
// word penalty
if (labeledOutput)
*m_nBestStream << "w: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(system->GetWordPenaltyProducer()) << " ";
// generation
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "g: ";
vector<GenerationDictionary*>::iterator iter;
vector<GenerationDictionary*>::const_iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {

View File

@ -39,6 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "TypeDef.h"
#include "Sentence.h"
#include "FactorTypeSet.h"
#include "TranslationSystem.h"
#include "TrellisPathList.h"
#include "../../moses-chart/src/ChartHypothesis.h"
@ -78,7 +79,7 @@ public:
Moses::InputType* GetInput(Moses::InputType *inputType);
void OutputBestHypo(const MosesChart::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputBestHypo(const std::vector<const Moses::Factor*>& mbrBestHypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputNBestList(const MosesChart::TrellisPathList &nBestList, long translationId);
void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const Moses::TranslationSystem* system, long translationId);
void Backtrack(const MosesChart::Hypothesis *hypo);
void ResetTranslationId() { m_translationId = 0; }

View File

@ -142,7 +142,8 @@ int main(int argc, char* argv[])
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << *source);
//cerr << *source << endl;
MosesChart::Manager manager(*source);
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
MosesChart::Manager manager(*source, &system);
manager.ProcessSentence();
assert(!staticData.UseMBR());
@ -161,7 +162,7 @@ int main(int argc, char* argv[])
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
MosesChart::TrellisPathList nBestList;
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
ioWrapper->OutputNBestList(nBestList, source->GetTranslationId());
ioWrapper->OutputNBestList(nBestList, &system, source->GetTranslationId());
IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
}

View File

@ -49,8 +49,8 @@ Hypothesis::Hypothesis(const QueueEntry &queueEntry, Manager &manager)
,m_wordsConsumedTargetOrder(queueEntry.GetTranslationOption().GetChartRule().GetWordsConsumedTargetOrder())
,m_id(++s_HypothesesCreated)
,m_currSourceWordsRange(queueEntry.GetTranslationOption().GetSourceWordsRange())
,m_contextPrefix(Output, StaticData::Instance().GetAllLM().GetMaxNGramOrder())
,m_contextSuffix(Output, StaticData::Instance().GetAllLM().GetMaxNGramOrder())
,m_contextPrefix(Output, manager.GetTranslationSystem()->GetLanguageModels().GetMaxNGramOrder())
,m_contextSuffix(Output, manager.GetTranslationSystem()->GetLanguageModels().GetMaxNGramOrder())
,m_arcList(NULL)
,m_manager(manager)
{
@ -73,7 +73,7 @@ Hypothesis::Hypothesis(const QueueEntry &queueEntry, Manager &manager)
m_prevHypos.push_back(prevHypo);
}
size_t maxNGram = StaticData::Instance().GetAllLM().GetMaxNGramOrder();
size_t maxNGram = manager.GetTranslationSystem()->GetLanguageModels().GetMaxNGramOrder();
CalcPrefix(m_contextPrefix, maxNGram - 1);
CalcSuffix(m_contextSuffix, maxNGram - 1);
}
@ -235,11 +235,11 @@ void Hypothesis::CalcScore()
void Hypothesis::CalcLMScore()
{
const LMList& lmList = m_manager.GetTranslationSystem()->GetLanguageModels();
assert(m_lmNGram.GetWeightedScore() == 0);
m_scoreBreakdown.ZeroAllLM();
m_scoreBreakdown.ZeroAllLM(lmList);
const LMList &lmList = StaticData::Instance().GetAllLM();
Phrase outPhrase(Output); // = GetOutputPhrase();
bool calcNow = false, firstPhrase = true;
@ -258,7 +258,7 @@ void Hypothesis::CalcLMScore()
if (numTargetTerminals >= lmList.GetMaxNGramOrder() - 1)
{ // large hypo (for trigram lm, another hypo equal or over 2 words). just take the prefix & suffix
m_lmNGram.PlusEqualsAllLM(prevHypo->m_lmNGram);
m_lmNGram.PlusEqualsAllLM(lmList, prevHypo->m_lmNGram);
// calc & add overlapping lm scores
// prefix
@ -299,8 +299,8 @@ void Hypothesis::CalcLMScore()
, m_lmNGram
, (firstPhrase) ? &m_lmPrefix : NULL);
m_scoreBreakdown.PlusEqualsAllLM(m_lmPrefix);
m_scoreBreakdown.PlusEqualsAllLM(m_lmNGram);
m_scoreBreakdown.PlusEqualsAllLM(lmList, m_lmPrefix);
m_scoreBreakdown.PlusEqualsAllLM(lmList, m_lmNGram);
/*
// lazy way. keep for comparison

View File

@ -27,6 +27,7 @@
#include "ChartTrellisPathList.h"
#include "ChartTrellisPathCollection.h"
#include "../../moses/src/StaticData.h"
#include "../../moses/src/DecodeStep.h"
using namespace std;
using namespace Moses;
@ -39,18 +40,18 @@ namespace Moses
namespace MosesChart
{
Manager::Manager(InputType const& source)
Manager::Manager(InputType const& source, const TranslationSystem* system)
:m_source(source)
,m_hypoStackColl(source, *this)
,m_transOptColl(source, StaticData::Instance().GetDecodeStepVL(source), m_hypoStackColl)
,m_transOptColl(source, system, m_hypoStackColl),
m_system(system)
{
const StaticData &staticData = StaticData::Instance();
staticData.InitializeBeforeSentenceProcessing(source);
m_system->InitializeBeforeSentenceProcessing(source);
}
Manager::~Manager()
{
StaticData::Instance().CleanUpAfterSentenceProcessing();
m_system->CleanUpAfterSentenceProcessing();
}
void Manager::ProcessSentence()

View File

@ -29,6 +29,7 @@
#include "../../moses/src/WordsRange.h"
#include "../../moses/src/TrellisPathList.h"
#include "../../moses/src/SentenceStats.h"
#include "../../moses/src/TranslationSystem.h"
namespace MosesChart
{
@ -43,9 +44,10 @@ protected:
ChartCellCollection m_hypoStackColl;
TranslationOptionCollection m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
std::auto_ptr<Moses::SentenceStats> m_sentenceStats;
const Moses::TranslationSystem* m_system;
public:
Manager(Moses::InputType const& source);
Manager(Moses::InputType const& source, const Moses::TranslationSystem* system);
~Manager();
void ProcessSentence();
const Hypothesis *GetBestHypothesis() const;
@ -53,7 +55,8 @@ public:
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
const Moses::InputType& GetSource() const {return m_source;}
const Moses::InputType& GetSource() const {return m_source;}
const Moses::TranslationSystem* GetTranslationSystem() const {return m_system;}
Moses::SentenceStats& GetSentenceStats() const
{

View File

@ -38,10 +38,11 @@ namespace MosesChart
{
TranslationOptionCollection::TranslationOptionCollection(InputType const& source
, const std::vector<DecodeGraph*> &decodeGraphList
, const Moses::TranslationSystem* system
, const ChartCellCollection &hypoStackColl)
:m_source(source)
,m_decodeGraphList(decodeGraphList)
,m_system(system)
,m_decodeGraphList(system->GetDecodeGraphs())
,m_hypoStackColl(hypoStackColl)
,m_collection(source.GetSize())
{
@ -62,7 +63,6 @@ TranslationOptionCollection::~TranslationOptionCollection()
RemoveAllInColl(m_unksrcs);
RemoveAllInColl(m_cacheChartRule);
RemoveAllInColl(m_cacheTargetPhrase);
RemoveAllInColl(m_decodeGraphList);
std::list<std::vector<Moses::WordConsumed*>* >::iterator iterOuter;
for (iterOuter = m_cachedWordsConsumed.begin(); iterOuter != m_cachedWordsConsumed.end(); ++iterOuter)
@ -140,10 +140,11 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
const WordsRange &wordsRange = GetTranslationOptionList(startPos, endPos).GetSourceRange();
TranslationOptionList &translationOptionList = GetTranslationOptionList(startPos, endPos);
const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary();
const PhraseDictionary* phraseDictionary =
decodeStep.GetPhraseDictionaryFeature()->GetDictionary();
//cerr << phraseDictionary.GetScoreProducerDescription() << endl;
const ChartRuleCollection *chartRuleCollection = phraseDictionary.GetChartRuleCollection(
const ChartRuleCollection *chartRuleCollection = phraseDictionary->GetChartRuleCollection(
m_source
, wordsRange
, adhereTableLimit
@ -201,13 +202,11 @@ void TranslationOptionCollection::ProcessUnknownWord(size_t sourcePos)
}
//! special handling of ONE unknown words.
void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourceWord
, size_t sourcePos, size_t length)
void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourceWord, size_t sourcePos, size_t length)
{
// unknown word, add as trans opt
const StaticData &staticData = StaticData::Instance();
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = staticData.GetUnknownWordPenaltyProducer();
const WordPenaltyProducer *wordPenaltyProducer = staticData.GetWordPenaltyProducer();
const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = m_system->GetUnknownWordPenaltyProducer();
vector<float> wordPenaltyScore(1, -0.434294482);
size_t isDigit = 0;
@ -265,7 +264,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Moses::Word &sourc
//targetPhrase->SetScore();
targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
targetPhrase->SetScore(wordPenaltyProducer, wordPenaltyScore);
targetPhrase->SetScore(m_system->GetWordPenaltyProducer(), wordPenaltyScore);
targetPhrase->SetSourcePhrase(m_unksrc);
targetPhrase->SetTargetLHS(targetLHS);

View File

@ -32,6 +32,7 @@ namespace Moses
class Word;
class ChartRule;
class WordConsumed;
class WordPenaltyProducer;
};
namespace MosesChart
@ -43,7 +44,8 @@ class TranslationOptionCollection
friend std::ostream& operator<<(std::ostream&, const TranslationOptionCollection&);
protected:
const Moses::InputType &m_source;
std::vector<Moses::DecodeGraph*> m_decodeGraphList;
const Moses::TranslationSystem* m_system;
std::vector <Moses::DecodeGraph*> m_decodeGraphList;
const ChartCellCollection &m_hypoStackColl;
std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */
@ -52,8 +54,8 @@ protected:
std::list<Moses::TargetPhrase*> m_cacheTargetPhrase;
std::list<std::vector<Moses::WordConsumed*>* > m_cachedWordsConsumed;
virtual void CreateTranslationOptionsForRange(const Moses::DecodeGraph &decodeStepList
, size_t startPosition
virtual void CreateTranslationOptionsForRange(const Moses::DecodeGraph& decodeGraph,
size_t startPosition
, size_t endPosition
, bool adhereTableLimit);
void Add(TranslationOptionList &translationOptionList);
@ -81,7 +83,7 @@ protected:
public:
TranslationOptionCollection(Moses::InputType const& source
, const std::vector<Moses::DecodeGraph*> &decodeGraphList
, const Moses::TranslationSystem* system
, const ChartCellCollection &hypoStackColl);
virtual ~TranslationOptionCollection();
//virtual void CreateTranslationOptions(const std::vector <Moses::DecodeGraph*> &decodeGraphList);

View File

@ -313,7 +313,7 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder,long translationId)
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
@ -338,7 +338,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff =
staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
system->GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ )
{
if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() )
@ -354,7 +354,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
}
const vector<const StatelessFeatureFunction*>& slf =
staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
system->GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ )
{
if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() )
@ -372,7 +372,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput){
// translation components for text input
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
out << " tm:";
@ -388,7 +388,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = system->GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionaryFeature*>::iterator iter;
@ -421,11 +421,11 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
}
// generation
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
const vector<GenerationDictionary*> gds = system->GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
out << " g: ";
vector<GenerationDictionary*>::iterator iter;
vector<GenerationDictionary*>::const_iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {
@ -493,9 +493,6 @@ void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>&
}
}
void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translationId) {
OutputNBest(*m_nBestStream, nBestList,m_outputFactorOrder, translationId);
}
void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId) {
OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);

View File

@ -91,7 +91,6 @@ public:
Moses::InputType* GetInput(Moses::InputType *inputType);
void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
void OutputNBestList(const Moses::TrellisPathList &nBestList, long translationId);
void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
void Backtrack(const Moses::Hypothesis *hypo);
@ -116,7 +115,8 @@ public:
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&, long translationId);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
const TranslationSystem* system, long translationId);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);

View File

@ -171,7 +171,8 @@ int main(int argc, char* argv[]) {
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
++lineCount;
Sentence sentence(Input);
Manager manager(*source,staticData.GetSearchAlgorithm());
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
Manager manager(*source,staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
TrellisPathList nBestList;
manager.CalcNBest(nBestSize, nBestList,true);

View File

@ -138,7 +138,8 @@ class TranslationTask : public Task {
#endif
const StaticData &staticData = StaticData::Instance();
Sentence sentence(Input);
Manager manager(*m_source,staticData.GetSearchAlgorithm());
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
//Word Graph
@ -259,7 +260,7 @@ class TranslationTask : public Task {
TrellisPathList nBestList;
ostringstream out;
manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
OutputNBest(out,nBestList, staticData.GetOutputFactorOrder(), m_lineNumber);
OutputNBest(out,nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
m_nbestCollector->Write(m_lineNumber, out.str());
}
@ -267,7 +268,7 @@ class TranslationTask : public Task {
if (m_detailedTranslationCollector) {
ostringstream out;
fix(out);
TranslationAnalysis::PrintTranslationAnalysis(out, manager.GetBestHypothesis());
TranslationAnalysis::PrintTranslationAnalysis(manager.GetTranslationSystem(), out, manager.GetBestHypothesis());
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
}

View File

@ -11,7 +11,7 @@ using namespace Moses;
namespace TranslationAnalysis {
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
{
os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
std::vector<const Hypothesis*> translationPath;
@ -93,7 +93,7 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
os << std::endl << std::endl;
if (doLMStats && lmCalls > 0) {
std::vector<unsigned int>::iterator acc = lmAcc.begin();
const LMList& lmlist = StaticData::Instance().GetAllLM();
const LMList& lmlist = system->GetLanguageModels();
LMList::const_iterator i = lmlist.begin();
for (; acc != lmAcc.end(); ++acc, ++i) {
char buf[256];

View File

@ -9,6 +9,7 @@
#include <iostream>
#include "Hypothesis.h"
#include "TranslationSystem.h"
namespace TranslationAnalysis
{
@ -17,7 +18,7 @@ namespace TranslationAnalysis
* print details about the translation represented in hypothesis to
* os. Included information: phrase alignment, words dropped, scores
*/
void PrintTranslationAnalysis(std::ostream &os, const Moses::Hypothesis* hypo);
void PrintTranslationAnalysis(const Moses::TranslationSystem* system, std::ostream &os, const Moses::Hypothesis* hypo);
}

View File

@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "HypothesisStackCubePruning.h"
#include "DummyScoreProducers.h"
#include "TranslationOptionList.h"
#include "TranslationSystem.h"
namespace Moses
{
@ -57,17 +58,18 @@ class HypothesisScoreOrdererNoDistortion
class HypothesisScoreOrdererWithDistortion
{
public:
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
m_transOptRange(transOptRange) {}
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange, const TranslationSystem* system) :
m_transOptRange(transOptRange), m_system(system) {}
const WordsRange* m_transOptRange;
const WordsRange* m_transOptRange;
const TranslationSystem* m_system;
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
{
assert (m_transOptRange != NULL);
const float weightDistortion = StaticData::Instance().GetWeightDistortion();
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
const float weightDistortion = m_system->GetWeightDistortion();
const DistortionScoreProducer *dsp = m_system->GetDistortionProducer();
const float distortionScoreA = dsp->CalculateDistortionScore(
*hypoA,
hypoA->GetCurrSourceWordsRange(),
@ -108,7 +110,8 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore,
const InputType& itype)
const InputType& itype,
const TranslationSystem* system)
: m_initialized(false)
, m_prevBitmapContainer(prevBitmapContainer)
, m_parent(parent)
@ -172,7 +175,7 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
HypothesisScoreOrdererWithDistortion orderer (&transOptRange, system);
std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());

View File

@ -188,7 +188,8 @@ class BackwardsEdge
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore,
const InputType& source);
const InputType& source,
const TranslationSystem* system);
~BackwardsEdge();
bool GetInitialized();

View File

@ -231,11 +231,11 @@ std::ostream& operator<<(std::ostream& out,const ConfusionNet& cn)
}
TranslationOptionCollection*
ConfusionNet::CreateTranslationOptionCollection() const
ConfusionNet::CreateTranslationOptionCollection(const TranslationSystem* system) const
{
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float translationOptionThreshold = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(*this, maxNoTransOptPerCoverage, translationOptionThreshold);
TranslationOptionCollection *rv= new TranslationOptionCollectionConfusionNet(system, *this, maxNoTransOptPerCoverage, translationOptionThreshold);
assert(rv);
return rv;
}

View File

@ -14,6 +14,7 @@ namespace Moses
class FactorCollection;
class TranslationOptionCollection;
class Sentence;
class TranslationSystem;
class ConfusionNet : public InputType {
public:
@ -52,7 +53,7 @@ class ConfusionNet : public InputType {
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
const Word& GetWord(size_t pos) const;
TranslationOptionCollection* CreateTranslationOptionCollection() const;
TranslationOptionCollection* CreateTranslationOptionCollection(const TranslationSystem* system) const;
const LabelList &GetLabelList(size_t /*startPos*/, size_t /*endPos*/) const
{

View File

@ -0,0 +1,63 @@
// $Id: PhraseDictionaryMemory.cpp 2477 2009-08-07 16:47:54Z bhaddow $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/

#include <iostream>

#include "DecodeFeature.h"
#include "StaticData.h"

using namespace std;

namespace Moses {

/**
 * Store the input and output factor types this decode feature maps between
 * and derive the corresponding FactorMasks from them.
 *
 * Initializing the masks in the member-initializer list (rather than
 * default-constructing them and assigning in the body) constructs each
 * member exactly once.
 */
DecodeFeature::DecodeFeature(const std::vector<FactorType> &input, const std::vector<FactorType> &output) :
    m_input(input), m_output(output),
    m_inputFactors(input), m_outputFactors(output)
{
    VERBOSE(2,"DecodeFeature: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
}

//! mask over the output factor types produced by this feature
const FactorMask& DecodeFeature::GetOutputFactorMask() const
{
    return m_outputFactors;
}

//! mask over the input factor types consumed by this feature
const FactorMask& DecodeFeature::GetInputFactorMask() const
{
    return m_inputFactors;
}

//! input factor types in the order given in the ini file
const std::vector<FactorType>& DecodeFeature::GetInput() const
{
    return m_input;
}

//! output factor types in the order given in the ini file
const std::vector<FactorType>& DecodeFeature::GetOutput() const
{
    return m_output;
}

}

59
moses/src/DecodeFeature.h Normal file
View File

@ -0,0 +1,59 @@
// $Id: PhraseDictionaryMemory.cpp 2477 2009-08-07 16:47:54Z bhaddow $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_DecodeFeature
#define moses_DecodeFeature
#include <vector>
#include "FactorTypeSet.h"
#include "FeatureFunction.h"
#include "TypeDef.h"
namespace Moses {
/**
 * A feature on the decoding path (Generation or Translation).
 * Common base that records which input factor types the feature consumes
 * and which output factor types it produces, both as ordered vectors
 * (as listed in the ini file) and as pre-computed FactorMasks.
 **/
class DecodeFeature : public StatelessFeatureFunction {
public:
//! builds the input/output FactorMasks from the given factor-type lists
DecodeFeature(const std::vector<FactorType> &input, const std::vector<FactorType> &output);
//! returns output factor types as specified by the ini file
const FactorMask& GetOutputFactorMask() const;
//! returns input factor types as specified by the ini file
const FactorMask& GetInputFactorMask() const;
//! ordered input factor types (same order as the ini file)
const std::vector<FactorType>& GetInput() const;
//! ordered output factor types (same order as the ini file)
const std::vector<FactorType>& GetOutput() const;
private:
std::vector<FactorType> m_input;    // input factor types, ini-file order
std::vector<FactorType> m_output;   // output factor types, ini-file order
FactorMask m_inputFactors;          // mask form of m_input
FactorMask m_outputFactors;         // mask form of m_output
};
}
#endif

View File

@ -26,14 +26,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
DecodeStep::DecodeStep(const Dictionary *ptr, const DecodeStep* prev)
:m_ptr(ptr)
DecodeStep::DecodeStep(const DecodeFeature *decodeFeature, const DecodeStep* prev) :
m_decodeFeature(decodeFeature)
{
FactorMask prevOutputFactors;
if (prev) prevOutputFactors = prev->m_outputFactors;
m_outputFactors = prevOutputFactors;
FactorMask conflictMask = (m_outputFactors & ptr->GetOutputFactorMask());
m_outputFactors |= ptr->GetOutputFactorMask();
FactorMask conflictMask = (m_outputFactors & decodeFeature->GetOutputFactorMask());
m_outputFactors |= decodeFeature->GetOutputFactorMask();
FactorMask newOutputFactorMask = m_outputFactors ^ prevOutputFactors; //xor
m_newOutputFactors.resize(newOutputFactorMask.count());
m_conflictFactors.resize(conflictMask.count());
@ -49,16 +49,16 @@ DecodeStep::DecodeStep(const Dictionary *ptr, const DecodeStep* prev)
DecodeStep::~DecodeStep() {}
/** returns phrase table (dictionary) for translation step */
const PhraseDictionary &DecodeStep::GetPhraseDictionary() const
/** returns phrase feature (dictionary) for translation step */
const PhraseDictionaryFeature* DecodeStep::GetPhraseDictionaryFeature() const
{
return *static_cast<const PhraseDictionary*>(m_ptr);
return dynamic_cast<const PhraseDictionaryFeature*>(m_decodeFeature);
}
/** returns generation table (dictionary) for generation step */
const GenerationDictionary &DecodeStep::GetGenerationDictionary() const
/** returns generation feature (dictionary) for generation step */
const GenerationDictionary* DecodeStep::GetGenerationDictionaryFeature() const
{
return *static_cast<const GenerationDictionary*>(m_ptr);
return dynamic_cast<const GenerationDictionary*>(m_decodeFeature);
}
}

View File

@ -29,31 +29,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
class PhraseDictionary;
class DecodeFeature;
class PhraseDictionaryFeature;
class GenerationDictionary;
class TranslationOption;
class TranslationOptionCollection;
class PartialTranslOptColl;
class FactorCollection;
class InputType;
class TranslationSystem;
/*! Specification for a decoding step.
* The factored translation model consists of Translation and Generation
* steps, which consult a Dictionary of phrase translations or word
* generations. This class implements the specification for one of these
* steps, both the DecodeType and a pointer to the Dictionary
* steps, both the DecodeType and a pointer to the Translation or Generation Feature
**/
class DecodeStep
{
protected:
const Dictionary *m_ptr; //! pointer to translation/generation table
FactorMask m_outputFactors; //! mask of what factors exist on the output side after this decode step
std::vector<FactorType> m_conflictFactors; //! list of the factors that may conflict during this step
std::vector<FactorType> m_newOutputFactors; //! list of the factors that are new in this step, may be empty
const DecodeFeature* m_decodeFeature;
public:
DecodeStep(); //! not implemented
DecodeStep(const Dictionary *ptr, const DecodeStep* prevDecodeStep);
DecodeStep(const DecodeFeature *featurePtr, const DecodeStep* prevDecodeStep);
virtual ~DecodeStep();
//! mask of factors that are present after this decode step
@ -91,21 +93,23 @@ public:
return m_conflictFactors;
}
/*! returns phrase table (dictionary) for translation step */
const PhraseDictionary &GetPhraseDictionary() const;
/*! returns phrase table feature for translation step */
const PhraseDictionaryFeature* GetPhraseDictionaryFeature() const;
/*! returns generation table (dictionary) for generation step */
const GenerationDictionary &GetGenerationDictionary() const;
/*! returns generation table feature for generation step */
const GenerationDictionary* GetGenerationDictionaryFeature() const;
/*! returns dictionary in abstract class */
const Dictionary* GetDictionaryPtr() const {return m_ptr;}
/*! Given an input TranslationOption, extend it in some way (put results in outputPartialTranslOptColl) */
virtual void Process(const TranslationOption &inputPartialTranslOpt
virtual void Process(const TranslationSystem* system,
const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const = 0;
/** Do any sentence specific initialisation */
virtual void InitializeBeforeSentenceProcessing(InputType const&) const {}
};

View File

@ -30,15 +30,11 @@ namespace Moses
{
using namespace std;
DecodeStepGeneration::DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev)
DecodeStepGeneration::DecodeStepGeneration(const GenerationDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev)
{
}
const GenerationDictionary &DecodeStepGeneration::GetGenerationDictionary() const
{
return *static_cast<const GenerationDictionary*>(m_ptr);
}
TranslationOption *DecodeStepGeneration::MergeGeneration(const TranslationOption& oldTO, Phrase &mergePhrase
, const ScoreComponentCollection& generationScore) const
@ -79,7 +75,8 @@ inline void IncrementIterators(vector< WordListIterator > &wordListIterVector
}
}
void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOpt
void DecodeStepGeneration::Process(const TranslationSystem* system
, const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection * /* toc */
@ -89,13 +86,13 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
{ // word deletion
TranslationOption *newTransOpt = new TranslationOption(inputPartialTranslOpt);
outputPartialTranslOptColl.Add(newTransOpt);
outputPartialTranslOptColl.Add(system, newTransOpt);
return;
}
// normal generation step
const GenerationDictionary &generationDictionary = decodeStep.GetGenerationDictionary();
const GenerationDictionary* generationDictionary = decodeStep.GetGenerationDictionaryFeature();
// const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const Phrase &targetPhrase = inputPartialTranslOpt.GetTargetPhrase();
@ -113,7 +110,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
const Word &word = targetPhrase.GetWord(currPos);
// consult dictionary for possible generations for this word
const OutputWordCollection *wordColl = generationDictionary.FindWord(word);
const OutputWordCollection *wordColl = generationDictionary->FindWord(word);
if (wordColl == NULL)
{ // word not found in generation dictionary
@ -165,7 +162,7 @@ void DecodeStepGeneration::Process(const TranslationOption &inputPartialTranslOp
TranslationOption *newTransOpt = MergeGeneration(inputPartialTranslOpt, genPhrase, generationScore);
if (newTransOpt != NULL)
{
outputPartialTranslOptColl.Add(newTransOpt);
outputPartialTranslOptColl.Add(system, newTransOpt);
}
// increment iterators

View File

@ -35,12 +35,11 @@ class ScoreComponentCollection;
class DecodeStepGeneration : public DecodeStep
{
public:
DecodeStepGeneration(GenerationDictionary* dict, const DecodeStep* prev);
DecodeStepGeneration(const GenerationDictionary* dict, const DecodeStep* prev);
//! returns phrase table (dictionary) for translation step
const GenerationDictionary &GetGenerationDictionary() const;
virtual void Process(const TranslationOption &inputPartialTranslOpt
virtual void Process(const TranslationSystem* system
, const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc

View File

@ -28,15 +28,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
DecodeStepTranslation::DecodeStepTranslation(const PhraseDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev)
DecodeStepTranslation::DecodeStepTranslation(const PhraseDictionaryFeature* pdf, const DecodeStep* prev)
: DecodeStep(pdf, prev)
{
}
/*const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
{
return *m_phraseDictionary;
}*/
TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
{
@ -50,7 +46,8 @@ TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOpti
}
void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslOpt
void DecodeStepTranslation::Process(const TranslationSystem* system
, const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
@ -59,18 +56,20 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
if (inputPartialTranslOpt.GetTargetPhrase().GetSize() == 0)
{ // word deletion
outputPartialTranslOptColl.Add(new TranslationOption(inputPartialTranslOpt));
outputPartialTranslOptColl.Add(system, new TranslationOption(inputPartialTranslOpt));
return;
}
// normal trans step
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const PhraseDictionary &phraseDictionary = decodeStep.GetPhraseDictionary();
// normal trans step
const WordsRange &sourceWordsRange = inputPartialTranslOpt.GetSourceWordsRange();
const PhraseDictionary* phraseDictionary =
decodeStep.GetPhraseDictionaryFeature()->GetDictionary(); ;
const size_t currSize = inputPartialTranslOpt.GetTargetPhrase().GetSize();
const size_t tableLimit = phraseDictionary.GetTableLimit();
const size_t tableLimit = phraseDictionary->GetTableLimit();
const TargetPhraseCollection *phraseColl= phraseDictionary.GetTargetPhraseCollection(toc->GetSource(),sourceWordsRange);
const TargetPhraseCollection *phraseColl=
phraseDictionary->GetTargetPhraseCollection(toc->GetSource(),sourceWordsRange);
if (phraseColl != NULL)
{
@ -86,7 +85,7 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
TranslationOption *newTransOpt = MergeTranslation(inputPartialTranslOpt, targetPhrase);
if (newTransOpt != NULL)
{
outputPartialTranslOptColl.Add( newTransOpt );
outputPartialTranslOptColl.Add(system, newTransOpt );
}
}
}
@ -97,16 +96,16 @@ void DecodeStepTranslation::Process(const TranslationOption &inputPartialTranslO
}
void DecodeStepTranslation::ProcessInitialTranslation(
const InputType &source
void DecodeStepTranslation::ProcessInitialTranslation(const TranslationSystem* system
, const InputType &source
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit) const
{
const PhraseDictionary &phraseDictionary = *static_cast<const PhraseDictionary*>(m_ptr);
const size_t tableLimit = phraseDictionary.GetTableLimit();
const PhraseDictionary* phraseDictionary = GetPhraseDictionaryFeature()->GetDictionary();
const size_t tableLimit = phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(source,wordsRange);
const TargetPhraseCollection *phraseColl = phraseDictionary->GetTargetPhraseCollection(source,wordsRange);
if (phraseColl != NULL)
{
@ -123,7 +122,7 @@ void DecodeStepTranslation::ProcessInitialTranslation(
for (iterTargetPhrase = phraseColl->begin() ; iterTargetPhrase != iterEnd ; ++iterTargetPhrase)
{
const TargetPhrase &targetPhrase = **iterTargetPhrase;
outputPartialTranslOptColl.Add ( new TranslationOption(wordsRange, targetPhrase, source) );
outputPartialTranslOptColl.Add (system, new TranslationOption(wordsRange, targetPhrase, source) );
VERBOSE(3,"\t" << targetPhrase << "\n");
}

View File

@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
class PhraseDictionary;
class PhraseDictionaryFeature;
class TargetPhrase;
//! subclass of DecodeStep for translation step
@ -36,24 +36,25 @@ class DecodeStepTranslation : public DecodeStep
{
public:
DecodeStepTranslation(); //! not implemented
DecodeStepTranslation(const PhraseDictionary* dict, const DecodeStep* prev);
DecodeStepTranslation(const PhraseDictionaryFeature* phraseFeature, const DecodeStep* prev);
//! returns phrase table (dictionary) for translation step
const PhraseDictionary &GetPhraseDictionary() const;
virtual void Process(const TranslationOption &inputPartialTranslOpt
virtual void Process(const TranslationSystem* system
, const TranslationOption &inputPartialTranslOpt
, const DecodeStep &decodeStep
, PartialTranslOptColl &outputPartialTranslOptColl
, TranslationOptionCollection *toc
, bool adhereTableLimit) const;
/*! initialize list of partial translation options by applying the first translation step
* Ideally, this function should be in DecodeStepTranslation class
*/
void ProcessInitialTranslation(
const InputType &source
void ProcessInitialTranslation(const TranslationSystem* system
, const InputType &source
, PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit) const;
private:
/*! create new TranslationOption from merging oldTO with mergePhrase
This function runs IsCompatible() to ensure the two can be merged

View File

@ -37,8 +37,6 @@ class Dictionary
protected:
const size_t m_numScoreComponent;
FactorMask m_inputFactors;
FactorMask m_outputFactors;
public:
//! Constructor
@ -46,16 +44,6 @@ public:
//!Destructor
virtual ~Dictionary();
//! returns output factor types as specified by the ini file
const FactorMask& GetOutputFactorMask() const
{
return m_outputFactors;
}
//! returns input factor types as specified by the ini file
const FactorMask& GetInputFactorMask() const
{
return m_inputFactors;
}
//! returns whether this dictionary is to be used for Translate or Generate
virtual DecodeType GetDecodeType() const = 0;

View File

@ -33,25 +33,20 @@ using namespace std;
namespace Moses
{
GenerationDictionary::GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager)
: Dictionary(numFeatures)
GenerationDictionary::GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager,
const std::vector<FactorType> &input,
const std::vector<FactorType> &output)
: Dictionary(numFeatures), DecodeFeature(input,output)
{
scoreIndexManager.AddScoreProducer(this);
}
bool GenerationDictionary::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, FactorDirection direction)
bool GenerationDictionary::Load(const std::string &filePath, FactorDirection direction)
{
FactorCollection &factorCollection = FactorCollection::Instance();
const size_t numFeatureValuesInConfig = this->GetNumScoreComponents();
//factors
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
VERBOSE(2,"GenerationDictionary: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
// data from file
InputFileStream inFile(filePath);
@ -76,17 +71,17 @@ bool GenerationDictionary::Load(const std::vector<FactorType> &input
// inputs
vector<string> factorString = Tokenize( token[0], "|" );
for (size_t i = 0 ; i < input.size() ; i++)
for (size_t i = 0 ; i < GetInput().size() ; i++)
{
FactorType factorType = input[i];
FactorType factorType = GetInput()[i];
const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
inputWord->SetFactor(factorType, factor);
}
factorString = Tokenize( token[1], "|" );
for (size_t i = 0 ; i < output.size() ; i++)
for (size_t i = 0 ; i < GetOutput().size() ; i++)
{
FactorType factorType = output[i];
FactorType factorType = GetOutput()[i];
const Factor *factor = factorCollection.AddFactor( direction, factorType, factorString[i]);
outputWord.SetFactor(factorType, factor);

View File

@ -29,7 +29,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h"
#include "TypeDef.h"
#include "Dictionary.h"
#include "FeatureFunction.h"
#include "DecodeFeature.h"
namespace Moses
{
@ -42,7 +42,7 @@ typedef std::map < Word , ScoreComponentCollection > OutputWordCollection;
/** Implementation of a generation table in a trie.
*/
class GenerationDictionary : public Dictionary, public StatelessFeatureFunction
class GenerationDictionary : public Dictionary, public DecodeFeature
{
typedef std::map<const Word* , OutputWordCollection, WordComparer> Collection;
protected:
@ -55,7 +55,11 @@ public:
/** constructor.
* \param numFeatures number of score components, as specified in ini file
*/
GenerationDictionary(size_t numFeatures, ScoreIndexManager &scoreIndexManager);
GenerationDictionary(
size_t numFeatures,
ScoreIndexManager &scoreIndexManager,
const std::vector<FactorType> &input,
const std::vector<FactorType> &output);
virtual ~GenerationDictionary();
// returns Generate
@ -65,10 +69,7 @@ public:
}
//! load data file
bool Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, FactorDirection direction);
bool Load(const std::string &filePath, FactorDirection direction);
size_t GetNumScoreComponents() const;
std::string GetScoreProducerDescription() const;

View File

@ -59,7 +59,7 @@ Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPh
m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
, m_currTargetWordsRange(0, emptyTarget.GetSize()-1)
, m_wordDeleted(false)
, m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
, m_ffStates(manager.GetTranslationSystem()->GetStatefulFeatureFunctions().size())
, m_arcList(NULL)
, m_transOpt(NULL)
, m_manager(manager)
@ -70,7 +70,7 @@ Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPh
//_hash_computed = false;
//s_HypothesesCreated = 1;
ResetScore();
const vector<const StatefulFeatureFunction*>& ffs = StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions();
const vector<const StatefulFeatureFunction*>& ffs = m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i)
m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
m_manager.GetSentenceStats().AddCreated();
@ -284,13 +284,13 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
// compute values of stateless feature functions that were not
// cached in the translation option-- there is no principled distinction
const vector<const StatelessFeatureFunction*>& sfs =
staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i) {
sfs[i]->Evaluate(m_targetPhrase, &m_scoreBreakdown);
}
const vector<const StatefulFeatureFunction*>& ffs =
staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i) {
m_ffStates[i] = ffs[i]->Evaluate(
*this,
@ -348,7 +348,8 @@ void Hypothesis::CalcRemainingScore()
IFVERBOSE(2) { t = clock(); } // track time excluding LM
// WORD PENALTY
m_scoreBreakdown.PlusEquals(staticData.GetWordPenaltyProducer(), - (float) m_currTargetWordsRange.GetNumWordsCovered());
m_scoreBreakdown.PlusEquals(m_manager.GetTranslationSystem()->GetWordPenaltyProducer()
, - (float)m_currTargetWordsRange.GetNumWordsCovered());
// TOTAL
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;

View File

@ -276,7 +276,8 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
, *bmContainer
, transOptList
, futureScore,
m_manager.GetSource());
m_manager.GetSource(),
m_manager.GetTranslationSystem());
bmContainer->AddBackwardsEdge(edge);
}

View File

@ -36,6 +36,7 @@ class WordsRange;
class Factor;
class PhraseDictionary;
class TranslationOptionCollection;
class TranslationSystem;
typedef std::vector<Word> LabelList;
@ -115,7 +116,7 @@ public:
virtual void Print(std::ostream&) const =0;
//! create trans options specific to this InputType
virtual TranslationOptionCollection* CreateTranslationOptionCollection() const=0;
virtual TranslationOptionCollection* CreateTranslationOptionCollection(const TranslationSystem* system) const=0;
//! return substring. Only valid for Sentence class. TODO - get rid of this fn
virtual Phrase GetSubString(const WordsRange&) const =0;

View File

@ -18,6 +18,7 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <set>
#include "StaticData.h"
#include "LMList.h"
@ -30,6 +31,10 @@ using namespace std;
namespace Moses
{
LMList::~LMList()
{
}
void LMList::CleanUp()
{
RemoveAllInColl(m_coll);
}
@ -40,7 +45,7 @@ void LMList::CalcScore(const Phrase &phrase, float &retFullScore, float &retNGra
for (lmIter = begin(); lmIter != end(); ++lmIter)
{
const LanguageModel &lm = **lmIter;
const float weightLM = lm.GetWeight();
const float weightLM = lm.GetWeight();
float fullScore, nGramScore;

View File

@ -34,6 +34,7 @@ public:
,m_minInd(std::numeric_limits<size_t>::max())
,m_maxInd(0)
{}
void CleanUp();
~LMList();
void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, ScoreComponentCollection* breakdown) const;

View File

@ -50,6 +50,13 @@ size_t LanguageModel::GetNumScoreComponents() const
return 1;
}
float LanguageModel::GetWeight() const {
size_t lmIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[lmIndex];
}
void LanguageModel::CalcScore(const Phrase &phrase
, float &fullScore
, float &ngramScore) const

View File

@ -118,18 +118,10 @@ public:
return m_sentenceEndArray;
}
//! scoring weight. Shouldn't this now be superceded by ScoreProducer???
float GetWeight() const
{
return m_weight;
}
void SetWeight(float weight)
{
m_weight = weight;
}
virtual std::string GetScoreProducerDescription() const = 0;
float GetWeight() const;
std::string GetScoreProducerWeightShortName() const
{
return "lm";

View File

@ -0,0 +1,69 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModelDelegate_h
#define moses_LanguageModelDelegate_h
#include "LanguageModelSingleFactor.h"
namespace Moses {
//! A language model which delegates all its calculation to another language model.
//! Used when you want to have the same language model with two different weights.
class LanguageModelDelegate: public LanguageModelSingleFactor {
public:
//! Mirrors the delegate's configuration (n-gram order, factor type and
//! sentence boundary markers) so queries against this wrapper see the
//! same settings as the wrapped model.
LanguageModelDelegate(bool registerScore, ScoreIndexManager &scoreIndexManager, LanguageModelSingleFactor* delegate) :
LanguageModelSingleFactor(registerScore, scoreIndexManager), m_delegate(delegate)
{
m_nGramOrder = m_delegate->GetNGramOrder();
m_factorType = m_delegate->GetFactorType();
m_sentenceStart = m_delegate->GetSentenceStart();
m_sentenceEnd = m_delegate->GetSentenceEnd();
m_sentenceStartArray = m_delegate->GetSentenceStartArray();
m_sentenceEndArray = m_delegate->GetSentenceEndArray();
}
//! Intentional no-op: the delegate is assumed to be loaded already, so
//! this wrapper never loads anything itself and always reports success.
virtual bool Load(const std::string &
, FactorType
, size_t)
{
/* do nothing */
return true;
}
//! Score lookup is forwarded verbatim to the wrapped model.
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState, unsigned int* len) const {
return m_delegate->GetValue(contextFactor, finalState, len);
}
private:
LanguageModelSingleFactor* m_delegate; // non-owning? not deleted here -- lifetime appears to belong to whoever built the delegate; TODO confirm
};
}
#endif

View File

@ -57,7 +57,6 @@ namespace LanguageModelFactory
, const std::vector<FactorType> &factorTypes
, size_t nGramOrder
, const std::string &languageModelFile
, float weight
, ScoreIndexManager &scoreIndexManager
, int dub)
{
@ -131,7 +130,7 @@ namespace LanguageModelFactory
switch (lm->GetLMType())
{
case SingleFactor:
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], weight, nGramOrder))
if (! static_cast<LanguageModelSingleFactor*>(lm)->Load(languageModelFile, factorTypes[0], nGramOrder))
{
cerr << "single factor model failed" << endl;
delete lm;
@ -139,7 +138,7 @@ namespace LanguageModelFactory
}
break;
case MultiFactor:
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, weight, nGramOrder))
if (! static_cast<LanguageModelMultiFactor*>(lm)->Load(languageModelFile, factorTypes, nGramOrder))
{
cerr << "multi factor model failed" << endl;
delete lm;

View File

@ -23,7 +23,6 @@ namespace LanguageModelFactory {
, const std::vector<FactorType> &factorTypes
, size_t nGramOrder
, const std::string &languageModelFile
, float weight
, ScoreIndexManager &scoreIndexManager
, int dub);

View File

@ -63,7 +63,6 @@ LanguageModelIRST::~LanguageModelIRST()
bool LanguageModelIRST::Load(const std::string &filePath,
FactorType factorType,
float weight,
size_t nGramOrder)
{
const char *SepString = " \t\n";
@ -72,7 +71,6 @@ bool LanguageModelIRST::Load(const std::string &filePath,
FactorCollection &factorCollection = FactorCollection::Instance();
m_factorType = factorType;
m_weight = weight;
m_nGramOrder = nGramOrder;
// get name of LM file and, if any, of the micro-macro map file

View File

@ -70,7 +70,6 @@ public:
~LanguageModelIRST();
bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder);
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;

View File

@ -17,7 +17,6 @@ LanguageModelInternal::LanguageModelInternal(bool registerScore, ScoreIndexManag
bool LanguageModelInternal::Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
assert(nGramOrder <= 3);
@ -33,7 +32,6 @@ bool LanguageModelInternal::Load(const std::string &filePath
m_filePath = filePath;
m_factorType = factorType;
m_weight = weight;
m_nGramOrder = nGramOrder;
// make sure start & end tags in factor collection

View File

@ -29,7 +29,6 @@ public:
LanguageModelInternal(bool registerScore, ScoreIndexManager &scoreIndexManager);
bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder);
float GetValue(const std::vector<const Word*> &contextFactor
, State* finalState = 0

View File

@ -61,11 +61,9 @@ public:
bool Load(const std::string &filePath
, const std::vector<FactorType> &factorTypes
, float weight
, size_t nGramOrder)
{
m_factorTypes = FactorMask(factorTypes);
m_weight = weight;
m_filePath = filePath;
m_nGramOrder = nGramOrder;
@ -82,7 +80,7 @@ public:
m_sentenceEndArray[factorType] = factorCollection.AddFactor(Output, factorType, EOS_);
}
return m_lmImpl->Load(filePath, m_implFactor, weight, nGramOrder);
return m_lmImpl->Load(filePath, m_implFactor, nGramOrder);
}
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const

View File

@ -44,7 +44,6 @@ protected:
public:
virtual bool Load(const std::string &filePath
, const std::vector<FactorType> &factorTypes
, float weight
, size_t nGramOrder) = 0;
LMType GetLMType() const

View File

@ -47,7 +47,7 @@ namespace Moses
}
bool LanguageModelParallelBackoff::Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, float weight, size_t nGramOrder)
bool LanguageModelParallelBackoff::Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder)
{
cerr << "Loading Language Model Parallel Backoff!!!\n";
@ -99,7 +99,6 @@ bool LanguageModelParallelBackoff::Load(const std::string &filePath, const std::
cerr << "fngramLM reads!\n";
m_weight = weight;
m_filePath = filePath;
m_nGramOrder= nGramOrder;

View File

@ -83,7 +83,7 @@ public:
~LanguageModelParallelBackoff();
bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, float weight, size_t nGramOrder);
bool Load(const std::string &filePath, const std::vector<FactorType> &factorTypes, size_t nGramOrder);
VocabIndex GetLmID( const std::string &str ) const;

View File

@ -32,13 +32,12 @@ namespace Moses
{
using namespace std;
bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType, float weight,
bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType,
size_t nGramOrder) {
cerr << "Loading LanguageModelRandLM..." << endl;
FactorCollection &factorCollection = FactorCollection::Instance();
m_filePath = filePath;
m_factorType = factorType;
m_weight = weight;
m_nGramOrder = nGramOrder;
int cache_MB = 50; // increase cache size
m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);

View File

@ -40,7 +40,7 @@ class LanguageModelRandLM : public LanguageModelSingleFactor {
public:
LanguageModelRandLM(bool registerScore, ScoreIndexManager &scoreIndexManager)
: LanguageModelSingleFactor(registerScore, scoreIndexManager), m_lm(0) {}
bool Load(const std::string &filePath, FactorType factorType, float weight, size_t nGramOrder);
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len=0) const;
~LanguageModelRandLM() {
delete m_lm;

View File

@ -21,11 +21,9 @@ LanguageModelRemote::LanguageModelRemote(bool registerScore, ScoreIndexManager &
bool LanguageModelRemote::Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
m_factorType = factorType;
m_weight = weight;
m_nGramOrder = nGramOrder;
int cutAt = filePath.find(':',0);

View File

@ -35,7 +35,6 @@ class LanguageModelRemote : public LanguageModelSingleFactor {
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;
bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder);
};

View File

@ -50,13 +50,11 @@ LanguageModelSRI::~LanguageModelSRI()
bool LanguageModelSRI::Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
m_srilmVocab = new ::Vocab();
m_srilmModel = new Ngram(*m_srilmVocab, nGramOrder);
m_factorType = factorType;
m_weight = weight;
m_nGramOrder = nGramOrder;
m_filePath = filePath;

View File

@ -54,7 +54,6 @@ public:
~LanguageModelSRI();
bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder);
virtual float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = 0, unsigned int* len = 0) const;

View File

@ -46,7 +46,6 @@ public:
virtual ~LanguageModelSingleFactor();
virtual bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder) = 0;
LMType GetLMType() const
@ -71,14 +70,6 @@ public:
{
return m_factorType;
}
float GetWeight() const
{
return m_weight;
}
void SetWeight(float weight)
{
m_weight = weight;
}
std::string GetScoreProducerDescription() const;
};

View File

@ -58,11 +58,9 @@ public:
}
bool Load(const std::string &filePath
, FactorType factorType
, float weight
, size_t nGramOrder)
{
m_factorType = factorType;
m_weight = weight;
m_filePath = filePath;
m_nGramOrder = nGramOrder;
@ -73,7 +71,7 @@ public:
m_sentenceStartArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, BOS_);
m_sentenceEndArray[m_factorType] = factorCollection.AddFactor(Output, m_factorType, EOS_);
return m_lmImpl->Load(filePath, m_factorType, weight, nGramOrder);
return m_lmImpl->Load(filePath, m_factorType, nGramOrder);
}
float GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL, unsigned int* len = NULL) const

View File

@ -11,6 +11,7 @@ libmoses_la_HEADERS = \
ChartRule.h \
ChartRuleCollection.h \
ConfusionNet.h \
DecodeFeature.h \
DecodeGraph.h \
DecodeStep.h \
DecodeStepGeneration.h \
@ -91,6 +92,7 @@ libmoses_la_HEADERS = \
TranslationOptionCollectionConfusionNet.h \
TranslationOptionCollectionText.h \
TranslationOptionList.h \
TranslationSystem.h \
TreeInput.h \
TrellisPath.h \
TrellisPathCollection.h \
@ -136,6 +138,7 @@ libmoses_la_SOURCES = \
ChartRule.cpp \
ChartRuleCollection.cpp \
ConfusionNet.cpp \
DecodeFeature.cpp \
DecodeGraph.cpp \
DecodeStep.cpp \
DecodeStepGeneration.cpp \
@ -214,6 +217,7 @@ libmoses_la_SOURCES = \
TranslationOptionCollectionConfusionNet.cpp \
TranslationOptionCollectionText.cpp \
TranslationOptionList.cpp \
TranslationSystem.cpp \
TreeInput.cpp \
TrellisPath.cpp \
TrellisPathCollection.cpp \

View File

@ -50,16 +50,16 @@ using namespace std;
namespace Moses
{
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
:m_source(source)
,m_transOptColl(source.CreateTranslationOptionCollection())
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system)
:m_system(system)
,m_transOptColl(source.CreateTranslationOptionCollection(system))
,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
,m_start(clock())
,interrupted_flag(0)
,m_hypoId(0)
,m_source(source)
{
const StaticData &staticData = StaticData::Instance();
staticData.InitializeBeforeSentenceProcessing(source);
m_system->InitializeBeforeSentenceProcessing(source);
}
Manager::~Manager()
@ -67,7 +67,7 @@ Manager::~Manager()
delete m_transOptColl;
delete m_search;
StaticData::Instance().CleanUpAfterSentenceProcessing();
m_system->CleanUpAfterSentenceProcessing();
clock_t end = clock();
float et = (end - m_start);
@ -83,13 +83,11 @@ Manager::~Manager()
void Manager::ProcessSentence()
{
// reset statistics
const StaticData &staticData = StaticData::Instance();
ResetSentenceStats(m_source);
// collect translation options for this sentence
vector <DecodeGraph*> decodeGraphs = staticData.GetDecodeStepVL(m_source);
m_transOptColl->CreateTranslationOptions(decodeGraphs);
RemoveAllInColl(decodeGraphs);
m_system->InitializeBeforeSentenceProcessing(m_source);
m_transOptColl->CreateTranslationOptions();
// some reporting on how long this took
clock_t gotOptions = clock();
@ -305,9 +303,8 @@ void Manager::CalcDecoderStatistics() const
}
}
void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId)
void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId, const TranslationSystem* system)
{
const StaticData &staticData = StaticData::Instance();
const Hypothesis *prevHypo = hypo->GetPrevHypo();
@ -318,7 +315,7 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
<< "\ta=";
// phrase table scores
const std::vector<PhraseDictionaryFeature*> &phraseTables = staticData.GetPhraseDictionaries();
const std::vector<PhraseDictionaryFeature*> &phraseTables = system->GetPhraseDictionaries();
std::vector<PhraseDictionaryFeature*>::const_iterator iterPhraseTable;
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
{
@ -335,7 +332,7 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
// language model scores
outputWordGraphStream << "\tl=";
const LMList &lmList = staticData.GetAllLM();
const LMList &lmList = system->GetLanguageModels();
LMList::const_iterator iterLM;
for (iterLM = lmList.begin() ; iterLM != lmList.end() ; ++iterLM)
{
@ -353,10 +350,10 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
// re-ordering
outputWordGraphStream << "\tr=";
outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(staticData.GetDistortionScoreProducer());
outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(system->GetDistortionProducer());
// lexicalised re-ordering
const std::vector<LexicalReordering*> &lexOrderings = staticData.GetReorderModels();
const std::vector<LexicalReordering*> &lexOrderings = system->GetReorderModels();
std::vector<LexicalReordering*>::const_iterator iterLexOrdering;
for (iterLexOrdering = lexOrderings.begin() ; iterLexOrdering != lexOrderings.end() ; ++iterLexOrdering)
{
@ -398,7 +395,7 @@ void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStre
for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo)
{
const Hypothesis *hypo = *iterHypo;
OutputWordGraph(outputWordGraphStream, hypo, linkId);
OutputWordGraph(outputWordGraphStream, hypo, linkId, m_system);
if (outputNBest)
{
@ -409,7 +406,7 @@ void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStre
for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList)
{
const Hypothesis *loserHypo = *iterArcList;
OutputWordGraph(outputWordGraphStream, loserHypo, linkId);
OutputWordGraph(outputWordGraphStream, loserHypo, linkId,m_system);
}
}
} //if (outputNBest)

View File

@ -96,6 +96,7 @@ class Manager
Manager();
Manager(Manager const&);
void operator=(Manager const&);
const TranslationSystem* m_system;
protected:
// data
// InputType const& m_source; /**< source sentence to be translated */
@ -118,9 +119,10 @@ protected:
public:
InputType const& m_source; /**< source sentence to be translated */
Manager(InputType const& source, SearchAlgorithm searchAlgorithm);
Manager(InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system);
~Manager();
const TranslationOptionCollection* getSntTranslationOptions();
const TranslationOptionCollection* getSntTranslationOptions();
const TranslationSystem* GetTranslationSystem() {return m_system;}
void ProcessSentence();
const Hypothesis *GetBestHypothesis() const;

View File

@ -77,6 +77,7 @@ Parameter::Parameter()
AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
AddParam("verbose", "v", "verbosity level of the logging");
AddParam("weight-d", "d", "weight(s) for distortion (reordering components)");
AddParam("weight-lr", "lr", "weight(s) for lexicalized reordering, if not included in weight-d");
AddParam("weight-generation", "g", "weight(s) for generation components");
AddParam("weight-i", "I", "weight(s) for word insertion - used for parameters from confusion network and lattice input links");
AddParam("weight-l", "lm", "weight(s) for language models");
@ -85,7 +86,6 @@ Parameter::Parameter()
AddParam("weight-w", "w", "weight for word penalty");
AddParam("weight-u", "u", "weight for unknown word penalty");
AddParam("weight-e", "e", "weight for word deletion");
AddParam("weight-file", "wf", "file containing labeled weights");
AddParam("output-factors", "list if factors in the output");
AddParam("cache-path", "?");
AddParam("distortion-limit", "dl", "distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited)");
@ -132,6 +132,7 @@ Parameter::Parameter()
AddParam("glue-rule-type", "Left branching, or both branching. 0=left. 2=both. 1=right(not implemented). Default=0");
AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
AddParam("translation-systems", "specify multiple translation systems, each consisting of an id, followed by a set of models ids, eg '0 T1 R1 L0'");
}
Parameter::~Parameter()

View File

@ -35,9 +35,9 @@ PartialTranslOptColl::PartialTranslOptColl()
/** add a partial translation option to the collection (without pruning) */
void PartialTranslOptColl::AddNoPrune(TranslationOption *partialTranslOpt)
void PartialTranslOptColl::AddNoPrune(const TranslationSystem* system, TranslationOption *partialTranslOpt)
{
partialTranslOpt->CalcScore();
partialTranslOpt->CalcScore(system);
if (partialTranslOpt->GetFutureScore() >= m_worstScore)
{
m_list.push_back(partialTranslOpt);
@ -54,10 +54,10 @@ void PartialTranslOptColl::AddNoPrune(TranslationOption *partialTranslOpt)
/** add a partial translation option to the collection, prune if necessary.
* This is done similar to the Prune() in TranslationOptionCollection */
void PartialTranslOptColl::Add(TranslationOption *partialTranslOpt)
void PartialTranslOptColl::Add(const TranslationSystem* system, TranslationOption *partialTranslOpt)
{
// add
AddNoPrune( partialTranslOpt );
AddNoPrune(system,partialTranslOpt );
// done if not too large (lazy pruning, only if twice as large as max)
if ( m_list.size() > 2 * m_maxSize ) {

View File

@ -59,8 +59,8 @@ public:
RemoveAllInColl( m_list );
}
void AddNoPrune(TranslationOption *partialTranslOpt);
void Add(TranslationOption *partialTranslOpt);
void AddNoPrune(const TranslationSystem* system, TranslationOption *partialTranslOpt);
void Add(const TranslationSystem* system, TranslationOption *partialTranslOpt);
void Prune();
/** returns list of translation options */

View File

@ -51,10 +51,9 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
, size_t tableLimit
, const std::string &targetFile // default param
, const std::string &alignmentsFile) // default param
:m_numScoreComponent(numScoreComponent),
:DecodeFeature(input,output)
, m_numScoreComponent(numScoreComponent),
m_numInputScores(numInputScores),
m_input(input),
m_output(output),
m_filePath(filePath),
m_weight(weight),
m_tableLimit(tableLimit),
@ -64,17 +63,14 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
{
const StaticData& staticData = StaticData::Instance();
const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
//Thread-safe phrase dictionaries get loaded now
if (implementation == Memory || implementation == SCFG || implementation == OnDisk || implementation == SuffixArray) {
m_threadSafePhraseDictionary.reset(LoadPhraseTable());
m_useThreadSafePhraseDictionary = true;
} else {
//Other types are lazy-loaded into thread-specific data
m_useThreadSafePhraseDictionary = false;
}
}
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system) {
const StaticData& staticData = StaticData::Instance();
if (m_implementation == Memory)
{ // memory phrase table
@ -90,27 +86,24 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
}
PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this);
assert(pdm->Load(m_input
, m_output
assert(pdm->Load(GetInput(), GetOutput()
, m_filePath
, m_weight
, m_tableLimit
, staticData.GetAllLM()
, staticData.GetWeightWordPenalty()));
, system->GetLanguageModels()
, system->GetWeightWordPenalty()));
return pdm;
}
else if (m_implementation == Binary)
{
const StaticData& staticData = StaticData::Instance();
PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
assert(pdta->Load(
m_input
, m_output
assert(pdta->Load( GetInput()
, GetOutput()
, m_filePath
, m_weight
, m_tableLimit
, staticData.GetAllLM()
, staticData.GetWeightWordPenalty()));
, system->GetLanguageModels()
, system->GetWeightWordPenalty()));
return pdta;
}
else if (m_implementation == SCFG)
@ -122,13 +115,13 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
}
PhraseDictionarySCFG* pdm = new PhraseDictionarySCFG(m_numScoreComponent,this);
assert(pdm->Load(m_input
, m_output
assert(pdm->Load(GetInput()
, GetOutput()
, m_filePath
, m_weight
, m_tableLimit
, staticData.GetAllLM()
, staticData.GetWeightWordPenalty()));
, system->GetLanguageModels()
, system->GetWordPenaltyProducer()));
return pdm;
}
else if (m_implementation == OnDisk)
@ -136,13 +129,13 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
PhraseDictionaryOnDisk* pdta = new PhraseDictionaryOnDisk(m_numScoreComponent, this);
pdta->Load(
m_input
, m_output
GetInput()
, GetOutput()
, m_filePath
, m_weight
, m_tableLimit);
//, staticData.GetAllLM()
//, staticData.GetWeightWordPenalty()));
, m_tableLimit
, system->GetLanguageModels()
, system->GetWordPenaltyProducer());
assert(pdta);
return pdta;
}
@ -150,13 +143,13 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
{
PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
assert(pdta->Load(
m_input
, m_output
GetInput()
, GetOutput()
, m_filePath
, m_weight
, m_tableLimit
, staticData.GetAllLM()
, staticData.GetWeightWordPenalty()));
, system->GetLanguageModels()
, system->GetWeightWordPenalty()));
return pdta;
}
else if (m_implementation == SuffixArray)
@ -164,14 +157,14 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
#ifndef WIN32
PhraseDictionaryDynSuffixArray *pd = new PhraseDictionaryDynSuffixArray(m_numScoreComponent, this);
if(!(pd->Load(
m_input
,m_output
GetInput()
,GetOutput()
,m_filePath
,m_targetFile
, m_alignmentsFile
, m_weight, m_tableLimit
, staticData.GetAllLM()
, staticData.GetWeightWordPenalty())))
, system->GetLanguageModels()
, system->GetWeightWordPenalty())))
{
std::cerr << "FAILED TO LOAD\n" << endl;
delete pd;
@ -182,28 +175,52 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable() {
#else
assert(false);
#endif
}
}
const PhraseDictionary* PhraseDictionaryFeature::GetDictionary(const InputType& source)
{
PhraseDictionary* dict = GetDictionary();
dict->InitializeForInput(source);
return dict;
} else {
std::cerr << "Unknown phrase table type " << m_implementation << endl;
assert(false);
}
}
PhraseDictionary* PhraseDictionaryFeature::GetDictionary() {
PhraseDictionary* dict;
if (m_useThreadSafePhraseDictionary) {
dict = m_threadSafePhraseDictionary.get();
} else {
if (!m_threadUnsafePhraseDictionary.get()) {
m_threadUnsafePhraseDictionary.reset(LoadPhraseTable());
}
dict = m_threadUnsafePhraseDictionary.get();
void PhraseDictionaryFeature::InitDictionary(const TranslationSystem* system)
{
//Thread-safe phrase dictionaries get loaded now
if (m_useThreadSafePhraseDictionary && !m_threadSafePhraseDictionary.get()) {
IFVERBOSE(1)
PrintUserTime("Start loading phrase table from " + m_filePath);
m_threadSafePhraseDictionary.reset(LoadPhraseTable(system));
IFVERBOSE(1)
PrintUserTime("Finished loading phrase tables");
}
//Other types will be lazy loaded
}
//Called when we start translating a new sentence
void PhraseDictionaryFeature::InitDictionary(const TranslationSystem* system, const InputType& source)
{
PhraseDictionary* dict;
if (m_useThreadSafePhraseDictionary) {
//thread safe dictionary should already be loaded
dict = m_threadSafePhraseDictionary.get();
} else {
//thread-unsafe dictionary may need to be loaded if this is a new thread.
if (!m_threadUnsafePhraseDictionary.get()) {
m_threadUnsafePhraseDictionary.reset(LoadPhraseTable(system));
}
assert(dict);
return dict;
dict = m_threadUnsafePhraseDictionary.get();
}
assert(dict);
dict->InitializeForInput(source);
}
const PhraseDictionary* PhraseDictionaryFeature::GetDictionary() const {
PhraseDictionary* dict;
if (m_useThreadSafePhraseDictionary) {
dict = m_threadSafePhraseDictionary.get();
} else {
dict = m_threadUnsafePhraseDictionary.get();
}
assert(dict);
return dict;
}

View File

@ -37,7 +37,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TargetPhrase.h"
#include "Dictionary.h"
#include "TargetPhraseCollection.h"
#include "FeatureFunction.h"
#include "DecodeFeature.h"
namespace Moses
{
@ -47,6 +47,7 @@ class InputType;
class WordsRange;
class ChartRuleCollection;
class CellCollection;
class TranslationSystem;
class PhraseDictionaryFeature;
/**
@ -85,7 +86,7 @@ class PhraseDictionary: public Dictionary {
/**
* Represents a feature derived from a phrase table.
*/
class PhraseDictionaryFeature : public StatelessFeatureFunction
class PhraseDictionaryFeature : public DecodeFeature
{
@ -114,20 +115,22 @@ class PhraseDictionaryFeature : public StatelessFeatureFunction
size_t GetNumScoreComponents() const;
size_t GetNumInputScores() const;
const PhraseDictionary* GetDictionary(const InputType& source);
// TODO - get rid of this, make Cleanup() const. only to be called by static data
PhraseDictionary* GetDictionary();
//Initialises the dictionary (may involve loading from file)
void InitDictionary(const TranslationSystem* system);
//Initialise the dictionary for this source (in this thread)
void InitDictionary(const TranslationSystem* system,const InputType& source);
//Get the dictionary. Be sure to initialise it first.
const PhraseDictionary* GetDictionary() const;
private:
/** Load the appropriate phrase table */
PhraseDictionary* LoadPhraseTable();
PhraseDictionary* LoadPhraseTable(const TranslationSystem* system);
size_t m_numScoreComponent;
unsigned m_numInputScores;
std::vector<FactorType> m_input;
std::vector<FactorType> m_output;
std::string m_filePath;
std::vector<float> m_weight;
size_t m_tableLimit;

View File

@ -50,10 +50,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
m_tableLimit = tableLimit;
//factors
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
VERBOSE(2,"PhraseDictionaryMemory: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
// data from file
InputFileStream inFile(filePath);

View File

@ -27,6 +27,7 @@
using namespace std;
namespace Moses
{
PhraseDictionaryOnDisk::~PhraseDictionaryOnDisk()
@ -38,12 +39,14 @@ bool PhraseDictionaryOnDisk::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit)
, size_t tableLimit
, const LMList& languageModels
, const WordPenaltyProducer* wpProducer)
{
m_languageModels = &(languageModels);
m_wpProducer = wpProducer;
m_filePath = filePath;
m_tableLimit = tableLimit;
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
m_inputFactorsVec = input;
m_outputFactorsVec = output;

View File

@ -35,6 +35,7 @@ namespace Moses
class TargetPhraseCollection;
class ProcessedRuleStackOnDisk;
class CellCollection;
class WordPenaltyProducer;
class PhraseDictionaryOnDisk : public PhraseDictionary
{
@ -42,6 +43,8 @@ class PhraseDictionaryOnDisk : public PhraseDictionary
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&);
protected:
const LMList* m_languageModels;
const WordPenaltyProducer* m_wpProducer;;
std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
std::vector<float> m_weight;
std::string m_filePath;
@ -59,7 +62,7 @@ protected:
public:
PhraseDictionaryOnDisk(size_t numScoreComponent, PhraseDictionaryFeature* feature)
: MyBase(numScoreComponent, feature)
: MyBase(numScoreComponent, feature), m_languageModels(NULL)
{}
virtual ~PhraseDictionaryOnDisk();
@ -70,7 +73,9 @@ public:
, const std::vector<FactorType> &output
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit);
, size_t tableLimit,
const LMList& languageModels,
const WordPenaltyProducer* wpProducer);
std::string GetScoreProducerDescription() const
{ return "BerkeleyPt"; }

View File

@ -34,8 +34,6 @@ namespace Moses
bool adhereTableLimit,const CellCollection &cellColl) const
{
const StaticData &staticData = StaticData::Instance();
float weightWP = staticData.GetWeightWordPenalty();
const LMList &lmList = staticData.GetAllLM();
size_t rulesLimit = StaticData::Instance().GetRuleLimit();
// source phrase
@ -232,8 +230,8 @@ namespace Moses
,m_outputFactorsVec
,*this
,m_weight
,weightWP
,lmList
,m_wpProducer
,*m_languageModels
,*cachedSource
,m_filePath
, m_dbWrapper.GetVocab());

View File

@ -79,19 +79,16 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
, const vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP)
, const WordPenaltyProducer* wpProducer)
{
m_filePath = filePath;
m_tableLimit = tableLimit;
//factors
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
// data from file
InputFileStream inFile(filePath);
bool ret = Load(input, output, inFile, weight, tableLimit, languageModels, weightWP);
bool ret = Load(input, output, inFile, weight, tableLimit, languageModels, wpProducer);
return ret;
}
@ -101,14 +98,13 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP)
, const WordPenaltyProducer* wpProducer)
{
PrintUserTime("Start loading new format pt model");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
VERBOSE(2,"PhraseDictionarySCFG: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
string line;
size_t count = 0;
@ -171,7 +167,7 @@ bool PhraseDictionarySCFG::Load(const std::vector<FactorType> &input
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
targetPhrase->SetScoreChart(GetFeature(), scoreVector, weight, languageModels);
targetPhrase->SetScoreChart(GetFeature(), scoreVector, weight, languageModels,wpProducer);
// count info for backoff
if (tokens.size() >= 6)

View File

@ -59,7 +59,7 @@ namespace Moses
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP);
, const WordPenaltyProducer* wpProducer);
void DeleteDuplicates(ProcessedRuleColl &nodes) const; // keep only backoff, if it exists
@ -88,7 +88,7 @@ namespace Moses
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP);
, const WordPenaltyProducer* wpProducer);
const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase &source) const;

View File

@ -50,10 +50,6 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
return false;
}
// set Dictionary members
m_inputFactors = FactorMask(input);
m_outputFactors = FactorMask(output);
VERBOSE(2,"PhraseDictionaryTreeAdaptor: input=" << m_inputFactors << " output=" << m_outputFactors << std::endl);
// set PhraseDictionary members
m_tableLimit=tableLimit;

View File

@ -16,9 +16,8 @@ float ScoreComponentCollection::GetWeightedScore() const
return ret;
}
void ScoreComponentCollection::ZeroAllLM()
void ScoreComponentCollection::ZeroAllLM(const LMList& lmList)
{
const LMList &lmList = StaticData::Instance().GetAllLM();
for (size_t ind = lmList.GetMinIndex(); ind <= lmList.GetMaxIndex(); ++ind)
{
@ -26,9 +25,8 @@ void ScoreComponentCollection::ZeroAllLM()
}
}
void ScoreComponentCollection::PlusEqualsAllLM(const ScoreComponentCollection& rhs)
void ScoreComponentCollection::PlusEqualsAllLM(const LMList& lmList, const ScoreComponentCollection& rhs)
{
const LMList &lmList = StaticData::Instance().GetAllLM();
for (size_t ind = lmList.GetMinIndex(); ind <= lmList.GetMaxIndex(); ++ind)
{

View File

@ -24,6 +24,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <numeric>
#include <cassert>
#include "LMList.h"
#include "ScoreProducer.h"
#include "ScoreIndexManager.h"
#include "TypeDef.h"
@ -199,8 +201,8 @@ public:
float GetWeightedScore() const;
void ZeroAllLM();
void PlusEqualsAllLM(const ScoreComponentCollection& rhs);
void ZeroAllLM(const LMList& lmList);
void PlusEqualsAllLM(const LMList& lmList, const ScoreComponentCollection& rhs);
};

View File

@ -22,13 +22,6 @@ void ScoreIndexManager::AddScoreProducer(const ScoreProducer* sp)
assert(m_begins.size() == (sp->GetScoreBookkeepingID()));
m_producers.push_back(sp);
if (sp->IsStateless()) {
const StatelessFeatureFunction* ff = static_cast<const StatelessFeatureFunction*>(sp);
if (!ff->ComputeValueInTranslationOption())
m_stateless.push_back(ff);
} else {
m_stateful.push_back(static_cast<const StatefulFeatureFunction*>(sp));
}
m_begins.push_back(m_last);
size_t numScoreCompsProduced = sp->GetNumScoreComponents();
@ -142,7 +135,7 @@ std::ostream& operator<<(std::ostream& os, const ScoreIndexManager& sim)
for (size_t i = 0; i < sim.m_featureNames.size(); ++i) {
os << sim.m_featureNames[i] << endl;
}
os << "Stateless: " << sim.m_stateless.size() << "\tStateful: " << sim.m_stateful.size() << endl;
os << endl;
return os;
}

View File

@ -48,17 +48,12 @@ public:
void SerializeFeatureNamesToPB(hgmert::Hypergraph* hg) const;
#endif
void InitWeightVectorFromFile(const std::string& fnam, std::vector<float>* m_allWeights) const;
const std::vector<const ScoreProducer*>& GetFeatureFunctions() const { return m_producers; }
const std::vector<const StatefulFeatureFunction*>& GetStatefulFeatureFunctions() const { return m_stateful; }
const std::vector<const StatelessFeatureFunction*>& GetStatelessFeatureFunctions() const { return m_stateless; }
private:
ScoreIndexManager(const ScoreIndexManager&); // don't implement
std::vector<size_t> m_begins;
std::vector<size_t> m_ends;
std::vector<const ScoreProducer*> m_producers; /**< all the score producers in this run */
std::vector<const StatefulFeatureFunction*> m_stateful; /**< all the score producers in this run */
std::vector<const StatelessFeatureFunction*> m_stateless; /**< all the score producers in this run */
std::vector<std::string> m_featureNames;
std::vector<std::string> m_featureShortNames;
size_t m_last;

View File

@ -199,11 +199,11 @@ void Sentence::InitStartEndWord()
}
TranslationOptionCollection*
Sentence::CreateTranslationOptionCollection() const
Sentence::CreateTranslationOptionCollection(const TranslationSystem* system) const
{
size_t maxNoTransOptPerCoverage = StaticData::Instance().GetMaxNoTransOptPerCoverage();
float transOptThreshold = StaticData::Instance().GetTranslationOptionThreshold();
TranslationOptionCollection *rv= new TranslationOptionCollectionText(*this, maxNoTransOptPerCoverage, transOptThreshold);
TranslationOptionCollection *rv= new TranslationOptionCollectionText(system, *this, maxNoTransOptPerCoverage, transOptThreshold);
assert(rv);
return rv;
}

View File

@ -92,7 +92,7 @@ class Sentence : public Phrase, public InputType
int Read(std::istream& in,const std::vector<FactorType>& factorOrder);
void Print(std::ostream& out) const;
TranslationOptionCollection* CreateTranslationOptionCollection() const;
TranslationOptionCollection* CreateTranslationOptionCollection(const TranslationSystem* system) const;
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString

View File

@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LanguageModelSingleFactor.h"
#include "LanguageModelMultiFactor.h"
#include "LanguageModelFactory.h"
#include "LanguageModelDelegate.h"
#include "LexicalReordering.h"
#include "GlobalLexicalModel.h"
#include "SentenceStats.h"
@ -66,17 +67,15 @@ static size_t CalcMax(size_t x, const vector<size_t>& y, const vector<size_t>& z
StaticData StaticData::s_instance;
StaticData::StaticData()
:m_fLMsLoaded(false)
:m_numLinkParams(1)
,m_fLMsLoaded(false)
,m_sourceStartPosMattersForRecombination(false)
,m_inputType(SentenceInput)
,m_numInputScores(0)
,m_distortionScoreProducer(0)
,m_wpProducer(0)
,m_detailedTranslationReportingFilePath()
,m_onlyDistinctNBest(false)
,m_factorDelimiter("|") // default delimiter between factors
,m_isAlwaysCreateDirectTranslationOption(false)
,m_sourceStartPosMattersForRecombination(false)
,m_numLinkParams(1)
{
m_maxFactorIdx[0] = 0; // source side
m_maxFactorIdx[1] = 0; // target side
@ -121,7 +120,9 @@ bool StaticData::LoadData(Parameter *parameter)
m_recoverPath = false;
}
}
// factor delimiter
if (m_parameter->GetParam("factor-delimiter").size() > 0) {
@ -284,14 +285,17 @@ bool StaticData::LoadData(Parameter *parameter)
}
}
// score weights
m_weightWordPenalty = Scan<float>( m_parameter->GetParam("weight-w")[0] );
m_wpProducer = new WordPenaltyProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightWordPenalty);
// word penalties
for (size_t i = 0; i < m_parameter->GetParam("weight-w").size(); ++i) {
float weightWordPenalty = Scan<float>( m_parameter->GetParam("weight-w")[i] );
m_wordPenaltyProducers.push_back(new WordPenaltyProducer(m_scoreIndexManager));
m_allWeights.push_back(weightWordPenalty);
}
m_weightUnknownWord = (m_parameter->GetParam("weight-u").size() > 0) ? Scan<float>(m_parameter->GetParam("weight-u")[0]) : 1;
float weightUnknownWord = (m_parameter->GetParam("weight-u").size() > 0) ? Scan<float>(m_parameter->GetParam("weight-u")[0]) : 1;
m_unknownWordPenaltyProducer = new UnknownWordPenaltyProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightUnknownWord);
m_allWeights.push_back(weightUnknownWord);
// reordering constraints
m_maxDistortion = (m_parameter->GetParam("distortion-limit").size() > 0) ?
@ -381,7 +385,7 @@ bool StaticData::LoadData(Parameter *parameter)
m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
m_timeout = (GetTimeoutThreshold() == -1) ? false : true;
m_timeout = (GetTimeoutThreshold() == (size_t)-1) ? false : true;
m_lmcache_cleanup_threshold = (m_parameter->GetParam("clean-lm-cache").size() > 0) ?
@ -439,18 +443,94 @@ bool StaticData::LoadData(Parameter *parameter)
if (!LoadGenerationTables()) return false;
if (!LoadPhraseTables()) return false;
if (!LoadGlobalLexicalModel()) return false;
if (!LoadDecodeGraphs()) return false;
//configure the translation systems with these tables
vector<string> tsConfig = m_parameter->GetParam("translation-systems");
if (!tsConfig.size()) {
//use all models in default system.
tsConfig.push_back(TranslationSystem::DEFAULT + " D * L * R * G *");
}
if (m_wordPenaltyProducers.size() != tsConfig.size()) {
UserMessage::Add(string("Mismatch between number of word penalties and number of translation systems"));
return false;
}
if (m_searchAlgorithm == ChartDecoding) {
//insert some null distortion score producers
m_distortionScoreProducers.assign(tsConfig.size(), NULL);
} else {
if (m_distortionScoreProducers.size() != tsConfig.size()) {
UserMessage::Add(string("Mismatch between number of distortion scores and number of translation systems"));
return false;
}
}
for (size_t i = 0; i < tsConfig.size(); ++i) {
vector<string> config = Tokenize(tsConfig[i]);
if (config.size() % 2 != 1) {
UserMessage::Add(string("Incorrect number of fields in Translation System config. Should be an odd number"));
}
m_translationSystems.insert(pair<string, TranslationSystem>(config[0],
TranslationSystem(config[0],m_wordPenaltyProducers[i],m_unknownWordPenaltyProducer,m_distortionScoreProducers[i])));
for (size_t j = 1; j < config.size(); j += 2) {
const string& id = config[j];
const string& tables = config[j+1];
set<size_t> tableIds;
if (tables != "*") {
//selected tables
vector<string> tableIdStrings = Tokenize(tables,",");
vector<size_t> tableIdList;
Scan<size_t>(tableIdList, tableIdStrings);
copy(tableIdList.begin(), tableIdList.end(), inserter(tableIds,tableIds.end()));
}
if (id == "D") {
for (size_t k = 0; k < m_decodeGraphs.size(); ++k) {
if (!tableIds.size() || tableIds.find(k) != tableIds.end()) {
VERBOSE(2,"Adding decoder graph " << k << " to translation system " << config[0] << endl);
m_translationSystems.find(config[0])->second.AddDecodeGraph(m_decodeGraphs[k]);
}
}
} else if (id == "R") {
for (size_t k = 0; k < m_reorderModels.size(); ++k) {
if (!tableIds.size() || tableIds.find(k) != tableIds.end()) {
m_translationSystems.find(config[0])->second.AddReorderModel(m_reorderModels[k]);
VERBOSE(2,"Adding reorder table " << k << " to translation system " << config[0] << endl);
}
}
} else if (id == "G") {
for (size_t k = 0; k < m_globalLexicalModels.size(); ++k) {
if (!tableIds.size() || tableIds.find(k) != tableIds.end()) {
m_translationSystems.find(config[0])->second.AddGlobalLexicalModel(m_globalLexicalModels[k]);
VERBOSE(2,"Adding global lexical model " << k << " to translation system " << config[0] << endl);
}
}
} else if (id == "L") {
size_t lmid = 0;
for (LMList::const_iterator k = m_languageModel.begin(); k != m_languageModel.end(); ++k, ++lmid) {
if (!tableIds.size() || tableIds.find(lmid) != tableIds.end()) {
m_translationSystems.find(config[0])->second.AddLanguageModel(*k);
VERBOSE(2,"Adding language model " << lmid << " to translation system " << config[0] << endl);
}
}
} else {
UserMessage::Add(string("Incorrect translation system identifier: ") + id);
return false;
}
}
//Instigate dictionary loading
m_translationSystems.find(config[0])->second.ConfigDictionaries();
//Add any other features here.
}
m_scoreIndexManager.InitFeatureNames();
if (m_parameter->GetParam("weight-file").size() > 0) {
UserMessage::Add("ERROR: weight-file option is broken\n");
abort();
// if (m_parameter->GetParam("weight-file").size() != 1) {
// UserMessage::Add(string("ERROR: weight-file takes a single parameter"));
// return false;
// }
// string fnam = m_parameter->GetParam("weight-file")[0];
// m_scoreIndexManager.InitWeightVectorFromFile(fnam, &m_allWeights);
}
return true;
}
@ -483,6 +563,10 @@ StaticData::~StaticData()
RemoveAllInColl(m_generationDictionary);
RemoveAllInColl(m_reorderModels);
RemoveAllInColl(m_globalLexicalModels);
RemoveAllInColl(m_decodeGraphs);
RemoveAllInColl(m_wordPenaltyProducers);
RemoveAllInColl(m_distortionScoreProducers);
m_languageModel.CleanUp();
// delete trans opt
map<std::pair<size_t, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
@ -493,8 +577,6 @@ StaticData::~StaticData()
}
// small score producers
delete m_distortionScoreProducer;
delete m_wpProducer;
delete m_unknownWordPenaltyProducer;
//delete m_parameter;
@ -508,10 +590,19 @@ bool StaticData::LoadLexicalReorderingModel()
{
VERBOSE(1, "Loading lexical distortion models...");
const vector<string> fileStr = m_parameter->GetParam("distortion-file");
const vector<string> weightsStr = m_parameter->GetParam("weight-d");
bool hasWeightlr = (m_parameter->GetParam("weight-lr").size() != 0);
vector<string> weightsStr;
if (hasWeightlr) {
weightsStr = m_parameter->GetParam("weight-lr");
} else {
weightsStr = m_parameter->GetParam("weight-d");
}
std::vector<float> weights;
size_t w = 1; //cur weight
if (hasWeightlr) {
w = 0; // if reading from weight-lr, don't have to count first as distortion penalty
}
size_t f = 0; //cur file
//get weights values
VERBOSE(1, "have " << fileStr.size() << " models" << std::endl);
@ -623,48 +714,56 @@ bool StaticData::LoadLanguageModels()
// initialize n-gram order for each factor. populated only by factored lm
const vector<string> &lmVector = m_parameter->GetParam("lmodel-file");
//prevent language models from being loaded twice
map<string,LanguageModel*> languageModelsLoaded;
for(size_t i=0; i<lmVector.size(); i++)
{
vector<string> token = Tokenize(lmVector[i]);
if (token.size() != 4 && token.size() != 5 )
{
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
return false;
}
// type = implementation, SRI, IRST etc
LMImplementation lmImplementation = static_cast<LMImplementation>(Scan<int>(token[0]));
// factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc
vector<FactorType> factorTypes = Tokenize<FactorType>(token[1], ",");
// nGramOrder = 2 = bigram, 3 = trigram, etc
size_t nGramOrder = Scan<int>(token[2]);
string &languageModelFile = token[3];
if (token.size() == 5){
if (lmImplementation==IRST)
languageModelFile += " " + token[4];
else {
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
return false;
}
}
IFVERBOSE(1)
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
LanguageModel *lm = LanguageModelFactory::CreateLanguageModel(
lmImplementation
, factorTypes
, nGramOrder
, languageModelFile
, weightAll[i]
, m_scoreIndexManager
, LMdub[i]);
if (lm == NULL)
{
UserMessage::Add("no LM created. We probably don't have it compiled");
return false;
LanguageModel* lm = NULL;
if (languageModelsLoaded.find(lmVector[i]) != languageModelsLoaded.end()) {
lm = new LanguageModelDelegate(true, m_scoreIndexManager,
static_cast<LanguageModelSingleFactor*>(languageModelsLoaded[lmVector[i]]));
} else {
vector<string> token = Tokenize(lmVector[i]);
if (token.size() != 4 && token.size() != 5 )
{
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
return false;
}
// type = implementation, SRI, IRST etc
LMImplementation lmImplementation = static_cast<LMImplementation>(Scan<int>(token[0]));
// factorType = 0 = Surface, 1 = POS, 2 = Stem, 3 = Morphology, etc
vector<FactorType> factorTypes = Tokenize<FactorType>(token[1], ",");
// nGramOrder = 2 = bigram, 3 = trigram, etc
size_t nGramOrder = Scan<int>(token[2]);
string &languageModelFile = token[3];
if (token.size() == 5){
if (lmImplementation==IRST)
languageModelFile += " " + token[4];
else {
UserMessage::Add("Expected format 'LM-TYPE FACTOR-TYPE NGRAM-ORDER filePath [mapFilePath (only for IRSTLM)]'");
return false;
}
}
IFVERBOSE(1)
PrintUserTime(string("Start loading LanguageModel ") + languageModelFile);
lm = LanguageModelFactory::CreateLanguageModel(
lmImplementation
, factorTypes
, nGramOrder
, languageModelFile
, m_scoreIndexManager
, LMdub[i]);
if (lm == NULL)
{
UserMessage::Add("no LM created. We probably don't have it compiled");
return false;
}
languageModelsLoaded[lmVector[i]] = lm;
}
m_languageModel.Add(lm);
@ -714,12 +813,9 @@ bool StaticData::LoadGenerationTables()
VERBOSE(1, filePath << endl);
m_generationDictionary.push_back(new GenerationDictionary(numFeatures, m_scoreIndexManager));
m_generationDictionary.push_back(new GenerationDictionary(numFeatures, m_scoreIndexManager, input,output));
assert(m_generationDictionary.back() && "could not create GenerationDictionary");
if (!m_generationDictionary.back()->Load(input
, output
, filePath
, Output))
if (!m_generationDictionary.back()->Load(filePath, Output))
{
delete m_generationDictionary.back();
return false;
@ -737,9 +833,10 @@ bool StaticData::LoadGenerationTables()
return true;
}
/* Doesn't load phrase tables any more. Just creates the features. */
bool StaticData::LoadPhraseTables()
{
VERBOSE(2,"About to LoadPhraseTables" << endl);
VERBOSE(2,"Creating phrase table features" << endl);
// language models must be loaded prior to loading phrase tables
assert(m_fLMsLoaded);
@ -776,7 +873,7 @@ bool StaticData::LoadPhraseTables()
oldFileFormat = true;
}
if(!oldFileFormat && token.size() < 5 || oldFileFormat && token.size() != 4)
if((!oldFileFormat && token.size() < 5) || (oldFileFormat && token.size() != 4))
{
UserMessage::Add("invalid phrase table specification");
return false;
@ -869,6 +966,8 @@ bool StaticData::LoadPhraseTables()
std::copy(weight.begin(),weight.end(),std::back_inserter(m_allWeights));
//This is needed for regression testing, but the phrase table
//might not really be loading here
IFVERBOSE(1)
PrintUserTime(string("Start loading PhraseTable ") + filePath);
VERBOSE(1,"filePath: " << filePath <<endl);
@ -968,17 +1067,20 @@ void StaticData::LoadChartDecodingParameters()
void StaticData::LoadPhraseBasedParameters()
{
const vector<string> distortionWeights = m_parameter->GetParam("weight-d");
m_weightDistortion = Scan<float>(distortionWeights[0]);
m_distortionScoreProducer = new DistortionScoreProducer(m_scoreIndexManager);
m_allWeights.push_back(m_weightDistortion);
size_t distortionWeightCount = distortionWeights.size();
//if there's a lex-reordering model, and no separate weight set, then
//take just one of these weights for linear distortion
if (!m_parameter->GetParam("weight-lr").size() && m_parameter->GetParam("distortion-file").size()) {
distortionWeightCount = 1;
}
for (size_t i = 0; i < distortionWeightCount; ++i) {
float weightDistortion = Scan<float>(distortionWeights[i]);
m_distortionScoreProducers.push_back(new DistortionScoreProducer(m_scoreIndexManager));
m_allWeights.push_back(weightDistortion);
}
}
vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
{
vector<DecodeGraph*> decodeGraphs;
// mapping
bool StaticData::LoadDecodeGraphs() {
const vector<string> &mappingVector = m_parameter->GetParam("mapping");
const vector<size_t> &maxChartSpans = Scan<size_t>(m_parameter->GetParam("max-chart-span"));
@ -1025,7 +1127,7 @@ vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
UserMessage::Add(strme.str());
assert(false);
}
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index]->GetDictionary(source), prev);
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
break;
case Generate:
if(index>=m_generationDictionary.size())
@ -1044,72 +1146,30 @@ vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
}
assert(decodeStep);
if (decodeGraphs.size() < decodeGraphInd + 1)
if (m_decodeGraphs.size() < decodeGraphInd + 1)
{
DecodeGraph *decodeGraph;
if (m_searchAlgorithm == ChartDecoding)
{
size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
decodeGraph = new DecodeGraph(decodeGraphs.size(), maxChartSpan);
decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
}
else
{
decodeGraph = new DecodeGraph(decodeGraphs.size());
decodeGraph = new DecodeGraph(m_decodeGraphs.size());
}
decodeGraphs.push_back(decodeGraph); // TODO max chart span
m_decodeGraphs.push_back(decodeGraph); // TODO max chart span
}
decodeGraphs[decodeGraphInd]->Add(decodeStep);
m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
prev = decodeStep;
prevDecodeGraphInd = decodeGraphInd;
}
return decodeGraphs;
return true;
}
void StaticData::CleanUpAfterSentenceProcessing() const
{
for(size_t i=0;i<m_phraseDictionary.size();++i)
{
PhraseDictionaryFeature &phraseDictionaryFeature = *m_phraseDictionary[i];
PhraseDictionary &phraseDictionary = *phraseDictionaryFeature.GetDictionary();
phraseDictionary.CleanUp();
}
for(size_t i=0;i<m_generationDictionary.size();++i)
m_generationDictionary[i]->CleanUp();
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModel.begin() ; iterLM != m_languageModel.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
}
}
/** Initialize the translation and language models for this sentence
 *  (includes loading of translation table entries on demand, if
 *  binary format is used).
 *  @param in the input to be translated next
 */
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
{
  // Lexicalized reordering models get the raw input.
  for (size_t idx = 0; idx < m_reorderModels.size(); ++idx) {
    m_reorderModels[idx]->InitializeForInput(in);
  }

  // Global lexical models expect a Sentence; the input is cast down
  // as in the original code — presumably callers guarantee this. TODO confirm.
  for (size_t idx = 0; idx < m_globalLexicalModels.size(); ++idx) {
    m_globalLexicalModels[idx]->InitializeForInput(static_cast<Sentence const&>(in));
  }

  // Hook for anything the LMs want to do before translating a sentence.
  for (LMList::const_iterator lmIter = m_languageModel.begin();
       lmIter != m_languageModel.end(); ++lmIter) {
    LanguageModel &lm = **lmIter;
    lm.InitializeBeforeSentenceProcessing();
  }
}
void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights)
{

View File

@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifndef moses_StaticData_h
#define moses_StaticData_h
#include <stdexcept>
#include <limits>
#include <list>
#include <vector>
@ -42,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "SentenceStats.h"
#include "DecodeGraph.h"
#include "TranslationOptionList.h"
#include "TranslationSystem.h"
#if HAVE_CONFIG_H
#include "config.h"
@ -57,9 +59,9 @@ class GlobalLexicalModel;
class PhraseDictionaryFeature;
class GenerationDictionary;
class DistortionScoreProducer;
class WordPenaltyProducer;
class DecodeStep;
class UnknownWordPenaltyProducer;
class TranslationSystem;
typedef std::pair<std::string, float> UnknownLHSEntry;
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
@ -81,16 +83,15 @@ protected:
std::vector<float> m_allWeights;
std::vector<LexicalReordering*> m_reorderModels;
std::vector<GlobalLexicalModel*> m_globalLexicalModels;
std::vector<DecodeGraph*> m_decodeGraphs;
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
std::map<std::string, TranslationSystem> m_translationSystems;
float
m_beamWidth,
m_earlyDiscardingThreshold,
m_translationOptionThreshold,
m_weightDistortion,
m_weightWordPenalty,
m_wordDeletionWeight,
m_weightUnknownWord;
m_wordDeletionWeight;
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
@ -132,8 +133,8 @@ protected:
size_t m_numInputScores;
mutable size_t m_verboseLevel;
DistortionScoreProducer *m_distortionScoreProducer;
WordPenaltyProducer *m_wpProducer;
std::vector<WordPenaltyProducer*> m_wordPenaltyProducers;
std::vector<DistortionScoreProducer *> m_distortionScoreProducers;
UnknownWordPenaltyProducer *m_unknownWordPenaltyProducer;
bool m_reportSegmentation;
bool m_reportAllFactors;
@ -218,6 +219,7 @@ protected:
//! load all generation tables as specified in ini file
bool LoadGenerationTables();
//! load decoding steps
bool LoadDecodeGraphs();
bool LoadLexicalReorderingModel();
bool LoadGlobalLexicalModel();
void ReduceTransOptCache() const;
@ -267,7 +269,6 @@ public:
return m_outputFactorOrder;
}
std::vector<DecodeGraph*> GetDecodeStepVL(const InputType& source) const;
inline bool GetSourceStartPosMattersForRecombination() const
{
@ -306,22 +307,6 @@ public:
{
return m_maxPhraseLength;
}
const std::vector<LexicalReordering*> &GetReorderModels() const
{
return m_reorderModels;
}
float GetWeightDistortion() const
{
return m_weightDistortion;
}
float GetWeightWordPenalty() const
{
return m_weightWordPenalty;
}
float GetWeightUnknownWord() const
{
return m_weightUnknownWord;
}
bool IsWordDeletionEnabled() const
{
return m_wordDeletionEnabled;
@ -380,30 +365,17 @@ public:
return m_scoreIndexManager;
}
size_t GetLMSize() const
{
return m_languageModel.size();
}
const LMList &GetAllLM() const
{
return m_languageModel;
}
size_t GetPhraseDictionarySize() const
{
return m_phraseDictionary.size();
}
const std::vector<PhraseDictionaryFeature*> &GetPhraseDictionaries() const
{
return m_phraseDictionary;
}
const std::vector<GenerationDictionary*> &GetGenerationDictionaries() const
{
return m_generationDictionary;
}
size_t GetGenerationDictionarySize() const
{
return m_generationDictionary.size();
}
/** Look up a translation system by its identifier.
 *  @param id system name (e.g. TranslationSystem::DEFAULT)
 *  @return the configured system with that name
 *  @throws std::runtime_error if no system with this id exists
 *
 *  Takes the id by const reference: the original signature passed
 *  std::string by value, copying the string on every lookup for no benefit.
 */
const TranslationSystem& GetTranslationSystem(const std::string& id) const {
  VERBOSE(2, "Looking for translation system id " << id << std::endl);
  std::map<std::string, TranslationSystem>::const_iterator iter =
      m_translationSystems.find(id);
  if (iter == m_translationSystems.end()) {
    VERBOSE(1, "Translation system not found " << id << std::endl);
    throw std::runtime_error("Unknown translation system id");
  }
  return iter->second;
}
size_t GetVerboseLevel() const
{
return m_verboseLevel;
@ -475,16 +447,11 @@ public:
InputTypeEnum GetInputType() const {return m_inputType;}
SearchAlgorithm GetSearchAlgorithm() const {return m_searchAlgorithm;}
size_t GetNumInputScores() const {return m_numInputScores;}
void InitializeBeforeSentenceProcessing(InputType const&) const;
void CleanUpAfterSentenceProcessing() const;
const std::vector<float>& GetAllWeights() const
{
return m_allWeights;
}
const DistortionScoreProducer *GetDistortionScoreProducer() const { return m_distortionScoreProducer; }
const WordPenaltyProducer *GetWordPenaltyProducer() const { return m_wpProducer; }
const UnknownWordPenaltyProducer *GetUnknownWordPenaltyProducer() const { return m_unknownWordPenaltyProducer; }
bool UseAlignmentInfo() const { return m_UseAlignmentInfo;}
void UseAlignmentInfo(bool a){ m_UseAlignmentInfo=a; };

View File

@ -69,10 +69,10 @@ TargetPhrase::~TargetPhrase()
{
}
void TargetPhrase::SetScore()
void TargetPhrase::SetScore(const TranslationSystem* system)
{ // used when creating translations of unknown words:
m_transScore = m_ngramScore = 0;
m_fullScore = - StaticData::Instance().GetWeightWordPenalty();
m_fullScore = - system->GetWeightWordPenalty();
}
#ifdef HAVE_PROTOBUF
@ -89,7 +89,9 @@ void TargetPhrase::SetScore(float score)
{
//we use an existing score producer to figure out information for score setting (number of scores and weights)
//TODO: is this a good idea?
ScoreProducer* prod = StaticData::Instance().GetPhraseDictionaries()[0];
// Assume the default system.
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
const ScoreProducer* prod = system.GetPhraseDictionaries()[0];
//get the weight list
unsigned int id = prod->GetScoreBookkeepingID();
@ -110,17 +112,18 @@ void TargetPhrase::SetScore(float score)
vector <float> scoreVector(numScores,score/numScores);
//Now we have what we need to call the full SetScore method
SetScore(prod,scoreVector,weights,StaticData::Instance().GetWeightWordPenalty(),StaticData::Instance().GetAllLM());
SetScore(prod,scoreVector,weights,system.GetWeightWordPenalty(),system.GetLanguageModels());
}
/**
* used for setting scores for unknown words with input link features (lattice/conf. nets)
* \param scoreVector input scores
*/
void TargetPhrase::SetScore(const Scores &scoreVector)
void TargetPhrase::SetScore(const TranslationSystem* system, const Scores &scoreVector)
{
//we use an existing score producer to figure out information for score setting (number of scores and weights)
ScoreProducer* prod = StaticData::Instance().GetPhraseDictionaries()[0];
const ScoreProducer* prod = system->GetPhraseDictionaries()[0];
//get the weight list
unsigned int id = prod->GetScoreBookkeepingID();
@ -135,7 +138,7 @@ void TargetPhrase::SetScore(const Scores &scoreVector)
Scores sizedScoreVector = scoreVector;
sizedScoreVector.resize(prod->GetNumScoreComponents(),0.0f);
SetScore(prod,sizedScoreVector,weights,StaticData::Instance().GetWeightWordPenalty(),StaticData::Instance().GetAllLM());
SetScore(prod,sizedScoreVector,weights,system->GetWeightWordPenalty(),system->GetLanguageModels());
}
void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
@ -182,9 +185,9 @@ void TargetPhrase::SetScore(const ScoreProducer* translationScoreProducer,
void TargetPhrase::SetScoreChart(const ScoreProducer* translationScoreProducer,
const Scores &scoreVector
,const vector<float> &weightT
,const LMList &languageModels)
,const LMList &languageModels
,const WordPenaltyProducer* wpProducer)
{
const StaticData &staticData = StaticData::Instance();
assert(weightT.size() == scoreVector.size());
@ -220,7 +223,7 @@ void TargetPhrase::SetScoreChart(const ScoreProducer* translationScoreProducer,
// word penalty
size_t wordCount = GetNumTerminals();
m_scoreBreakdown.Assign(staticData.GetWordPenaltyProducer(), - (float) wordCount * 0.434294482); // TODO log -> ln ??
m_scoreBreakdown.Assign(wpProducer, - (float) wordCount * 0.434294482); // TODO log -> ln ??
m_fullScore = m_scoreBreakdown.GetWeightedScore() - totalNgramScore + totalFullScore;
}

View File

@ -42,6 +42,8 @@ class LMList;
class PhraseDictionary;
class GenerationDictionary;
class ScoreProducer;
class TranslationSystem;
class WordPenaltyProducer;
class CountInfo
{
@ -89,13 +91,13 @@ public:
//! used by the unknown word handler- these targets
//! don't have a translation score, so wp is the only thing used
void SetScore();
void SetScore(const TranslationSystem* system);
//!Set score for Sentence XML target options
void SetScore(float score);
//! Set score for unknown words with input weights
void SetScore(const Scores &scoreVector);
void SetScore(const TranslationSystem* system, const Scores &scoreVector);
/*** Called immediately after creation to initialize scores.
@ -118,7 +120,8 @@ public:
void SetScoreChart(const ScoreProducer* translationScoreProducer
,const Scores &scoreVector
,const std::vector<float> &weightT
,const LMList &languageModels);
,const LMList &languageModels
,const WordPenaltyProducer* wpProducer);
// used by for unknown word proc in chart decoding
void SetScore(const ScoreProducer* producer, const Scores &scoreVector);

View File

@ -59,12 +59,11 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
TranslationOption::TranslationOption(const WordsRange &wordsRange
, const TargetPhrase &targetPhrase
, const InputType &inputType
, int /*whatever*/)
, const UnknownWordPenaltyProducer* up)
: m_targetPhrase(targetPhrase)
, m_sourceWordsRange (wordsRange)
, m_futureScore(0)
{
const UnknownWordPenaltyProducer *up = StaticData::Instance().GetUnknownWordPenaltyProducer();
if (up) {
const ScoreProducer *scoreProducer = (const ScoreProducer *)up; // not sure why none of the c++ cast works
vector<float> score(1);
@ -137,20 +136,21 @@ bool TranslationOption::Overlap(const Hypothesis &hypothesis) const
return bitmap.Overlap(GetSourceWordsRange());
}
void TranslationOption::CalcScore()
void TranslationOption::CalcScore(const TranslationSystem* system)
{
// LM scores
float ngramScore = 0;
float retFullScore = 0;
const LMList &allLM = StaticData::Instance().GetAllLM();
const LMList &allLM = system->GetLanguageModels();
allLM.CalcScore(GetTargetPhrase(), retFullScore, ngramScore, &m_scoreBreakdown);
size_t phraseSize = GetTargetPhrase().GetSize();
// future score
m_futureScore = retFullScore - ngramScore
+ m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize * StaticData::Instance().GetWeightWordPenalty();
+ m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize *
system->GetWeightWordPenalty();
}
TO_STRING_BODY(TranslationOption);

View File

@ -86,7 +86,7 @@ public:
TranslationOption(const WordsRange &wordsRange
, const TargetPhrase &targetPhrase
, const InputType &inputType
, int);
, const UnknownWordPenaltyProducer* uwpProducer);
/** copy constructor */
TranslationOption(const TranslationOption &copy);
@ -186,7 +186,7 @@ public:
}
/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
void CalcScore();
void CalcScore(const TranslationSystem* system);
void CacheScores(const ScoreProducer &scoreProducer, const Scores &score);

View File

@ -47,8 +47,10 @@ bool CompareTranslationOption(const TranslationOption *a, const TranslationOptio
/** constructor; since translation options are indexed by coverage span, the corresponding data structure is initialized here
* This fn should be called by inherited classes
*/
TranslationOptionCollection::TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: m_source(src)
TranslationOptionCollection::TranslationOptionCollection(const TranslationSystem* system,
InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: m_system(system),
m_source(src)
,m_futureScore(src.GetSize())
,m_maxNoTransOptPerCoverage(maxNoTransOptPerCoverage)
,m_translationOptionThreshold(translationOptionThreshold)
@ -168,8 +170,9 @@ void TranslationOptionCollection::Prune()
* \param factorCollection input sentence with all factors
*/
void TranslationOptionCollection::ProcessUnknownWord(const std::vector <DecodeGraph*> &decodeStepVL)
void TranslationOptionCollection::ProcessUnknownWord()
{
const vector<DecodeGraph*>& decodeStepVL = m_system->GetDecodeGraphs();
size_t size = m_source.GetSize();
// try to translation for coverage with no trans by expanding table limit
for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
@ -240,9 +243,9 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
TargetPhrase targetPhrase(Output);
targetPhrase.SetSourcePhrase(m_unksrc);
if (inputScores != NULL) {
targetPhrase.SetScore(*inputScores);
targetPhrase.SetScore(m_system,*inputScores);
} else {
targetPhrase.SetScore();
targetPhrase.SetScore(m_system);
}
if (!(StaticData::Instance().GetDropUnknown() || isEpsilon) || isDigit)
@ -275,8 +278,9 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
//targetPhrase.SetAlignment();
}
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);
transOpt->CalcScore();
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source
, m_system->GetUnknownWordPenaltyProducer());
transOpt->CalcScore(m_system);
Add(transOpt);
}
@ -375,12 +379,14 @@ void TranslationOptionCollection::CalcFutureScore()
* \param decodeStepList list of decoding steps
* \param factorCollection input sentence with all factors
*/
void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeGraph*> &decodeStepVL)
void TranslationOptionCollection::CreateTranslationOptions()
{
// loop over all substrings of the source sentence, look them up
// in the phraseDictionary (which is the- possibly filtered-- phrase
// table loaded on initialization), generate TranslationOption objects
// for all phrases
const vector <DecodeGraph*> &decodeStepVL = m_system->GetDecodeGraphs();
size_t size = m_source.GetSize();
for (size_t startVL = 0 ; startVL < decodeStepVL.size() ; startVL++)
@ -401,7 +407,7 @@ void TranslationOptionCollection::CreateTranslationOptions(const vector <DecodeG
VERBOSE(2,"Translation Option Collection\n " << *this << endl);
ProcessUnknownWord(decodeStepVL);
ProcessUnknownWord();
// Prune
Prune();
@ -483,7 +489,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
const DecodeStep &decodeStep = **iterStep;
static_cast<const DecodeStepTranslation&>(decodeStep).ProcessInitialTranslation
(m_source, *oldPtoc
(m_system, m_source, *oldPtoc
, startPos, endPos, adhereTableLimit );
// do rest of decode steps
@ -499,7 +505,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
for (iterPartialTranslOpt = partTransOptList.begin() ; iterPartialTranslOpt != partTransOptList.end() ; ++iterPartialTranslOpt)
{
TranslationOption &inputPartialTranslOpt = **iterPartialTranslOpt;
decodeStep.Process(inputPartialTranslOpt
decodeStep.Process(m_system, inputPartialTranslOpt
, decodeStep
, *newPtoc
, this
@ -519,7 +525,7 @@ void TranslationOptionCollection::CreateTranslationOptionsForRange(
for (iterColl = partTransOptList.begin() ; iterColl != partTransOptList.end() ; ++iterColl)
{
TranslationOption *transOpt = *iterColl;
transOpt->CalcScore();
transOpt->CalcScore(m_system);
Add(transOpt);
}
@ -615,8 +621,7 @@ std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& c
void TranslationOptionCollection::CacheLexReordering()
{
const std::vector<LexicalReordering*> &lexReorderingModels = StaticData::Instance().GetReorderModels();
const vector<LexicalReordering*> &lexReorderingModels = m_system->GetReorderModels();
std::vector<LexicalReordering*>::const_iterator iterLexreordering;
size_t size = m_source.GetSize();

View File

@ -64,21 +64,24 @@ class TranslationOptionCollection
friend std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll);
TranslationOptionCollection(const TranslationOptionCollection&); /*< no copy constructor */
protected:
const TranslationSystem* m_system;
std::vector< std::vector< TranslationOptionList > > m_collection; /*< contains translation options */
InputType const &m_source; /*< reference to the input */
SquareMatrix m_futureScore; /*< matrix of future costs for contiguous parts (span) of the input */
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
std::vector<Phrase*> m_unksrcs;
TranslationOptionCollection(InputType const& src, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
TranslationOptionCollection(const TranslationSystem* system, InputType const& src, size_t maxNoTransOptPerCoverage,
float translationOptionThreshold);
void CalcFutureScore();
//! Force a creation of a translation option where there are none for a particular source position.
void ProcessUnknownWord(const std::vector <DecodeGraph*> &decodeStepVL);
void ProcessUnknownWord();
//! special handling of ONE unknown words.
virtual void ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
virtual void ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
void Prune();
@ -91,8 +94,8 @@ protected:
void Add(TranslationOption *translationOption);
//! implemented by inherited class, called by this class
virtual void ProcessUnknownWord(size_t sourcePos)=0;
void CacheLexReordering();
virtual void ProcessUnknownWord(size_t sourcePos)=0;
void CacheLexReordering();
public:
virtual ~TranslationOptionCollection();
@ -104,7 +107,7 @@ public:
size_t GetSize() const { return m_source.GetSize(); };
//! Create all possible translations from the phrase tables
virtual void CreateTranslationOptions(const std::vector <DecodeGraph*> &decodeStepVL);
virtual void CreateTranslationOptions();
//! Create translation options that exactly cover a specific input span.
virtual void CreateTranslationOptionsForRange(const DecodeGraph &decodeStepList
, size_t startPosition

View File

@ -10,18 +10,18 @@
namespace Moses
{
/** constructor; just initialize the base class */
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(
TranslationOptionCollectionConfusionNet::TranslationOptionCollectionConfusionNet(const TranslationSystem* system,
const ConfusionNet &input
, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(input, maxNoTransOptPerCoverage, translationOptionThreshold) {}
: TranslationOptionCollection(system, input, maxNoTransOptPerCoverage, translationOptionThreshold) {}
/* forcibly create translation option for a particular source word.
* call the base class' ProcessOneUnknownWord() for each possible word in the confusion network
* at a particular source position
*/
void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(
size_t sourcePos)
void TranslationOptionCollectionConfusionNet::ProcessUnknownWord(size_t sourcePos)
{
ConfusionNet const& source=dynamic_cast<ConfusionNet const&>(m_source);

View File

@ -8,12 +8,13 @@ namespace Moses
{
class ConfusionNet;
class TranslationSystem;
class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection {
public:
TranslationOptionCollectionConfusionNet(const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
TranslationOptionCollectionConfusionNet(const TranslationSystem* system, const ConfusionNet &source, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
void ProcessUnknownWord( size_t sourcePos);
void ProcessUnknownWord(size_t sourcePos);
};

View File

@ -33,8 +33,8 @@ using namespace std;
namespace Moses
{
/** constructor; just initialize the base class */
TranslationOptionCollectionText::TranslationOptionCollectionText(Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
TranslationOptionCollectionText::TranslationOptionCollectionText(const TranslationSystem* system, Sentence const &inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold)
: TranslationOptionCollection(system, inputSentence, maxNoTransOptPerCoverage, translationOptionThreshold) {}
/* forcibly create translation option for a particular source word.
* For text, this function is easy, just call the base class' ProcessOneUnknownWord()
@ -65,7 +65,7 @@ void TranslationOptionCollectionText::CreateXmlOptionsForRange(size_t startPosit
//get vector of TranslationOptions from Sentence
for(size_t i=0;i<xmlOptions.size();i++) {
xmlOptions[i]->CalcScore();
xmlOptions[i]->CalcScore(m_system);
Add(xmlOptions[i]);
}

View File

@ -32,9 +32,9 @@ class LMList;
class TranslationOptionCollectionText : public TranslationOptionCollection {
public:
void ProcessUnknownWord( size_t sourcePos);
void ProcessUnknownWord(size_t sourcePos);
TranslationOptionCollectionText(Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
TranslationOptionCollectionText(const TranslationSystem* system, Sentence const& inputSentence, size_t maxNoTransOptPerCoverage, float translationOptionThreshold);
bool HasXmlOptionsOverlappingRange(size_t startPosition, size_t endPosition) const;

View File

@ -0,0 +1,172 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <stdexcept>
#include <iostream>
#include "DecodeGraph.h"
#include "DecodeStep.h"
#include "DummyScoreProducers.h"
#include "GlobalLexicalModel.h"
#include "LexicalReordering.h"
#include "StaticData.h"
#include "TranslationSystem.h"
#include "Util.h"
using namespace std;
namespace Moses {
const string TranslationSystem::DEFAULT = "default";
/** Create a named system and register its core penalty features.
 *  wpProducer and uwpProducer are registered immediately; the distortion
 *  producer is optional and only registered when non-NULL. */
TranslationSystem::TranslationSystem(const std::string& id,
                                     const WordPenaltyProducer* wpProducer,
                                     const UnknownWordPenaltyProducer* uwpProducer,
                                     const DistortionScoreProducer* distortionProducer)
  : m_id(id),
    m_wpProducer(wpProducer),
    m_unknownWpProducer(uwpProducer),
    m_distortionScoreProducer(distortionProducer)
{
  AddFeatureFunction(wpProducer);
  AddFeatureFunction(uwpProducer);
  // Not every configuration supplies a distortion model.
  if (distortionProducer != NULL) {
    AddFeatureFunction(distortionProducer);
  }
}
//Insert core 'big' features
/** Register a language model: expose it as a (stateful) feature function
 *  and record it in this system's LM list.  The two registrations touch
 *  different members and are order-independent. */
void TranslationSystem::AddLanguageModel(LanguageModel* languageModel) {
  AddFeatureFunction(languageModel);
  m_languageModels.Add(languageModel);
}
// Register a decode graph (a chain of translation/generation steps).
// Only the raw pointer is stored here -- no ownership transfer is visible
// in this class; the dictionaries inside the graph are wired up later by
// ConfigDictionaries().
void TranslationSystem::AddDecodeGraph(DecodeGraph* decodeGraph) {
  m_decodeGraphs.push_back(decodeGraph);
}
/** Register a lexicalized reordering model, both as a reordering table and
 *  as a feature function.  The two registrations are independent. */
void TranslationSystem::AddReorderModel(LexicalReordering* reorderModel) {
  AddFeatureFunction(reorderModel);
  m_reorderingTables.push_back(reorderModel);
}
/** Register a global lexical model, both in this system's model list and
 *  as a feature function.  The two registrations are independent. */
void TranslationSystem::AddGlobalLexicalModel(GlobalLexicalModel* globalLexicalModel) {
  AddFeatureFunction(globalLexicalModel);
  m_globalLexicalModels.push_back(globalLexicalModel);
}
void TranslationSystem::AddFeatureFunction(const FeatureFunction* ff) {
if (ff->IsStateless()) {
const StatelessFeatureFunction* statelessFF = static_cast<const StatelessFeatureFunction*>(ff);
if (!statelessFF->ComputeValueInTranslationOption()) {
m_statelessFFs.push_back(statelessFF);
}
} else {
m_statefulFFs.push_back(static_cast<const StatefulFeatureFunction*>(ff));
}
}
/** Walk every registered decode graph and pick up the phrase and
 *  generation dictionaries attached to its steps, registering each as a
 *  feature function.  Must be called once, after all tables have been
 *  added to the system.
 *
 *  The steps expose their dictionaries as const, hence the const_casts at
 *  the point of extraction. */
void TranslationSystem::ConfigDictionaries() {
  for (vector<DecodeGraph*>::const_iterator i = m_decodeGraphs.begin();
       i != m_decodeGraphs.end(); ++i) {
    for (DecodeGraph::const_iterator j = (*i)->begin(); j != (*i)->end(); ++j) {
      const DecodeStep* step = *j;
      PhraseDictionaryFeature* pdict =
          const_cast<PhraseDictionaryFeature*>(step->GetPhraseDictionaryFeature());
      if (pdict) {
        m_phraseDictionaries.push_back(pdict);
        AddFeatureFunction(pdict);
        // pdict was already cast to non-const above; the second
        // const_cast in the original was redundant.
        pdict->InitDictionary(this);
      }
      GenerationDictionary* gdict =
          const_cast<GenerationDictionary*>(step->GetGenerationDictionaryFeature());
      if (gdict) {
        m_generationDictionaries.push_back(gdict);
        AddFeatureFunction(gdict);
      }
    }
  }
}
void TranslationSystem::InitializeBeforeSentenceProcessing(const InputType& source) const {
for (vector<PhraseDictionaryFeature*>::const_iterator i = m_phraseDictionaries.begin();
i != m_phraseDictionaries.end(); ++i) {
(*i)->InitDictionary(this,source);
}
for(size_t i=0;i<m_reorderingTables.size();++i) {
m_reorderingTables[i]->InitializeForInput(source);
}
for(size_t i=0;i<m_globalLexicalModels.size();++i) {
m_globalLexicalModels[i]->InitializeForInput((Sentence const&)source);
}
LMList::const_iterator iterLM;
for (iterLM = m_languageModels.begin() ; iterLM != m_languageModels.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.InitializeBeforeSentenceProcessing();
}
}
void TranslationSystem::CleanUpAfterSentenceProcessing() const {
for(size_t i=0;i<m_phraseDictionaries.size();++i)
{
PhraseDictionaryFeature &phraseDictionaryFeature = *m_phraseDictionaries[i];
PhraseDictionary* phraseDictionary = const_cast<PhraseDictionary*>(phraseDictionaryFeature.GetDictionary());
phraseDictionary->CleanUp();
}
for(size_t i=0;i<m_generationDictionaries.size();++i)
m_generationDictionaries[i]->CleanUp();
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModels.begin() ; iterLM != m_languageModels.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
}
}
float TranslationSystem::GetWeightWordPenalty() const {
//const ScoreComponentCollection weights = StaticData::Instance().GetAllWeights();
size_t wpIndex = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(m_wpProducer->GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[wpIndex];
}
float TranslationSystem::GetWeightUnknownWordPenalty() const {
size_t uwpIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(m_unknownWpProducer->GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[uwpIndex];
}
float TranslationSystem::GetWeightDistortion() const {
assert(m_distortionScoreProducer);
size_t distIndex = StaticData::Instance().GetScoreIndexManager().
GetBeginIndex(m_distortionScoreProducer->GetScoreBookkeepingID());
return StaticData::Instance().GetAllWeights()[distIndex];
}
};

View File

@ -0,0 +1,124 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_TranslationSystem_h
#define moses_TranslationSystem_h
#include <stdexcept>
#include <string>
#include <vector>
#include "FeatureFunction.h"
#include "LMList.h"
namespace Moses {
class DecodeGraph;
class LexicalReordering;
class PhraseDictionaryFeature;
class GenerationDictionary;
class WordPenaltyProducer;
class DistortionScoreProducer;
class UnknownWordPenaltyProducer;
class GlobalLexicalModel;
/**
 * Enables the configuration of multiple translation systems within a
 * single process.  A system groups the decode graphs, language models,
 * reordering tables and feature functions that one kind of translation
 * request uses, and provides the per-sentence setup/teardown they need.
 **/
class TranslationSystem {
  public:
    /** Creates a system with the given id.
     *  wpProducer and uwpProducer are registered as feature functions
     *  immediately; distortionProducer may be NULL (it is only registered
     *  when present). */
    TranslationSystem(const std::string& id,
                      const WordPenaltyProducer* wpProducer,
                      const UnknownWordPenaltyProducer* uwpProducer,
                      const DistortionScoreProducer* distortionProducer);

    // Insert core 'big' features.  NOTE(review): the system stores raw
    // pointers; no ownership transfer is visible in this class -- callers
    // appear to manage model lifetimes.
    void AddLanguageModel(LanguageModel* languageModel);
    void AddDecodeGraph(DecodeGraph* decodeGraph);
    void AddReorderModel(LexicalReordering* reorderModel);
    void AddGlobalLexicalModel(GlobalLexicalModel* globalLexicalModel);

    // Insert non-core feature function.
    void AddFeatureFunction(const FeatureFunction* featureFunction);

    // Called after adding the tables in order to set up the dictionaries:
    // pulls the phrase/generation dictionaries out of the decode graphs
    // and registers them as feature functions.
    void ConfigDictionaries();

    /** Identifier this system was created with (see DEFAULT). */
    const std::string& GetId() const {return m_id;}

    // Lists of tables relevant to this system.
    const std::vector<LexicalReordering*>& GetReorderModels() const {return m_reorderingTables;}
    const std::vector<DecodeGraph*>& GetDecodeGraphs() const {return m_decodeGraphs;}
    const LMList& GetLanguageModels() const {return m_languageModels;}
    const std::vector<GenerationDictionary*>& GetGenerationDictionaries() const {return m_generationDictionaries;}
    const std::vector<PhraseDictionaryFeature*>& GetPhraseDictionaries() const {return m_phraseDictionaries;}

    // Feature functions, split by whether they carry decoding state.
    const std::vector<const StatefulFeatureFunction*>& GetStatefulFeatureFunctions() const {return m_statefulFFs;}
    const std::vector<const StatelessFeatureFunction*>& GetStatelessFeatureFunctions() const {return m_statelessFFs;}

    const WordPenaltyProducer *GetWordPenaltyProducer() const { return m_wpProducer; }
    const UnknownWordPenaltyProducer *GetUnknownWordPenaltyProducer() const { return m_unknownWpProducer; }
    const DistortionScoreProducer* GetDistortionProducer() const {return m_distortionScoreProducer;}

    // Current global weights of the corresponding penalty features,
    // looked up through StaticData's score index manager.
    float GetWeightWordPenalty() const;
    float GetWeightUnknownWordPenalty() const;
    float GetWeightDistortion() const;

    // Sentence (and thread) specific initialisation and cleanup.
    void InitializeBeforeSentenceProcessing(const InputType& source) const;
    void CleanUpAfterSentenceProcessing() const;

    // Name of the default system (the string "default").
    static const std::string DEFAULT;

  private:
    std::string m_id;

    std::vector<DecodeGraph*> m_decodeGraphs;
    std::vector<LexicalReordering*> m_reorderingTables;
    std::vector<PhraseDictionaryFeature*> m_phraseDictionaries;
    std::vector<GenerationDictionary*> m_generationDictionaries;
    LMList m_languageModels;
    std::vector<GlobalLexicalModel*> m_globalLexicalModels;

    // All stateless FFs, except those that cache scores in the T-Option.
    std::vector<const StatelessFeatureFunction*> m_statelessFFs;
    // All stateful FFs.
    std::vector<const StatefulFeatureFunction*> m_statefulFFs;

    const WordPenaltyProducer* m_wpProducer;
    const UnknownWordPenaltyProducer* m_unknownWpProducer;
    const DistortionScoreProducer* m_distortionScoreProducer;
};
}
#endif

View File

@ -0,0 +1,22 @@
#!/usr/bin/perl
# Filter a Moses decoder log into the key/value lines the regression-test
# harness compares: model load times and one score per best translation.
BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, "$script_dir/../perllib"; }
use RegTestUtils;

my $hypoCount = 0;
while (<>) {
  chomp;
  my $line = $_;
  if ($line =~ /^Finished loading LanguageModels/) {
    printf "LMLOAD_TIME ~ %s\n", RegTestUtils::readTime($line);
  }
  if ($line =~ /^Finished loading phrase tables/) {
    printf "PTLOAD_TIME ~ %s\n", RegTestUtils::readTime($line);
  }
  if ($line =~ /^BEST TRANSLATION:/) {
    ++$hypoCount;
    printf "SCORE_%d = %s\n", $hypoCount, RegTestUtils::readHypoScore($line);
  }
}

View File

@ -0,0 +1,7 @@
#!/usr/bin/perl
# Number each input line as "TRANSLATION_<n>=<line>" for the regression
# harness to compare against the expected output.
my $lineNo = 0;
while (my $line = <>) {
  chomp $line;
  ++$lineNo;
  print "TRANSLATION_$lineNo=$line\n";
}

View File

@ -0,0 +1,76 @@
# moses.ini for regression test
# D - decoding path, R - reordering model, L - language model
[translation-systems]
other D 0 L 0
default D 1 L 1 R 0
[mapping]
0 T 0
1 T 1
[ttable-file]
0 0 0 5 ${MODEL_PATH}/basic-surface-only/phrase-table.gz
0 0 0 5 ${MODEL_PATH}/lexicalized-reordering/phrase-table.0-0.gz
# language model
[lmodel-file]
0 0 5 ${MODEL_PATH}/lexicalized-reordering/europarl.lm
0 0 5 ${MODEL_PATH}/lexicalized-reordering/europarl.lm
# limit on how many phrase translations e for each phrase f are loaded -- see [ttable-limit] below
# distortion (reordering) files
[distortion-file]
0-0 msd-bidirectional-fe 6 ${MODEL_PATH}/lexicalized-reordering/reordering-table.msd-bidirectional-fe.0.5.0-0.gz
[ttable-limit]
#ttable element load limit 0 = all elements loaded
20
# distortion (reordering) weight
[weight-d]
0.1
0.5
# lexical reordering weights
[weight-lr]
0.3
0.3
0.3
0.3
0.3
0.3
# language model weight
[weight-l]
0.001
0.5
# translation model weight (phrase translation, lexical weighting)
[weight-t]
0.5
0.5
0.5
0.5
0.5
0.2
0.2
0.2
0.2
0.2
# word penalty
[weight-w]
0.273416114951401
-1
[distortion-limit]
6
[input-factors]
0
[verbose]
2

View File

@ -0,0 +1,5 @@
monsieur le président , ce que nous devrons toutefois également faire à biarritz , c' est regarder un peu plus loin .
les élus que nous sommes avons au moins autant le devoir de l' encourager à progresser , en dépit de l' adversité , que de relayer les messages que nous recevons de l' opinion publique dans chacun de nos pays .
au regard des événements de ces derniers temps , la question du prix de l' essence me semble elle aussi particulièrement remarquable .
à l' heure actuelle , le conseil est en train d' examiner l' inclusion de tels mécanismes dans l' article 7 .
deuxièmement , dans la transparence pour les citoyens , qui connaissent à présent les droits dont ils disposent vis-à-vis de ceux qui appliquent et élaborent le droit européen , et pour ceux qui , justement , appliquent et élaborent ce droit européen .

Some files were not shown because too many files have changed in this diff Show More