2013-04-22 15:21:59 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#ifndef moses_PhraseDictionaryMultiModel_h
|
|
|
|
#define moses_PhraseDictionaryMultiModel_h
|
|
|
|
|
|
|
|
#include "moses/TranslationModel/PhraseDictionary.h"
|
|
|
|
|
|
|
|
|
|
|
|
#include <boost/unordered_map.hpp>
|
2013-06-12 16:47:40 +04:00
|
|
|
#include <boost/thread/shared_mutex.hpp>
|
2013-04-22 15:21:59 +04:00
|
|
|
#include "moses/StaticData.h"
|
|
|
|
#include "moses/TargetPhrase.h"
|
|
|
|
#include "moses/Util.h"
|
|
|
|
#include "moses/UserMessage.h"
|
|
|
|
|
|
|
|
#ifdef WITH_DLIB
|
|
|
|
#include <dlib/optimization.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
struct multiModelStatistics {
|
|
|
|
TargetPhrase *targetPhrase;
|
|
|
|
std::vector<std::vector<float> > p;
|
|
|
|
~multiModelStatistics() {
|
|
|
|
delete targetPhrase;
|
2013-04-22 15:21:59 +04:00
|
|
|
};
|
2013-05-29 21:16:15 +04:00
|
|
|
};
|
2013-04-22 15:21:59 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/** multiModelStatistics plus one additional size_t field, used by the
 *  optimisation code (see CrossEntropy, which stores pointers to these).
 */
struct multiModelStatisticsOptimization : public multiModelStatistics {
  // NOTE(review): presumably a frequency/count attached to the phrase pair —
  // the meaning is set by the code that fills it in; confirm in the .cpp.
  size_t f;
};
|
2013-04-22 15:21:59 +04:00
|
|
|
|
|
|
|
// Forward declaration only: the full definition appears later in this header,
// inside the WITH_DLIB section. It is needed here because
// PhraseDictionaryMultiModel::Optimize takes an OptimizationObjective pointer.
class OptimizationObjective;
|
|
|
|
|
|
|
|
/** Implementation of a virtual phrase table constructed from multiple component phrase tables.
|
|
|
|
*/
|
|
|
|
class PhraseDictionaryMultiModel: public PhraseDictionary
|
|
|
|
{
|
|
|
|
#ifdef WITH_DLIB
|
2013-05-29 21:16:15 +04:00
|
|
|
friend class CrossEntropy;
|
2013-04-22 15:21:59 +04:00
|
|
|
#endif
|
|
|
|
|
|
|
|
public:
|
2013-05-10 15:30:01 +04:00
|
|
|
PhraseDictionaryMultiModel(const std::string &line);
|
2013-05-13 20:20:14 +04:00
|
|
|
PhraseDictionaryMultiModel(const std::string &description, const std::string &line);
|
2013-04-22 15:21:59 +04:00
|
|
|
~PhraseDictionaryMultiModel();
|
2013-05-31 23:21:02 +04:00
|
|
|
void Load();
|
2013-04-22 15:21:59 +04:00
|
|
|
virtual void CollectSufficientStatistics(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats) const;
|
2013-05-28 03:41:25 +04:00
|
|
|
virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(const Phrase& src, std::map<std::string,multiModelStatistics*>* allStats, std::vector<std::vector<float> > &multimodelweights) const;
|
2013-04-22 15:21:59 +04:00
|
|
|
std::vector<std::vector<float> > getWeights(size_t numWeights, bool normalize) const;
|
|
|
|
std::vector<float> normalizeWeights(std::vector<float> &weights) const;
|
|
|
|
void CacheForCleanup(TargetPhraseCollection* tpc);
|
2013-05-10 15:30:01 +04:00
|
|
|
void CleanUpAfterSentenceProcessing(const InputType &source);
|
2013-04-22 15:21:59 +04:00
|
|
|
virtual void CleanUpComponentModels(const InputType &source);
|
|
|
|
#ifdef WITH_DLIB
|
|
|
|
virtual std::vector<float> MinimizePerplexity(std::vector<std::pair<std::string, std::string> > &phrase_pair_vector);
|
|
|
|
std::vector<float> Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels);
|
|
|
|
#endif
|
|
|
|
// functions below required by base class
|
2013-08-24 00:02:03 +04:00
|
|
|
virtual const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
2013-04-22 15:21:59 +04:00
|
|
|
virtual void InitializeForInput(InputType const&) {
|
|
|
|
/* Don't do anything source specific here as this object is shared between threads.*/
|
|
|
|
}
|
2013-07-31 15:25:34 +04:00
|
|
|
ChartRuleLookupManager *CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase&);
|
2013-06-20 16:25:02 +04:00
|
|
|
void SetParameter(const std::string& key, const std::string& value);
|
2013-04-22 15:21:59 +04:00
|
|
|
|
2013-06-12 16:47:40 +04:00
|
|
|
const std::vector<float>* GetTemporaryMultiModelWeightsVector() const;
|
|
|
|
void SetTemporaryMultiModelWeightsVector(std::vector<float> weights);
|
|
|
|
|
2013-04-22 15:21:59 +04:00
|
|
|
protected:
|
|
|
|
std::string m_mode;
|
2013-05-10 17:48:06 +04:00
|
|
|
std::vector<std::string> m_pdStr;
|
2013-04-22 15:21:59 +04:00
|
|
|
std::vector<PhraseDictionary*> m_pd;
|
|
|
|
size_t m_numModels;
|
2013-05-10 18:33:46 +04:00
|
|
|
std::vector<float> m_multimodelweights;
|
2013-04-22 15:21:59 +04:00
|
|
|
|
|
|
|
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
|
|
|
#ifdef WITH_THREADS
|
2013-06-12 17:25:23 +04:00
|
|
|
boost::shared_mutex m_lock_cache;
|
2013-04-22 15:21:59 +04:00
|
|
|
typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
|
|
|
|
#else
|
|
|
|
typedef PhraseCache SentenceCache;
|
|
|
|
#endif
|
|
|
|
SentenceCache m_sentenceCache;
|
|
|
|
|
2013-06-12 17:25:23 +04:00
|
|
|
PhraseCache& GetPhraseCache() {
|
|
|
|
#ifdef WITH_THREADS
|
2013-06-14 21:34:47 +04:00
|
|
|
{
|
|
|
|
// first try read-only lock
|
|
|
|
boost::shared_lock<boost::shared_mutex> read_lock(m_lock_cache);
|
|
|
|
SentenceCache::iterator i = m_sentenceCache.find(boost::this_thread::get_id());
|
|
|
|
if (i != m_sentenceCache.end()) return i->second;
|
|
|
|
}
|
|
|
|
boost::unique_lock<boost::shared_mutex> lock(m_lock_cache);
|
|
|
|
return m_sentenceCache[boost::this_thread::get_id()];
|
2013-06-12 17:25:23 +04:00
|
|
|
#else
|
2013-06-14 21:34:47 +04:00
|
|
|
return m_sentenceCache;
|
2013-06-12 17:25:23 +04:00
|
|
|
#endif
|
|
|
|
}
|
2013-05-14 18:16:09 +04:00
|
|
|
|
2013-06-12 16:47:40 +04:00
|
|
|
#ifdef WITH_THREADS
|
|
|
|
//reader-writer lock
|
|
|
|
mutable boost::shared_mutex m_lock_weights;
|
|
|
|
std::map<boost::thread::id, std::vector<float> > m_multimodelweights_tmp;
|
|
|
|
#else
|
|
|
|
std::vector<float> m_multimodelweights_tmp;
|
|
|
|
#endif
|
2013-04-22 15:21:59 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
#ifdef WITH_DLIB
|
2013-05-29 21:16:15 +04:00
|
|
|
class OptimizationObjective
|
2013-04-22 15:21:59 +04:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
virtual double operator() ( const dlib::matrix<double,0,1>& arg) const = 0;
|
2013-04-22 15:21:59 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class CrossEntropy: public OptimizationObjective
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
CrossEntropy (
|
|
|
|
std::vector<multiModelStatisticsOptimization*> &optimizerStats,
|
|
|
|
PhraseDictionaryMultiModel * model,
|
|
|
|
size_t iFeature
|
|
|
|
) {
|
|
|
|
m_optimizerStats = optimizerStats;
|
|
|
|
m_model = model;
|
|
|
|
m_iFeature = iFeature;
|
|
|
|
}
|
2013-04-22 15:21:59 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
double operator() ( const dlib::matrix<double,0,1>& arg) const;
|
2013-04-22 15:21:59 +04:00
|
|
|
|
|
|
|
protected:
|
2013-05-29 21:16:15 +04:00
|
|
|
std::vector<multiModelStatisticsOptimization*> m_optimizerStats;
|
|
|
|
PhraseDictionaryMultiModel * m_model;
|
|
|
|
size_t m_iFeature;
|
2013-04-22 15:21:59 +04:00
|
|
|
};
|
|
|
|
#endif
|
|
|
|
|
2013-07-30 15:54:50 +04:00
|
|
|
// Free function at Moses namespace scope: takes a name string and returns a
// PhraseDictionary pointer.
// NOTE(review): presumably looks up a loaded phrase table by its name; the
// lookup source and the result for an unknown name are defined in the .cpp —
// confirm there before relying on a non-null return.
PhraseDictionary *FindPhraseDictionary(const std::string &ptName);
|
|
|
|
|
2013-04-22 15:21:59 +04:00
|
|
|
} // end namespace
|
|
|
|
|
2013-04-29 23:51:00 +04:00
|
|
|
#endif
|