/*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef moses_PhraseDictionaryMultiModel_h #define moses_PhraseDictionaryMultiModel_h #include "moses/TranslationModel/PhraseDictionary.h" #ifndef WIN32 #include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h" #endif #include #include "moses/StaticData.h" #include "moses/TargetPhrase.h" #include "moses/Util.h" #include "moses/UserMessage.h" #ifdef WITH_DLIB #include #endif namespace Moses { struct multiModelStatistics { TargetPhrase *targetPhrase; std::vector > p; ~multiModelStatistics() {delete targetPhrase;}; }; struct multiModelStatisticsOptimization: multiModelStatistics { size_t f; }; class OptimizationObjective; /** Implementation of a virtual phrase table constructed from multiple component phrase tables. */ class PhraseDictionaryMultiModel: public PhraseDictionary { #ifdef WITH_DLIB friend class CrossEntropy; #endif public: PhraseDictionaryMultiModel(size_t m_numScoreComponent, PhraseDictionaryFeature* feature); ~PhraseDictionaryMultiModel(); bool Load(const std::vector &input , const std::vector &output , const std::vector &files , const std::vector &weight , size_t tableLimit , const LMList &languageModels , float weightWP); virtual void CollectSufficientStatistics(const Phrase& src, std::map* allStats) const; virtual TargetPhraseCollection* CreateTargetPhraseCollectionLinearInterpolation(std::map* allStats, std::vector > &multimodelweights) const; std::vector > getWeights(size_t numWeights, bool normalize) const; std::vector normalizeWeights(std::vector &weights) const; void CacheForCleanup(TargetPhraseCollection* tpc); void CleanUp(const InputType &source); virtual void CleanUpComponentModels(const InputType &source); #ifdef WITH_DLIB virtual std::vector MinimizePerplexity(std::vector > &phrase_pair_vector); std::vector Optimize(OptimizationObjective * ObjectiveFunction, size_t numModels); #endif // functions below required by base class virtual const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const; virtual void InitializeForInput(InputType const&) { /* Don't do anything source specific here as this object is shared between threads.*/ } ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollectionBase&); protected: std::string m_mode; std::vector m_pd; std::vector m_weight; const LMList *m_languageModels; float m_weightWP; std::vector m_input; std::vector m_output; size_t m_numModels; size_t m_componentTableLimit; PhraseDictionaryFeature* m_feature_load; typedef std::vector PhraseCache; #ifdef WITH_THREADS boost::mutex m_sentenceMutex; typedef std::map SentenceCache; #else typedef PhraseCache SentenceCache; #endif SentenceCache m_sentenceCache; }; #ifdef WITH_DLIB class OptimizationObjective { public: virtual double operator() ( const dlib::matrix& arg) const = 0; }; class CrossEntropy: public OptimizationObjective { public: CrossEntropy ( std::vector &optimizerStats, PhraseDictionaryMultiModel * model, size_t iFeature ) { m_optimizerStats = optimizerStats; m_model = model; m_iFeature = iFeature; } double operator() ( const dlib::matrix& arg) const; protected: std::vector m_optimizerStats; PhraseDictionaryMultiModel * m_model; size_t m_iFeature; }; #endif } // end namespace #endif