/*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifndef moses_PhraseDictionaryMultiModelCounts_h #define moses_PhraseDictionaryMultiModelCounts_h #include "moses/TranslationModel/PhraseDictionaryMultiModel.h" #include #include "moses/StaticData.h" #include "moses/TargetPhrase.h" #include "moses/Util.h" #include "moses/UserMessage.h" #include extern std::vector tokenize( const char*); namespace Moses { typedef boost::unordered_map lexicalMap; typedef boost::unordered_map lexicalMapJoint; typedef std::pair, std::vector > lexicalPair; typedef std::vector > lexicalCache; struct multiModelCountsStatistics : multiModelStatistics { std::vector fst, ft; }; struct multiModelCountsStatisticsOptimization: multiModelCountsStatistics { std::vector fs; lexicalCache lexCachee2f, lexCachef2e; size_t f; }; struct lexicalTable { lexicalMapJoint joint; lexicalMap marginal; }; double InstanceWeighting(std::vector &joint_counts, std::vector &marginals, std::vector &multimodelweights); double LinearInterpolationFromCounts(std::vector &joint_counts, std::vector &marginals, std::vector &multimodelweights); //thrown if alignment information does not match phrase pair (out-of-bound alignment points) class AlignmentException : public std::runtime_error { public: AlignmentException() : std::runtime_error("AlignmentException") { } }; /** Implementation of a phrase table with raw counts. */ class PhraseDictionaryMultiModelCounts: public PhraseDictionaryMultiModel { #ifdef WITH_DLIB friend class CrossEntropyCounts; #endif typedef std::vector< std::set > AlignVector; public: PhraseDictionaryMultiModelCounts(const std::string &line); ~PhraseDictionaryMultiModelCounts(); void Load(); TargetPhraseCollection* CreateTargetPhraseCollectionCounts(const Phrase &src, std::vector &fs, std::map* allStats, std::vector > &multimodelweights) const; void CollectSufficientStatistics(const Phrase &src, std::vector &fs, std::map* allStats) const; float GetTargetCount(const Phrase& target, size_t modelIndex) const; double GetLexicalProbability( Word &inner, Word &outer, const std::vector &tables, std::vector &multimodelweights ) const; double ComputeWeightedLexicalTranslation( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector &tables, std::vector &multimodelweights, bool is_input ) const; double ComputeWeightedLexicalTranslationFromCache( std::vector, std::vector > > > &cache, std::vector &weights ) const; std::pair GetAlignmentsForLexWeights(const Phrase &phraseS, const Phrase &phraseT, const AlignmentInfo &alignment) const; std::vector, std::vector > > > CacheLexicalStatistics( const Phrase &phraseS, const Phrase &phraseT, AlignVector &alignment, const std::vector &tables, bool is_input ); void FillLexicalCountsJoint(Word &wordS, Word &wordT, std::vector &count, const std::vector &tables) const; void FillLexicalCountsMarginal(Word &wordS, std::vector &count, const std::vector &tables) const; void LoadLexicalTable( std::string &fileName, lexicalTable* ltable); const TargetPhraseCollection* GetTargetPhraseCollectionLEGACY(const Phrase& src) const; #ifdef WITH_DLIB std::vector MinimizePerplexity(std::vector > &phrase_pair_vector); #endif // functions below required by base class virtual void InitializeForInput(InputType const&) { /* Don't do anything source specific here as this object is shared between threads.*/ } void SetParameter(const std::string& key, const std::string& value); private: std::vector m_inverse_pd; std::vector m_lexTable_e2f, m_lexTable_f2e; double (*m_combineFunction) (std::vector &joint_counts, std::vector &marginals, std::vector &multimodelweights); std::vector m_lexE2FStr, m_lexF2EStr, m_targetTable; }; #ifdef WITH_DLIB class CrossEntropyCounts: public OptimizationObjective { public: CrossEntropyCounts ( std::vector &optimizerStats, PhraseDictionaryMultiModelCounts * model, size_t iFeature ) { m_optimizerStats = optimizerStats; m_model = model; m_iFeature = iFeature; } double operator() ( const dlib::matrix& arg) const; private: std::vector m_optimizerStats; PhraseDictionaryMultiModelCounts * m_model; size_t m_iFeature; }; #endif } // end namespace #endif