2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2010-09-22 02:43:29 +04:00
|
|
|
#ifndef moses_LanguageModelKen_h
|
|
|
|
#define moses_LanguageModelKen_h
|
|
|
|
|
|
|
|
#include <string>
|
2013-08-27 23:55:07 +04:00
|
|
|
#include <boost/shared_ptr.hpp>
|
2010-10-28 05:05:04 +04:00
|
|
|
|
2013-08-27 23:55:07 +04:00
|
|
|
#include "lm/word_index.hh"
|
2016-02-20 03:07:48 +03:00
|
|
|
#include "util/mmap.hh"
|
2013-08-27 23:55:07 +04:00
|
|
|
|
|
|
|
#include "moses/LM/Base.h"
|
|
|
|
#include "moses/Hypothesis.h"
|
2012-11-12 23:56:18 +04:00
|
|
|
#include "moses/TypeDef.h"
|
2013-08-27 23:55:07 +04:00
|
|
|
#include "moses/Word.h"
|
2011-08-24 14:45:41 +04:00
|
|
|
|
2017-01-02 21:57:52 +03:00
|
|
|
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2010-10-27 21:50:40 +04:00
|
|
|
|
2013-08-27 23:55:07 +04:00
|
|
|
//class LanguageModel;
|
2013-09-27 12:35:24 +04:00
|
|
|
// Forward declaration: this header only uses FFState through pointers, in the
// EmptyHypothesisState / EvaluateWhenApplied declarations of LanguageModelKen.
class FFState;
|
2017-01-02 21:57:52 +03:00
|
|
|
// Forward declaration: named in the friend declaration inside LanguageModelKen,
// which gives this class access to LanguageModelKen's non-public members.
class InMemoryPerSentenceOnDemandLM;
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2013-07-19 01:54:52 +04:00
|
|
|
//! Factory returning a LanguageModel built from `line` alone; only declared in this header.
//! NOTE(review): presumably `line` is the feature configuration line from which the
//! remaining settings (model file, factor, load method) are parsed - confirm against the definition.
LanguageModel *ConstructKenLM(const std::string &line);
|
2013-01-17 21:15:10 +04:00
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
//! Constructs the language model and also loads it. Returns an instance of the templated KenLM class (LanguageModelKen) as a LanguageModel pointer.
|
2016-02-20 03:07:48 +03:00
|
|
|
//! Overload taking the model file, factor type and load method explicitly; its parameter
//! list matches the LanguageModelKen constructor declared later in this header.
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
|
2010-09-22 02:43:29 +04:00
|
|
|
|
2013-08-27 23:55:07 +04:00
|
|
|
/*
|
|
|
|
* An implementation of single factor LM using Kenneth's code.
|
|
|
|
*/
|
|
|
|
template <class Model> class LanguageModelKen : public LanguageModel
|
|
|
|
{
|
|
|
|
public:
|
2016-02-20 03:07:48 +03:00
|
|
|
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, util::LoadMethod load_method);
|
2013-09-27 12:35:24 +04:00
|
|
|
|
|
|
|
virtual const FFState *EmptyHypothesisState(const InputType &/*input*/) const;
|
|
|
|
|
|
|
|
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const;
|
|
|
|
|
2014-07-10 02:41:08 +04:00
|
|
|
virtual FFState *EvaluateWhenApplied(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const;
|
2013-09-27 12:35:24 +04:00
|
|
|
|
2014-07-10 02:54:16 +04:00
|
|
|
virtual FFState *EvaluateWhenApplied(const ChartHypothesis& cur_hypo, int featureID, ScoreComponentCollection *accumulator) const;
|
2013-09-27 12:35:24 +04:00
|
|
|
|
2014-11-04 16:13:56 +03:00
|
|
|
virtual FFState *EvaluateWhenApplied(const Syntax::SHyperedge& hyperedge, int featureID, ScoreComponentCollection *accumulator) const;
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
virtual void IncrementalCallback(Incremental::Manager &manager) const;
|
2013-10-13 09:59:05 +04:00
|
|
|
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
|
2013-09-27 12:35:24 +04:00
|
|
|
|
|
|
|
virtual bool IsUseable(const FactorMask &mask) const;
|
|
|
|
|
2017-01-02 21:57:52 +03:00
|
|
|
friend class InMemoryPerSentenceOnDemandLM;
|
|
|
|
|
2013-08-27 23:55:07 +04:00
|
|
|
protected:
|
2013-09-27 12:35:24 +04:00
|
|
|
boost::shared_ptr<Model> m_ngram;
|
|
|
|
|
|
|
|
const Factor *m_beginSentenceFactor;
|
|
|
|
|
|
|
|
FactorType m_factorType;
|
|
|
|
|
2016-02-20 03:07:48 +03:00
|
|
|
void LoadModel(const std::string &file, util::LoadMethod load_method);
|
2016-01-13 01:05:00 +03:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
lm::WordIndex TranslateID(const Word &word) const {
|
|
|
|
std::size_t factor = word.GetFactor(m_factorType)->GetId();
|
|
|
|
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
|
|
|
|
}
|
2013-08-27 23:55:07 +04:00
|
|
|
|
2016-01-13 00:54:21 +03:00
|
|
|
std::vector<lm::WordIndex> m_lmIdLookup;
|
|
|
|
|
2013-08-27 23:55:07 +04:00
|
|
|
private:
|
2017-01-02 21:57:52 +03:00
|
|
|
LanguageModelKen();
|
2013-09-27 12:35:24 +04:00
|
|
|
LanguageModelKen(const LanguageModelKen<Model> ©_from);
|
|
|
|
|
|
|
|
// Convert last words of hypothesis into vocab ids, returning an end pointer.
|
|
|
|
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const {
|
2013-08-27 23:55:07 +04:00
|
|
|
lm::WordIndex *index = indices;
|
|
|
|
lm::WordIndex *end = indices + m_ngram->Order() - 1;
|
|
|
|
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
|
|
|
|
for (; ; ++index, --position) {
|
|
|
|
if (index == end) return index;
|
|
|
|
if (position == -1) {
|
|
|
|
*index = m_ngram->GetVocabulary().BeginSentence();
|
|
|
|
return index + 1;
|
|
|
|
}
|
|
|
|
*index = TranslateID(hypo.GetWord(position));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
|
2015-11-10 18:07:06 +03:00
|
|
|
protected:
|
|
|
|
//bool m_oovFeatureEnabled; /// originally from LanguageModel, copied here to separate the interfaces. Called m_enableOOVFeature there
|
2013-08-27 23:55:07 +04:00
|
|
|
};
|
|
|
|
|
2011-10-13 16:33:05 +04:00
|
|
|
} // namespace Moses
|
2010-09-22 02:43:29 +04:00
|
|
|
|
|
|
|
#endif
|