// mosesdecoder/moses/LM/Base.h


// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_LanguageModel_h
#define moses_LanguageModel_h
#include <string>
#include <cstddef>
#include "moses/FF/StatefulFeatureFunction.h"
namespace Moses
{
namespace Incremental
{
class Manager;
}
class FactorCollection;
class Factor;
class Phrase;
//! Abstract base class which represents a language model on a contiguous phrase
class LanguageModel : public StatefulFeatureFunction
{
protected:
LanguageModel(const std::string &line);
bool m_enableOOVFeature;
public:
static const LanguageModel &GetFirstLM();
virtual ~LanguageModel();
bool OOVFeatureEnabled() const {
return m_enableOOVFeature;
}
virtual void SetParameter(const std::string& key, const std::string& value);
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
/* Calculate the total unweighted LM score of this phrase and return the scores via reference arguments.
 * Returned scores are always in natural log, regardless of the LM implementation's internal representation.
 * Uses GetValue() of the inherited class.
 * \param fullScore scores of all unigrams, bigrams, ... i.e. every contiguous n-gram of the phrase
 * \param ngramScore score of only the n-grams of order m_nGramOrder
 * \param oovCount number of LM OOVs
 */
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const = 0;
virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
}
virtual void IssueRequestsFor(Hypothesis& hypo,
const FFState* input_state) {
}
virtual void sync() {
}
virtual void SetFFStateIdx(int state_idx) {
}
// KenLM only (others throw an exception): call incremental search with the model and mapping.
virtual void IncrementalCallback(Incremental::Manager &manager) const;
virtual void ReportHistoryOrder(std::ostream &out,const Phrase &phrase) const;
virtual void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedScores) const;
};
}
#endif