mosesdecoder/moses/src/PhraseDictionary.h

// $Id$

/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#pragma once

#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <vector>

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif

#include "Phrase.h"
#include "TargetPhrase.h"
#include "Dictionary.h"
#include "TargetPhraseCollection.h"
#include "FeatureFunction.h"

namespace Moses
{

// Forward declarations. InputType and WordsRange are only used as reference
// parameters in this header, so their full definitions are not needed here.
class StaticData;
class InputType;
class WordsRange;

// Declared before its definition because PhraseDictionary stores a pointer to it.
class PhraseDictionaryFeature;
/**
  * Abstract base class for phrase dictionaries (tables).
  *
  * Holds the table limit and a pointer to the PhraseDictionaryFeature it was
  * constructed with. Lookup by phrase, weight setting, entry creation and
  * per-input initialisation are pure virtual and must be supplied by derived
  * classes.
  **/
class PhraseDictionary: public Dictionary {
  public:
    /** @param numScoreComponent passed through to the Dictionary constructor.
      * @param feature stored unchanged in m_feature; never deleted by this class.
      * The table limit starts out as 0.
      */
    PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):
        Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}
    /** Virtual so that destroying a derived table through a PhraseDictionary
      * pointer runs the derived destructor, independently of how the
      * Dictionary base declares its own destructor.
      */
    virtual ~PhraseDictionary() {}
    //! table limit number.
    size_t GetTableLimit() const { return m_tableLimit; }
    //! decode type of a phrase dictionary: always Translate.
    DecodeType GetDecodeType() const    {   return Translate;   }
    //! accessor for the associated feature; defined out of line (presumably returns m_feature).
    const PhraseDictionaryFeature* GetFeature() const;
    /** set/change translation weights and recalc weighted score for each translation.
        * TODO This may be redundant now we use ScoreCollection
    */
    virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;

    //! find the list of translations that can translate src. Only for phrase input
    virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
    //! find the list of translations that can translate a portion of src. Used by confusion network decoding.
    //! Not pure virtual: a default implementation is defined out of line.
    virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
    //! Create entry for translation of source to targetPhrase
    virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
    //! hook taking the input that is about to be processed; must be implemented by derived classes
    virtual void InitializeForInput(InputType const& source) = 0;


  protected:
    //! table limit reported by GetTableLimit(); initialised to 0 by the constructor
    size_t m_tableLimit;
    //! feature passed to the constructor; stored as a plain pointer
    const PhraseDictionaryFeature* m_feature;
};


/**
 * Represents a feature derived from a phrase table.
 *
 * Keeps the phrase table parameters (score counts, factor lists, file path,
 * weights, table limit) together with the smart pointers that own the
 * PhraseDictionary handed out by GetDictionary().
 *
 * The class is not copyable: it owns dictionaries through std::auto_ptr, whose
 * copy would silently move ownership away from the source object, and in
 * WITH_THREADS builds boost::thread_specific_ptr cannot be copied at all.
 */
class PhraseDictionaryFeature :  public StatelessFeatureFunction
{
 public:
    /** Parameter names mirror the private members below; the definition is
      * out of line, so presumably each argument is stored in the member of the
      * same name — confirm against the implementation file.
      */
    PhraseDictionaryFeature(  size_t numScoreComponent
                            , unsigned numInputScores
                            , const std::vector<FactorType> &input
                            , const std::vector<FactorType> &output
                            , const std::string &filePath
                            , const std::vector<float> &weight
                            , size_t tableLimit);

    virtual ~PhraseDictionaryFeature();

    //! defined out of line
    virtual bool ComputeValueInTranslationOption() const;

    //! defined out of line
    std::string GetScoreProducerDescription() const;
    //! short name used for this feature's weights: always "tm"
    std::string GetScoreProducerWeightShortName() const
    {
        return "tm";
    }
    //! defined out of line (presumably returns m_numScoreComponent)
    size_t GetNumScoreComponents() const;

    //! defined out of line (presumably returns m_numInputScores)
    size_t GetNumInputScores() const;

    //! dictionary to use for the given input; defined out of line
    PhraseDictionary* GetDictionary(const InputType& source);

 private:
    // Copying is disabled (declared, intentionally never defined): the owning
    // smart pointers below must not be duplicated or have ownership stolen.
    PhraseDictionaryFeature(const PhraseDictionaryFeature&);
    PhraseDictionaryFeature& operator=(const PhraseDictionaryFeature&);

    size_t m_numScoreComponent;
    unsigned m_numInputScores;
    std::vector<FactorType> m_input;
    std::vector<FactorType> m_output;
    std::string m_filePath;
    std::vector<float> m_weight;
    size_t m_tableLimit;
    //Only instantiate one of these
    std::auto_ptr<PhraseDictionary> m_memoryDictionary;
    // The tree dictionary is held in a thread-specific pointer when built
    // WITH_THREADS, and in a single owning pointer otherwise.
    #ifdef WITH_THREADS
    boost::thread_specific_ptr<PhraseDictionary>  m_treeDictionary;
    #else
    std::auto_ptr<PhraseDictionary> m_treeDictionary;
    #endif

};


}
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`// $Id$`

			`/***********************************************************************`
			`Moses - factored phrase-based language decoder`
			`Copyright (C) 2006 University of Edinburgh`

			`This library is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU Lesser General Public`
			`License as published by the Free Software Foundation; either`
			`version 2.1 of the License, or (at your option) any later version.`

			`This library is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`Lesser General Public License for more details.`

			`You should have received a copy of the GNU Lesser General Public`
			`License along with this library; if not, write to the Free Software`
			`Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`***********************************************************************/`

			`#pragma once`

			`#include <iostream>`
			`#include <map>`
			`#include <list>`
			`#include <vector>`
			`#include <string>`
Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00
			`#ifdef WITH_THREADS`
			`#include <boost/thread/tss.hpp>`
			`#endif`

move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`#include "Phrase.h"`
			`#include "TargetPhrase.h"`
			`#include "Dictionary.h"`
			`#include "TargetPhraseCollection.h"`
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230 2009-02-06 18:43:06 +03:00			`#include "FeatureFunction.h"`
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00
create namespace git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1897 1f5c12ca-751b-0410-a591-d2e778427230 2008-10-09 03:51:26 +04:00			`namespace Moses`
			`{`

move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`class StaticData;`
			`class InputType;`
			`class WordsRange;`

Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00			`class PhraseDictionaryFeature;`
			`/**`
			`* Abstract base class for phrase dictionaries (tables).`
			`**/`
			`class PhraseDictionary: public Dictionary {`
			`public:`
			`PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):`
			`Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}`
			`//! table limit number.`
			`size_t GetTableLimit() const { return m_tableLimit; }`
			`DecodeType GetDecodeType() const { return Translate; }`
			`const PhraseDictionaryFeature* GetFeature() const;`
			`/** set/change translation weights and recalc weighted score for each translation.`
			`* TODO This may be redundant now we use ScoreCollection`
			`*/`
			`virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;`

			`//! find list of translations that can translates src. Only for phrase input`
			`virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;`
			`//! find list of translations that can translates a portion of src. Used by confusion network decoding`
			`virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;`
			`//! Create entry for translation of source to targetPhrase`
			`virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;`
			`virtual void InitializeForInput(InputType const& source) = 0;`


			`protected:`
			`size_t m_tableLimit;`
			`const PhraseDictionaryFeature* m_feature;`
			`};`


			`/**`
			`* Represents a feature derived from a phrase table.`
			`*/`
			`class PhraseDictionaryFeature : public StatelessFeatureFunction`
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`{`
Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00
			`public:`
Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00			`PhraseDictionaryFeature( size_t numScoreComponent`
			`, unsigned numInputScores`
			`, const std::vector<FactorType> &input`
			`, const std::vector<FactorType> &output`
			`, const std::string &filePath`
			`, const std::vector<float> &weight`
			`, size_t tableLimit);`

			`virtual ~PhraseDictionaryFeature();`
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00
Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00			`virtual bool ComputeValueInTranslationOption() const;`
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230 2009-02-06 18:43:06 +03:00
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`std::string GetScoreProducerDescription() const;`
generalized n-best list reporting for feature functions, added experimental version of global lexical model git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2343 1f5c12ca-751b-0410-a591-d2e778427230 2009-05-26 23:30:35 +04:00			`std::string GetScoreProducerWeightShortName() const`
			`{`
			`return "tm";`
			`}`
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00			`size_t GetNumScoreComponents() const;`

Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230 2009-02-06 18:43:06 +03:00			`size_t GetNumInputScores() const;`

Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00			`PhraseDictionary* GetDictionary(const InputType& source);`

			`private:`
			`size_t m_numScoreComponent;`
			`unsigned m_numInputScores;`
			`std::vector<FactorType> m_input;`
			`std::vector<FactorType> m_output;`
			`std::string m_filePath;`
			`std::vector<float> m_weight;`
			`size_t m_tableLimit;`
			`//Only instantiate one of these`
			`std::auto_ptr<PhraseDictionary> m_memoryDictionary;`
			`#ifdef WITH_THREADS`
			`boost::thread_specific_ptr<PhraseDictionary> m_treeDictionary;`
			`#else`
			`std::auto_ptr<PhraseDictionary> m_treeDictionary;`
			`#endif`
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230 2009-02-06 18:43:06 +03:00
Merge moses-server branch (includes mt moses) into trunk. Plain (single-thread) moses should configure and build as before. Multi-thread and server only available if appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230 2009-08-07 20:47:54 +04:00			`};`
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230 2009-02-06 18:43:06 +03:00
move cube pruning moses lib to trunk git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1848 1f5c12ca-751b-0410-a591-d2e778427230 2008-06-11 14:52:57 +04:00
create namespace git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1897 1f5c12ca-751b-0410-a591-d2e778427230 2008-10-09 03:51:26 +04:00
			`}`