mosesdecoder/moses/TranslationModel/UG/mmsapt.h

// -*- c++ -*-
// Sampling phrase table implementation based on memory-mapped suffix arrays.
// Design and code by Ulrich Germann.
#pragma once

#include <time.h>
#include <boost/thread.hpp>
#include <boost/scoped_ptr.hpp>

#include "moses/TypeDef.h"
#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"

#include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h"
#include "moses/TranslationModel/UG/mm/ug_mm_tsa.h"
#include "moses/TranslationModel/UG/mm/tpt_tokenindex.h"
#include "moses/TranslationModel/UG/mm/ug_corpus_token.h"
#include "moses/TranslationModel/UG/mm/ug_typedefs.h"
#include "moses/TranslationModel/UG/mm/tpt_pickler.h"
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"
#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"

#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
#include "moses/TargetPhrase.h"
#include <boost/dynamic_bitset.hpp>
#include "moses/TargetPhraseCollection.h"
#include "util/usage.hh"
#include <map>

#include "moses/TranslationModel/PhraseDictionary.h"
#include "sapt_phrase_scorers.h"

// TO DO:
// - make the lexical phrase scorer take additions to the "dynamic overlay" into account
// - switch to a pool of sapts, where each sapt has its own provenance feature
//   RESEARCH QUESTION: is this more effective than having multiple phrase tables, 
//   each with its own set of features?

using namespace std;
namespace Moses
{
  // NOTE(review): like the file-level `using namespace std;`, this directive
  // sits in a header and therefore affects every file that includes it.
  using namespace bitext;

  // Mmsapt: sampling phrase table based on memory-mapped suffix arrays.
  // Unless NO_MOSES is defined, the class derives from PhraseDictionary.
  // This header only declares the interface; all member functions are 
  // defined out of line.
  class Mmsapt 
#ifndef NO_MOSES
    : public PhraseDictionary
#endif
  {
    // Alignment gets access to the private members below.
    friend class Alignment;
    // string-valued settings keyed by name (private by default access);
    // presumably the parsed configuration, cf. read_config_file() -- confirm
    map<string,string> param;
  public:    
    // shorthand names for the templates used throughout, all instantiated 
    // for Token
    typedef L2R_Token<SimpleWordId> Token;
    typedef mmBitext<Token> mmbitext;
    typedef imBitext<Token> imbitext;
    typedef Bitext<Token>     bitext;
    typedef TSA<Token>           tsa;
    typedef PhraseScorer<Token> pscorer;
  private:
    // vector<sptr<bitext> > shards;
    // btfix: the "fix" bitext (mmBitext; presumably the memory-mapped, 
    //        static part -- confirm)
    // btdyn: the "dyn" bitext (imBitext; presumably the dynamically 
    //        updatable overlay mentioned in the TO DO list -- confirm)
    mmbitext btfix; 
    sptr<imbitext> btdyn; 
    // presumably base name of the bitext files and an extra-data 
    // specification, cf. load_extra_data() -- TODO confirm
    string bname,extra_data;
    // presumably the language tags of the two sides of the bitext -- confirm
    string L1;
    string L2;
    float  m_lbop_conf; // confidence level for lbop smoothing
    float  m_lex_alpha; // alpha parameter (j+a)/(m+a) for lexical smoothing
    // alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)
    // must be > 0 if dynamic 
    size_t m_default_sample_size;
    size_t m_workers;  // number of worker threads for sampling the bitexts
    vector<string> m_feature_set_names; // one or more of: standard, datasource
 
    // // deprecated!
    // char m_pfwd_denom; // denominator for computation of fwd phrase score:
    // // 'r' - divide by raw count
    // // 's' - divide by sample count
    // // 'g' - divide by number of "good" (i.e. coherent) samples 
    // // size_t num_features;

    size_t input_factor;
    size_t output_factor; // we can actually return entire Tokens!

    // bool withLogCountFeatures; // add logs of counts as features?
    // bool withCoherence; 
    // string m_pfwd_features; // which pfwd functions to use
    // string m_pbwd_features; // which pbwd functions to use

    // for display for human inspection (ttable dumps):
    // NOTE(review): vector<bool> is the packed-bit specialization; element 
    // access yields proxy objects rather than bool&.
    vector<string> m_feature_names; // names of features activated
    vector<bool> m_is_logval;  // keeps track of which features are log valued 
    vector<bool> m_is_integer; // keeps track of which features are integer valued 

    // NOTE(review): the third comment used to read "(dyn)" as well, which 
    // looked like a copy-paste slip; relabelled to match the member name.
    vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)
    vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)
    vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (common)

    // Takes a feature function and the registry (one of the vectors above, 
    // presumably) it is to be recorded in. Presumably also updates the 
    // feature bookkeeping (names, log/integer flags) -- TODO confirm.
    void
    register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);

    // NOTE(review): both check_ff overloads take ffname as a const string 
    // by value, i.e. each call copies the string.
    template<typename fftype>
    void 
    check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);
    // add feature function if specified 
    
    // same as above, with an additional float argument (xtra); presumably 
    // an extra parameter handed to the feature function -- confirm
    template<typename fftype>
    void 
    check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);
    // add feature function if specified

    // presumably fills ffvec with the feature functions that are computed 
    // per corpus (fix / dyn) -- TODO confirm against the implementation
    void
    add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);
    
    // built-in feature functions
    // PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;
    // PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;
    // PScoreLex<Token>  calc_lex; // this one I'd like to see as an external ff eventually
    // PScorePC<Token>   apply_pp; // apply phrase penalty 
    // PScoreLogCounts<Token>   add_logcounts_fix;
    // PScoreLogCounts<Token>   add_logcounts_dyn;

    // presumably sets the object up from the configuration line that is 
    // also passed to the constructor -- confirm
    void init(string const& line);
    // mutable, so it can also be locked inside const member functions; 
    // what exactly it guards is not visible in this header
    mutable boost::mutex lock;
    bool withPbwd;   // presumably: use backward phrase scores? -- confirm
    bool poolCounts; // presumably: pool (rather than combine) the counts 
                     // from fix and dyn, cf. pool_pstats()/combine_pstats() 
                     // -- confirm
    vector<FactorType> ofactor; // presumably the output factor(s) -- confirm


  public:
    // typedef boost::unordered_map<uint64_t, sptr<TargetPhraseCollection> > tpcoll_cache_t;

    // A TargetPhraseCollection extended by the bookkeeping fields below 
    // (revision, key, reference count, time stamp, heap position); 
    // m_cache and m_history further down hold pointers to these wrappers.
    class TargetPhraseCollectionWrapper 
      : public TargetPhraseCollection
    {
    public:
      size_t   const revision; // time stamp from dynamic bitext
      uint64_t const      key; // phrase key
      uint32_t       refCount; // reference count
      // NOTE(review): `timespec` is normally a struct declared by <time.h>, 
      // not a preprocessor macro, so defined(timespec) is false unless some 
      // header #defines a macro of that name; in that case the timeval 
      // branch is always the one compiled. The implementation file 
      // presumably uses the same test, so both would have to be changed 
      // together -- TODO confirm.
#if defined(timespec)
      timespec         tstamp; // last use
#else
      timeval          tstamp; // last use
#endif
      int                 idx; // position in history heap
      // r and k presumably initialize revision and key -- confirm
      TargetPhraseCollectionWrapper(size_t r, uint64_t const k);
      ~TargetPhraseCollectionWrapper();
    };

  private:

    // presumably reads settings from file fname into param -- confirm
    void read_config_file(string fname, map<string,string>& param);

    // encache/decache: presumably enter a wrapper into, resp. remove it 
    // from, the cache structures below (m_cache, m_history); both are 
    // const and operate on mutable members -- TODO confirm details
    TargetPhraseCollectionWrapper*
    encache(TargetPhraseCollectionWrapper* const ptr) const;

    void
    decache(TargetPhraseCollectionWrapper* ptr) const;

    // m_cache maps a 64-bit key (presumably the wrapper's phrase key) to 
    // a wrapper; m_history is presumably the "history heap" that 
    // TargetPhraseCollectionWrapper::idx refers to -- confirm.
    // Both hold raw pointers; ownership is not visible from this header.
    typedef map<uint64_t, TargetPhraseCollectionWrapper*> tpc_cache_t;
    mutable tpc_cache_t m_cache;
    mutable vector<TargetPhraseCollectionWrapper*> m_history;
    // phrase table feature weights for alignment:
    vector<float> feature_weights; 

    vector<vector<id_type> > wlex21; 
    // word translation lexicon (without counts, get these from calc_lex.COOC)
    typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
    mm2dtable_t COOCraw;

    // presumably builds a TargetPhrase for source phrase src from the 
    // phrase pair statistics of the fix and/or dyn bitext; how null 
    // arguments and ownership of the result are handled is not visible 
    // here -- TODO confirm
    TargetPhrase* 
    mkTPhrase(Phrase const& src, 
	      Moses::bitext::PhrasePair<Token>* fix, 
	      Moses::bitext::PhrasePair<Token>* dyn, 
	      sptr<Bitext<Token> > const& dynbt) const;

    // template<typename Token>
    // void 
    // expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt, 
    // 	   pstats const& pstats, vector<PhrasePair<Token> >& dest);
    
    // disabled older variant of mkTPhrase, excluded from compilation
#if 0
    TargetPhrase* 
    mkTPhrase
    (Phrase        const& src, 
     Bitext<Token> const& bt, 
     Moses::bitext::PhrasePair const& pp
     ) const;
#endif
    // presumably turns the statistics of one bitext (stats, bt) for the 
    // source phrase src / pid1 into entries of tpcoll -- confirm
    void
    process_pstats
    (Phrase   const& src,
     uint64_t const  pid1, 
     pstats   const& stats, 
     Bitext<Token> const & bt, 
     TargetPhraseCollection* tpcoll
     ) const;

    // pool_pstats / combine_pstats take the statistics of two bitexts 
    // (a and b) for the same source phrase and a collection to fill. 
    // Presumably the former pools the counts while the latter keeps them 
    // apart (cf. poolCounts); the meaning of the bool result is not 
    // visible in this header -- TODO confirm
    bool
    pool_pstats
    (Phrase   const& src,
     uint64_t const  pid1a, 
     pstats        * statsa, 
     Bitext<Token> const & bta,
     uint64_t const  pid1b, 
     pstats   const* statsb, 
     Bitext<Token> const & btb,
     TargetPhraseCollection* tpcoll
     ) const;
     
    bool
    combine_pstats
    (Phrase   const& src,
     uint64_t const  pid1a, 
     pstats   * statsa, 
     Bitext<Token> const & bta,
     uint64_t const  pid1b, 
     pstats   const* statsb, 
     Bitext<Token> const & btb,
     TargetPhraseCollection* tpcoll
     ) const;

    // presumably loads additional data into the dynamic bitext; the role 
    // of the locking flag is not visible here -- TODO confirm
    void
    load_extra_data(string bname, bool locking);

    // counter that can be modified from const member functions; presumably 
    // counts TargetPhraseCollections -- confirm
    mutable size_t m_tpc_ctr;
  public:
    // Mmsapt(string const& description, string const& line);
    // construct from a single configuration line
    Mmsapt(string const& line);
    void
    Load();
    
    // returns the prior table limit
    size_t SetTableLimit(size_t limit);

    // the following members are declared only in builds without NO_MOSES
#ifndef NO_MOSES
    TargetPhraseCollection const* 
    GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
    //! Create a sentence-specific manager for SCFG rule lookup.
    ChartRuleLookupManager*
    CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);

    ChartRuleLookupManager*
    CreateRuleLookupManager
    (const ChartParser &, const ChartCellCollectionBase &, std::size_t);
#endif

    // presumably adds a sentence pair (s1, s2) with word alignment a to 
    // the dynamic bitext -- confirm
    void add(string const& s1, string const& s2, string const& a);

    // align two new sentences
    sptr<vector<int> >
    align(string const& src, string const& trg) const;

    // presumably sets feature_weights (the weights used for alignment) 
    // -- confirm
    void setWeights(vector<float> const& w);

    void 
    CleanUpAfterSentenceProcessing(const InputType& source);

    void 
    InitializeForInput(InputType const& source);

    // presumably hands a collection obtained from this phrase table back 
    // to it (cf. refCount in TargetPhraseCollectionWrapper) -- confirm
    void 
    Release(TargetPhraseCollection const* tpc) const;

    bool 
    ProvidesPrefixCheck() const;
    
    /// return true if prefix /phrase/ exists
    bool
    PrefixExists(Phrase const& phrase) const;

    // presumably returns m_feature_names -- confirm
    vector<string> const&
    GetFeatureNames() const;
    
    // void
    // ScorePPfix(bitext::PhrasePair& pp) const;

    // presumably look up feature i in m_is_logval / m_is_integer; whether 
    // i is range-checked is not visible here -- confirm
    bool
    isLogVal(int i) const;
    
    bool
    isInteger(int i) const;

  // NOTE(review): empty access section, no members follow
  private:
  };
} // end namespace
Initial check-in. 2013-09-25 03:51:50 +04:00			`// -- c++ --`
			`// Sampling phrase table implementation based on memory-mapped suffix arrays.`
			`// Design and code by Ulrich Germann.`
			`#pragma once`

Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`#include <time.h>`
Initial check-in. 2013-09-25 03:51:50 +04:00			`#include <boost/thread.hpp>`
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`#include <boost/scoped_ptr.hpp>`
Initial check-in. 2013-09-25 03:51:50 +04:00
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`#include "moses/TypeDef.h"`
			`#include "moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h"`
			`#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"`
			`#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"`
Initial check-in. 2013-09-25 03:51:50 +04:00
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`#include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h"`
			`#include "moses/TranslationModel/UG/mm/ug_mm_tsa.h"`
			`#include "moses/TranslationModel/UG/mm/tpt_tokenindex.h"`
			`#include "moses/TranslationModel/UG/mm/ug_corpus_token.h"`
			`#include "moses/TranslationModel/UG/mm/ug_typedefs.h"`
			`#include "moses/TranslationModel/UG/mm/tpt_pickler.h"`
			`#include "moses/TranslationModel/UG/mm/ug_bitext.h"`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`#include "moses/TranslationModel/UG/mm/ug_phrasepair.h"`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"`
Initial check-in. 2013-09-25 03:51:50 +04:00
			`#include "moses/InputFileStream.h"`
			`#include "moses/FactorTypeSet.h"`
			`#include "moses/TargetPhrase.h"`
			`#include <boost/dynamic_bitset.hpp>`
			`#include "moses/TargetPhraseCollection.h"`
Mmsapt now uses timespec on linux, timeval om MacOS for time stamps. 2014-08-05 05:22:20 +04:00			`#include "util/usage.hh"`
Initial check-in. 2013-09-25 03:51:50 +04:00			`#include <map>`

Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`#include "moses/TranslationModel/PhraseDictionary.h"`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`#include "sapt_phrase_scorers.h"`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00
			`// TO DO:`
			`// - make lexical phrase scorer take addition to the "dynamic overlay" into account`
			`// - switch to pool of sapts, where each sapt has its own provenance feature`
			`// RESEARCH QUESTION: is this more effective than having multiple phrase tables,`
			`// each with its own set of features?`
Initial check-in. 2013-09-25 03:51:50 +04:00
			`using namespace std;`
			`namespace Moses`
			`{`
			`using namespace bitext;`
Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`class Mmsapt`
			`#ifndef NO_MOSES`
			`: public PhraseDictionary`
			`#endif`
Initial check-in. 2013-09-25 03:51:50 +04:00			`{`
Added class Alignment as a friend and wlex21 and COOCraw for development purposes while working on word alignment issues. 2014-03-10 15:57:40 +04:00			`friend class Alignment;`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`map<string,string> param;`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`public:`
Initial check-in. 2013-09-25 03:51:50 +04:00			`typedef L2R_Token<SimpleWordId> Token;`
			`typedef mmBitext<Token> mmbitext;`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`typedef imBitext<Token> imbitext;`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`typedef Bitext<Token> bitext;`
Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`typedef TSA<Token> tsa;`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`typedef PhraseScorer<Token> pscorer;`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`private:`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`// vector<sptr<bitext> > shards;`
Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`mmbitext btfix;`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`sptr<imbitext> btdyn;`
Work in progress. 2014-03-13 03:13:44 +04:00			`string bname,extra_data;`
Initial check-in. 2013-09-25 03:51:50 +04:00			`string L1;`
			`string L2;`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`float m_lbop_conf; // confidence level for lbop smoothing`
			`float m_lex_alpha; // alpha paramter (j+a)/(m+a) for lexical smoothing`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`// alpha parameter for lexical smoothing (joint+alpha)/(marg + alpha)`
			`// must be > 0 if dynamic`
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`size_t m_default_sample_size;`
			`size_t m_workers; // number of worker threads for sampling the bitexts`
Option to add TM-specific word and phrase counts 2014-09-04 09:49:26 +04:00			`vector<string> m_feature_set_names; // one or more of: standard, datasource`

Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`// // deprecated!`
			`// char m_pfwd_denom; // denominator for computation of fwd phrase score:`
			`// // 'r' - divide by raw count`
			`// // 's' - divide by sample count`
			`// // 'g' - devide by number of "good" (i.e. coherent) samples`
			`// // size_t num_features;`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00
Initial check-in. 2013-09-25 03:51:50 +04:00			`size_t input_factor;`
			`size_t output_factor; // we can actually return entire Tokens!`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`// bool withLogCountFeatures; // add logs of counts as features?`
			`// bool withCoherence;`
			`// string m_pfwd_features; // which pfwd functions to use`
			`// string m_pbwd_features; // which pbwd functions to use`

			`// for display for human inspection (ttable dumps):`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`vector<string> m_feature_names; // names of features activated`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`vector<bool> m_is_logval; // keeps track of which features are log valued`
			`vector<bool> m_is_integer; // keeps track of which features are integer valued`

Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`vector<sptr<pscorer > > m_active_ff_fix; // activated feature functions (fix)`
			`vector<sptr<pscorer > > m_active_ff_dyn; // activated feature functions (dyn)`
			`vector<sptr<pscorer > > m_active_ff_common; // activated feature functions (dyn)`

Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`void`
			`register_ff(sptr<pscorer> const& ff, vector<sptr<pscorer> > & registry);`

			`template<typename fftype>`
			`void`
			`check_ff(string const ffname,vector<sptr<pscorer> >* registry = NULL);`
			`// add feature function if specified`

			`template<typename fftype>`
			`void`
			`check_ff(string const ffname, float const xtra, vector<sptr<pscorer> >* registry = NULL);`
			`// add feature function if specified`

			`void`
			`add_corpus_specific_features(vector<sptr<pscorer > >& ffvec);`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00
Initial check-in. 2013-09-25 03:51:50 +04:00			`// built-in feature functions`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`// PScorePfwd<Token> calc_pfwd_fix, calc_pfwd_dyn;`
			`// PScorePbwd<Token> calc_pbwd_fix, calc_pbwd_dyn;`
			`// PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually`
Option to add TM-specific word and phrase counts 2014-09-04 09:49:26 +04:00			`// PScorePC<Token> apply_pp; // apply phrase penalty`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`// PScoreLogCounts<Token> add_logcounts_fix;`
			`// PScoreLogCounts<Token> add_logcounts_dyn;`
Initial check-in. 2013-09-25 03:51:50 +04:00			`void init(string const& line);`
			`mutable boost::mutex lock;`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`bool withPbwd;`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`bool poolCounts;`
			`vector<FactorType> ofactor;`

Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`public:`
			`// typedef boost::unordered_map<uint64_t, sptr<TargetPhraseCollection> > tpcoll_cache_t;`
			`class TargetPhraseCollectionWrapper`
			`: public TargetPhraseCollection`
			`{`
			`public:`
			`size_t const revision; // time stamp from dynamic bitext`
			`uint64_t const key; // phrase key`
			`uint32_t refCount; // reference count`
Mmsapt now uses timespec on linux, timeval om MacOS for time stamps. 2014-08-05 05:22:20 +04:00			`#if defined(timespec)`
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`timespec tstamp; // last use`
Mmsapt now uses timespec on linux, timeval om MacOS for time stamps. 2014-08-05 05:22:20 +04:00			`#else`
			`timeval tstamp; // last use`
			`#endif`
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`int idx; // position in history heap`
			`TargetPhraseCollectionWrapper(size_t r, uint64_t const k);`
			`~TargetPhraseCollectionWrapper();`
			`};`

			`private:`

Added more phrase table features and configuration options to Mmsapt. 2014-06-03 18:41:59 +04:00			`void read_config_file(string fname, map<string,string>& param);`

Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`TargetPhraseCollectionWrapper*`
			`encache(TargetPhraseCollectionWrapper* const ptr) const;`

			`void`
			`decache(TargetPhraseCollectionWrapper* ptr) const;`

			`typedef map<uint64_t, TargetPhraseCollectionWrapper*> tpc_cache_t;`
			`mutable tpc_cache_t m_cache;`
			`mutable vector<TargetPhraseCollectionWrapper*> m_history;`
Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`// phrase table feature weights for alignment:`
			`vector<float> feature_weights;`

Added class Alignment as a friend and wlex21 and COOCraw for development purposes while working on word alignment issues. 2014-03-10 15:57:40 +04:00			`vector<vector<id_type> > wlex21;`
			`// word translation lexicon (without counts, get these from calc_lex.COOC)`
			`typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;`
			`mm2dtable_t COOCraw;`

Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`TargetPhrase*`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`mkTPhrase(Phrase const& src,`
			`Moses::bitext::PhrasePair<Token>* fix,`
			`Moses::bitext::PhrasePair<Token>* dyn,`
			`sptr<Bitext<Token> > const& dynbt) const;`

			`// template<typename Token>`
			`// void`
			`// expand(typename Bitext<Token>::iter const& m, Bitext<Token> const& bt,`
			`// pstats const& pstats, vector<PhrasePair<Token> >& dest);`

			`#if 0`
			`TargetPhrase*`
			`mkTPhrase`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`(Phrase const& src,`
			`Bitext<Token> const& bt,`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`Moses::bitext::PhrasePair const& pp`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`) const;`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`#endif`
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`void`
			`process_pstats`
			`(Phrase const& src,`
			`uint64_t const pid1,`
			`pstats const& stats,`
			`Bitext<Token> const & bt,`
			`TargetPhraseCollection* tpcoll`
			`) const;`

			`bool`
			`pool_pstats`
			`(Phrase const& src,`
			`uint64_t const pid1a,`
			`pstats * statsa,`
			`Bitext<Token> const & bta,`
			`uint64_t const pid1b,`
			`pstats const* statsb,`
			`Bitext<Token> const & btb,`
			`TargetPhraseCollection* tpcoll`
			`) const;`

			`bool`
			`combine_pstats`
			`(Phrase const& src,`
			`uint64_t const pid1a,`
			`pstats * statsa,`
			`Bitext<Token> const & bta,`
			`uint64_t const pid1b,`
			`pstats const* statsb,`
			`Bitext<Token> const & btb,`
			`TargetPhraseCollection* tpcoll`
			`) const;`

Work in progress. 2014-03-13 03:13:44 +04:00			`void`
Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`load_extra_data(string bname, bool locking);`
Work in progress. 2014-03-13 03:13:44 +04:00
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`mutable size_t m_tpc_ctr;`
Initial check-in. 2013-09-25 03:51:50 +04:00			`public:`
Removed constructor with both description and config line. 2014-02-22 04:25:02 +04:00			`// Mmsapt(string const& description, string const& line);`
Initial check-in. 2013-09-25 03:51:50 +04:00			`Mmsapt(string const& line);`
			`void`
			`Load();`

Added configurable options and SetTableLimit to Mmsapt. 2014-06-05 04:47:29 +04:00			`// returns the prior table limit`
			`size_t SetTableLimit(size_t limit);`

Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`#ifndef NO_MOSES`
Initial check-in. 2013-09-25 03:51:50 +04:00			`TargetPhraseCollection const*`
			`GetTargetPhraseCollectionLEGACY(const Phrase& src) const;`
			`//! Create a sentence-specific manager for SCFG rule lookup.`
			`ChartRuleLookupManager*`
			`CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);`
1. Added variant of CreateRuleLookupManager variant as required by the new pure virtual function in PhraseTable.h (from the master branch). 2. Bug fix in pool_stats. 2014-03-20 00:43:47 +04:00
			`ChartRuleLookupManager*`
			`CreateRuleLookupManager`
			`(const ChartParser &, const ChartCellCollectionBase &, std::size_t);`
Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`#endif`
Initial check-in. 2013-09-25 03:51:50 +04:00
Added dynamicly updatable corpus; updated or added query functions. 2014-02-08 21:56:48 +04:00			`void add(string const& s1, string const& s2, string const& a);`

Added phrase-based word alignment to mmsapt (work in progress!). 2014-02-21 03:25:36 +04:00			`// align two new sentences`
			`sptr<vector<int> >`
			`align(string const& src, string const& trg) const;`

			`void setWeights(vector<float> const& w);`
1. Added variant of CreateRuleLookupManager variant as required by the new pure virtual function in PhraseTable.h (from the master branch). 2. Bug fix in pool_stats. 2014-03-20 00:43:47 +04:00
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`void`
			`CleanUpAfterSentenceProcessing(const InputType& source);`
1. Added variant of CreateRuleLookupManager variant as required by the new pure virtual function in PhraseTable.h (from the master branch). 2. Bug fix in pool_stats. 2014-03-20 00:43:47 +04:00
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00			`void`
			`InitializeForInput(InputType const& source);`

			`void`
1. Renamed PhraseDictionary member function release() to Release(). 2. Added option to check phrase prefixes for early stopping of InputPath generation when translating confusion networks. 2014-03-25 05:49:24 +04:00			`Release(TargetPhraseCollection const* tpc) const;`

			`bool`
			`ProvidesPrefixCheck() const;`
Numerous bug fixes; implemented caching and improved life cycle management for TargetPhraseCollections. 2014-03-24 17:30:22 +04:00
1. Renamed PhraseDictionary member function release() to Release(). 2. Added option to check phrase prefixes for early stopping of InputPath generation when translating confusion networks. 2014-03-25 05:49:24 +04:00			`/// return true if prefix /phrase/ exists`
			`bool`
			`PrefixExists(Phrase const& phrase) const;`

Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00			`vector<string> const&`
			`GetFeatureNames() const;`

Major overhaul of Mmsapt. Reorganization of old and addition of new features in phrase tables. Many critical bug fixes. 2014-07-09 05:41:28 +04:00			`// void`
			`// ScorePPfix(bitext::PhrasePair& pp) const;`

			`bool`
			`isLogVal(int i) const;`

			`bool`
			`isInteger(int i) const;`
Reorganization of phrase scorers in Mmsapt. 2014-06-14 16:03:31 +04:00
Initial check-in. 2013-09-25 03:51:50 +04:00			`private:`
			`};`
			`} // end namespace`