mosesdecoder/moses/TranslationModel/PhraseDictionary.h

174 lines
4.6 KiB
C
Raw Normal View History

2013-09-25 02:56:47 +04:00
// -*- c++ -*-
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_PhraseDictionary_h
#define moses_PhraseDictionary_h
#include <iostream>
#include <map>
#include <memory>
#include <list>
#include <stdexcept>
#include <vector>
#include <string>
#include <boost/unordered_map.hpp>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#include <time.h>
#endif
#include "moses/Phrase.h"
#include "moses/TargetPhrase.h"
#include "moses/TargetPhraseCollection.h"
#include "moses/InputPath.h"
2013-08-30 19:03:06 +04:00
#include "moses/FF/DecodeFeature.h"
namespace Moses
{
class StaticData;
class InputType;
class WordsRange;
class ChartCellCollectionBase;
class ChartRuleLookupManager;
class ChartParser;
/**
 * Cache container mapping a lookup key to a cached translation collection.
 *
 * key          = hash of source phrase / address of phrase-table node
 * value.first  = all translations for that key
 * value.second = time of last access
 *
 * NOTE(review): clock_t is declared in <time.h>, which this header includes
 * only when WITH_THREADS is not defined; builds with WITH_THREADS appear to
 * rely on a transitive include -- confirm.
 */
class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >
{
public:
  // Declared here, defined out of line.
  ~CacheColl();
};
/**
* Abstract base class for phrase dictionaries (tables).
**/
2013-02-22 23:17:57 +04:00
class PhraseDictionary : public DecodeFeature
{
public:
virtual bool ProvidesPrefixCheck() const;
static const std::vector<PhraseDictionary*>& GetColl() {
2014-01-15 19:42:02 +04:00
return s_staticColl;
}
PhraseDictionary(const std::string &line);
2013-02-22 23:17:57 +04:00
2013-06-26 20:12:22 +04:00
virtual ~PhraseDictionary() {
}
//! table limit number.
size_t GetTableLimit() const {
return m_tableLimit;
}
virtual
void
Release(TargetPhraseCollection const* tpc) const;
/// return true if phrase table entries starting with /phrase/
// exist in the table.
virtual
bool
PrefixExists(Phrase const& phrase) const;
2013-09-27 12:35:24 +04:00
// LEGACY!
2013-09-25 02:56:47 +04:00
// The preferred method is to override GetTargetPhraseCollectionBatch().
// See class PhraseDictionaryMemory or PhraseDictionaryOnDisk for details
//! find list of translations that can translates src. Only for phrase input
2013-09-27 12:35:24 +04:00
virtual
2013-09-25 02:56:47 +04:00
TargetPhraseCollection const *
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
2013-09-27 12:35:24 +04:00
virtual
void
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
//! Create entry for translation of source to targetPhrase
2013-06-26 20:12:22 +04:00
virtual void InitializeForInput(InputType const& source) {
}
// clean up temporary memory, called after processing each sentence
2013-06-26 20:12:22 +04:00
virtual void CleanUpAfterSentenceProcessing(const InputType& source) {
}
//! Create a sentence-specific manager for SCFG rule lookup.
virtual ChartRuleLookupManager *CreateRuleLookupManager(
const ChartParser &,
const ChartCellCollectionBase &,
std::size_t) = 0;
2013-06-26 20:12:22 +04:00
const std::string &GetFilePath() const {
return m_filePath;
}
2013-06-26 20:12:22 +04:00
const std::vector<FeatureFunction*> &GetFeaturesToApply() const {
return m_featuresToApply;
}
2013-06-26 20:12:22 +04:00
void SetParameter(const std::string& key, const std::string& value);
2013-02-25 18:50:33 +04:00
// LEGACY
//! find list of translations that can translates a portion of src. Used by confusion network decoding
2013-08-24 00:34:10 +04:00
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
2013-02-22 23:17:57 +04:00
protected:
static std::vector<PhraseDictionary*> s_staticColl;
2013-02-22 23:17:57 +04:00
size_t m_tableLimit;
std::string m_filePath;
2013-06-26 20:12:22 +04:00
// features to apply evaluate target phrase when loading.
// NOT when creating translation options. Those are in DecodeStep
std::vector<FeatureFunction*> m_featuresToApply;
// MUST be called at the start of Load()
void SetFeaturesToApply();
// cache
2013-08-16 18:05:36 +04:00
size_t m_maxCacheSize; // 0 = no caching
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::thread_specific_ptr<CacheColl> m_cache;
#else
mutable boost::scoped_ptr<CacheColl> m_cache;
#endif
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
2013-08-16 18:05:36 +04:00
void ReduceCache() const;
protected:
CacheColl &GetCache() const;
};
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
2009-02-06 18:43:06 +03:00
}
#endif