2012-01-31 14:31:39 +04:00
|
|
|
#ifndef GLOBALLEXICALMODELUNLIMITED_H_
|
|
|
|
#define GLOBALLEXICALMODELUNLIMITED_H_
|
|
|
|
|
|
|
|
#include <cassert>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "Factor.h"
#include "Phrase.h"
#include "TypeDef.h"
#include "Util.h"
#include "WordsRange.h"
#include "FeatureFunction.h"
#include "FactorTypeSet.h"
#include "Sentence.h"
#include "FFState.h"

#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
|
|
|
|
|
2012-01-31 14:31:39 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
|
|
|
// Forward declarations of Moses types. Hypothesis and InputType are only
// used by reference in this header, so a declaration is sufficient for them.
class Factor;
class Phrase;
class Hypothesis;
class InputType;
|
|
|
|
|
|
|
|
/** Discriminatively trained global lexicon model
 * This is an implementation of Mauser et al., 2009's model that predicts
 * each output word from _all_ the input words. The intuition behind this
 * feature is that it uses context words for disambiguation.
 */
|
|
|
|
|
2012-04-09 23:47:51 +04:00
|
|
|
class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
|
2012-01-31 14:31:39 +04:00
|
|
|
{
|
2012-02-15 15:27:00 +04:00
|
|
|
typedef std::map< char, short > CharHash;
|
2012-02-03 19:35:26 +04:00
|
|
|
typedef std::map< std::string, short > StringHash;
|
2012-02-04 22:17:57 +04:00
|
|
|
|
|
|
|
struct ThreadLocalStorage
|
|
|
|
{
|
|
|
|
const Sentence *input;
|
|
|
|
};
|
|
|
|
|
|
|
|
private:
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
boost::thread_specific_ptr<ThreadLocalStorage> m_local;
|
|
|
|
#else
|
|
|
|
std::auto_ptr<ThreadLocalStorage> m_local;
|
|
|
|
#endif
|
|
|
|
|
2012-02-15 15:27:00 +04:00
|
|
|
CharHash m_punctuationHash;
|
2012-02-02 18:21:48 +04:00
|
|
|
|
2012-01-31 14:31:39 +04:00
|
|
|
std::vector< FactorType > m_inputFactors;
|
|
|
|
std::vector< FactorType > m_outputFactors;
|
2012-02-26 21:46:04 +04:00
|
|
|
bool m_unrestricted;
|
|
|
|
|
2012-02-27 00:14:49 +04:00
|
|
|
bool m_sourceContext;
|
2012-02-29 00:22:09 +04:00
|
|
|
bool m_biphrase;
|
|
|
|
bool m_bitrigger;
|
2012-01-31 14:31:39 +04:00
|
|
|
|
2012-03-22 19:04:18 +04:00
|
|
|
bool m_biasFeature;
|
|
|
|
bool m_ignorePunctuation;
|
|
|
|
|
|
|
|
std::set<std::string> m_vocabSource;
|
|
|
|
std::set<std::string> m_vocabTarget;
|
2012-01-31 14:31:39 +04:00
|
|
|
|
|
|
|
public:
|
2013-01-01 21:27:26 +04:00
|
|
|
GlobalLexicalModelUnlimited(const std::string &line);
|
2012-02-29 00:22:09 +04:00
|
|
|
|
2012-02-27 00:14:49 +04:00
|
|
|
bool Load(const std::string &filePathSource, const std::string &filePathTarget);
|
|
|
|
|
2012-01-31 14:31:39 +04:00
|
|
|
void InitializeForInput( Sentence const& in );
|
|
|
|
|
2012-02-29 00:22:09 +04:00
|
|
|
const FFState* EmptyHypothesisState(const InputType &) const {
|
|
|
|
return new DummyState();
|
|
|
|
}
|
|
|
|
|
2012-09-07 19:57:53 +04:00
|
|
|
//TODO: This implements the old interface, but cannot be updated because
|
|
|
|
//it appears to be stateful
|
2012-04-09 23:47:51 +04:00
|
|
|
void Evaluate(const Hypothesis& cur_hypo,
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
2012-02-29 00:22:09 +04:00
|
|
|
|
2012-04-09 23:47:51 +04:00
|
|
|
void EvaluateChart(const ChartHypothesis& /* cur_hypo */,
|
|
|
|
int /* featureID */,
|
|
|
|
ScoreComponentCollection* ) const {
|
2012-02-29 00:22:09 +04:00
|
|
|
/* Not implemented */
|
|
|
|
assert(0);
|
|
|
|
}
|
2012-01-31 14:31:39 +04:00
|
|
|
|
2013-05-02 15:15:26 +04:00
|
|
|
virtual void Evaluate(const TargetPhrase &targetPhrase
|
|
|
|
, ScoreComponentCollection &scoreBreakdown
|
2013-05-02 17:55:26 +04:00
|
|
|
, ScoreComponentCollection &estimatedFutureScore) const;
|
2012-04-09 23:47:51 +04:00
|
|
|
|
2013-04-25 22:42:30 +04:00
|
|
|
void AddFeature(ScoreComponentCollection* accumulator,
|
|
|
|
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
|
|
|
|
StringPiece targetWord) const;
|
2013-04-30 22:27:49 +04:00
|
|
|
|
|
|
|
virtual StatelessFeatureType GetStatelessFeatureType() const
|
2013-05-13 18:53:56 +04:00
|
|
|
{ return RequiresSource; }
|
2013-04-30 22:27:49 +04:00
|
|
|
|
2012-01-31 14:31:39 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
#endif /* GLOBALLEXICALMODELUNLIMITED_H_ */
|