mosesdecoder/moses/FF/GlobalLexicalModelUnlimited.h

113 lines
3.3 KiB
C
Raw Normal View History

#ifndef GLOBALLEXICALMODELUNLIMITED_H_
#define GLOBALLEXICALMODELUNLIMITED_H_
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include "StatelessFeatureFunction.h"
#include "moses/Factor.h"
#include "moses/Phrase.h"
#include "moses/TypeDef.h"
#include "moses/Util.h"
#include "moses/Range.h"
#include "moses/FactorTypeSet.h"
#include "moses/Sentence.h"
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
namespace Moses
{
class Factor;
class Phrase;
class Hypothesis;
class InputType;
/** Discriminatively trained global lexicon model.
* This is an implementation of Mauser et al., 2009's model that predicts
* each output word from _all_ the input words. The intuition behind this
* feature is that it uses context words for disambiguation.
*/
class GlobalLexicalModelUnlimited : public StatelessFeatureFunction
{
2013-05-29 21:16:15 +04:00
typedef std::map< char, short > CharHash;
typedef std::map< std::string, short > StringHash;
2013-05-29 21:16:15 +04:00
struct ThreadLocalStorage {
// const Sentence *input;
const Sentence *input;
};
private:
#ifdef WITH_THREADS
boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
std::auto_ptr<ThreadLocalStorage> m_local;
#endif
2012-02-15 15:27:00 +04:00
CharHash m_punctuationHash;
std::vector< FactorType > m_inputFactors;
std::vector< FactorType > m_outputFactors;
2012-02-26 21:46:04 +04:00
bool m_unrestricted;
bool m_sourceContext;
bool m_biphrase;
bool m_bitrigger;
bool m_biasFeature;
bool m_ignorePunctuation;
boost::unordered_set<std::string> m_vocabSource;
boost::unordered_set<std::string> m_vocabTarget;
public:
GlobalLexicalModelUnlimited(const std::string &line);
bool Load(const std::string &filePathSource, const std::string &filePathTarget);
void InitializeForInput(ttasksptr const& ttask);
2012-09-07 19:57:53 +04:00
//TODO: This implements the old interface, but cannot be updated because
//it appears to be stateful
void EvaluateWhenApplied(const Hypothesis& cur_hypo,
2015-01-14 14:07:42 +03:00
ScoreComponentCollection* accumulator) const;
void EvaluateWhenApplied(const ChartHypothesis& /* cur_hypo */,
2015-01-14 14:07:42 +03:00
int /* featureID */,
ScoreComponentCollection* ) const {
throw std::logic_error("GlobalLexicalModelUnlimited not supported in chart decoder, yet");
}
void EvaluateWithSourceContext(const InputType &input
2015-01-14 14:07:42 +03:00
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
2015-11-04 18:10:45 +03:00
, ScoreComponentCollection *estimatedScores = NULL) const {
2015-01-14 14:07:42 +03:00
}
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
2015-01-14 14:07:42 +03:00
, const TranslationOptionList &translationOptionList) const {
}
void EvaluateInIsolation(const Phrase &source
2015-01-14 14:07:42 +03:00
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
2015-11-04 18:10:45 +03:00
, ScoreComponentCollection &estimatedScores) const {
2015-01-14 14:07:42 +03:00
}
2013-05-29 21:16:15 +04:00
void AddFeature(ScoreComponentCollection* accumulator,
StringPiece sourceTrigger, StringPiece sourceWord, StringPiece targetTrigger,
StringPiece targetWord) const;
};
}
#endif /* GLOBALLEXICALMODELUNLIMITED_H_ */