// mosesdecoder/moses/src/PhrasePairFeature.h

#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
#include "Factor.h"
#include "FeatureFunction.h"
#include "Sentence.h"
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
namespace Moses {
/**
 * Phrase pair feature: complete source/target phrase pair
 **/
class PhrasePairFeature: public StatelessFeatureFunction {
typedef std::map< char, short > CharHash;
2012-07-04 14:40:50 +04:00
typedef std::vector< std::set<std::string> > DocumentVector;
struct ThreadLocalStorage
{
2012-07-04 14:40:50 +04:00
const Sentence *input;
};
private:
#ifdef WITH_THREADS
boost::thread_specific_ptr<ThreadLocalStorage> m_local;
#else
std::auto_ptr<ThreadLocalStorage> m_local;
#endif
2012-07-04 14:40:50 +04:00
std::set<std::string> m_vocabSource;
//std::set<std::string> m_vocabTarget;
FactorType m_sourceFactorId;
FactorType m_targetFactorId;
bool m_unrestricted;
bool m_simple;
bool m_sourceContext;
float m_sparseProducerWeight;
bool m_ignorePunctuation;
CharHash m_punctuationHash;
public:
PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId,
2012-07-04 14:40:50 +04:00
bool simple, bool sourceContext, bool ignorePunctuation) :
StatelessFeatureFunction("pp", ScoreProducer::unlimited),
m_sourceFactorId(sourceFactorId),
m_targetFactorId(targetFactorId),
m_unrestricted(true),
m_simple(simple),
2012-07-04 14:40:50 +04:00
m_sourceContext(sourceContext),
m_sparseProducerWeight(1),
m_ignorePunctuation(ignorePunctuation) {
std::cerr << "Creating phrase pair feature.. " << std::endl;
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
// compile a list of punctuation characters
if (m_ignorePunctuation) {
std::cerr << "ignoring punctuation for triggers.. ";
char punctuation[] = "\"'!?¿·()#_,.:;•&@/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
m_punctuationHash[punctuation[i]] = 1;
2012-03-22 19:49:11 +04:00
}
}
void Evaluate(
const PhraseBasedFeatureContext& context,
2012-09-07 19:57:53 +04:00
ScoreComponentCollection* accumulator) const;
void EvaluateChart(const TargetPhrase& targetPhrase,
const InputType& inputType,
const WordsRange& sourceSpan,
ScoreComponentCollection*) const {
CHECK(0); // feature function not valid in chart decoder
}
bool ComputeValueInTranslationOption() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
size_t GetNumInputScores() const;
2012-07-04 14:40:50 +04:00
bool Load(const std::string &filePathSource/*, const std::string &filePathTarget*/);
void InitializeForInput( Sentence const& in );
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};
}
#endif