2011-03-22 17:33:16 +03:00
|
|
|
|
#ifndef moses_PhrasePairFeature_h
|
|
|
|
|
#define moses_PhrasePairFeature_h
|
|
|
|
|
|
|
|
|
|
#include "Factor.h"
|
|
|
|
|
#include "FeatureFunction.h"
|
2012-03-19 06:45:59 +04:00
|
|
|
|
#include "Sentence.h"
|
|
|
|
|
|
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
|
#include <boost/thread/tss.hpp>
|
|
|
|
|
#endif
|
2011-03-22 17:33:16 +03:00
|
|
|
|
|
|
|
|
|
namespace Moses {
|
|
|
|
|
|
|
|
|
|
/**
|
2012-03-07 15:48:58 +04:00
|
|
|
|
* Phrase pair feature: complete source/target phrase pair
|
2011-03-22 17:33:16 +03:00
|
|
|
|
**/
|
|
|
|
|
class PhrasePairFeature: public StatelessFeatureFunction {
|
2012-03-19 06:45:59 +04:00
|
|
|
|
|
2012-03-20 17:45:25 +04:00
|
|
|
|
typedef std::map< char, short > CharHash;
|
2012-07-04 14:40:50 +04:00
|
|
|
|
typedef std::vector< std::set<std::string> > DocumentVector;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
|
2012-03-19 06:45:59 +04:00
|
|
|
|
struct ThreadLocalStorage
|
|
|
|
|
{
|
2012-07-04 14:40:50 +04:00
|
|
|
|
const Sentence *input;
|
2012-03-19 06:45:59 +04:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
private:
|
2012-03-20 17:45:25 +04:00
|
|
|
|
#ifdef WITH_THREADS
|
2012-03-19 06:45:59 +04:00
|
|
|
|
boost::thread_specific_ptr<ThreadLocalStorage> m_local;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
#else
|
2012-03-19 06:45:59 +04:00
|
|
|
|
std::auto_ptr<ThreadLocalStorage> m_local;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
#endif
|
|
|
|
|
|
2012-07-04 14:40:50 +04:00
|
|
|
|
std::set<std::string> m_vocabSource;
|
|
|
|
|
//std::set<std::string> m_vocabTarget;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
FactorType m_sourceFactorId;
|
|
|
|
|
FactorType m_targetFactorId;
|
|
|
|
|
bool m_unrestricted;
|
|
|
|
|
bool m_simple;
|
|
|
|
|
bool m_sourceContext;
|
|
|
|
|
float m_sparseProducerWeight;
|
|
|
|
|
bool m_ignorePunctuation;
|
|
|
|
|
CharHash m_punctuationHash;
|
2012-03-19 06:45:59 +04:00
|
|
|
|
|
2011-03-22 17:33:16 +03:00
|
|
|
|
public:
|
2012-03-19 06:45:59 +04:00
|
|
|
|
PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId,
|
2012-07-04 14:40:50 +04:00
|
|
|
|
bool simple, bool sourceContext, bool ignorePunctuation) :
|
2012-03-20 17:45:25 +04:00
|
|
|
|
StatelessFeatureFunction("pp", ScoreProducer::unlimited),
|
2012-03-19 06:45:59 +04:00
|
|
|
|
m_sourceFactorId(sourceFactorId),
|
|
|
|
|
m_targetFactorId(targetFactorId),
|
|
|
|
|
m_unrestricted(true),
|
|
|
|
|
m_simple(simple),
|
2012-07-04 14:40:50 +04:00
|
|
|
|
m_sourceContext(sourceContext),
|
2012-03-20 17:45:25 +04:00
|
|
|
|
m_sparseProducerWeight(1),
|
|
|
|
|
m_ignorePunctuation(ignorePunctuation) {
|
2012-03-19 06:45:59 +04:00
|
|
|
|
std::cerr << "Creating phrase pair feature.. " << std::endl;
|
|
|
|
|
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
|
|
|
|
|
if (m_sourceContext == 1) std::cerr << "using source context.. ";
|
2012-03-20 17:45:25 +04:00
|
|
|
|
|
|
|
|
|
// compile a list of punctuation characters
|
|
|
|
|
if (m_ignorePunctuation) {
|
|
|
|
|
std::cerr << "ignoring punctuation for triggers.. ";
|
2012-03-22 19:04:18 +04:00
|
|
|
|
char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
|
2012-03-20 17:45:25 +04:00
|
|
|
|
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
|
|
|
|
|
m_punctuationHash[punctuation[i]] = 1;
|
2012-03-22 19:49:11 +04:00
|
|
|
|
}
|
2012-03-19 06:45:59 +04:00
|
|
|
|
}
|
2011-03-22 17:33:16 +03:00
|
|
|
|
|
2012-09-19 21:00:53 +04:00
|
|
|
|
void Evaluate(
|
|
|
|
|
const PhraseBasedFeatureContext& context,
|
2012-09-07 19:57:53 +04:00
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
2012-04-09 23:47:51 +04:00
|
|
|
|
|
2012-09-14 01:08:01 +04:00
|
|
|
|
void EvaluateChart(const TargetPhrase& targetPhrase,
|
|
|
|
|
const InputType& inputType,
|
|
|
|
|
const WordsRange& sourceSpan,
|
|
|
|
|
ScoreComponentCollection*) const {
|
2012-04-09 23:47:51 +04:00
|
|
|
|
CHECK(0); // feature function not valid in chart decoder
|
|
|
|
|
}
|
2011-03-22 17:33:16 +03:00
|
|
|
|
|
2012-04-09 23:47:51 +04:00
|
|
|
|
bool ComputeValueInTranslationOption() const;
|
2011-03-22 17:33:16 +03:00
|
|
|
|
|
2011-09-20 14:23:38 +04:00
|
|
|
|
std::string GetScoreProducerWeightShortName(unsigned) const;
|
2011-03-22 17:33:16 +03:00
|
|
|
|
size_t GetNumInputScores() const;
|
|
|
|
|
|
2012-07-04 14:40:50 +04:00
|
|
|
|
bool Load(const std::string &filePathSource/*, const std::string &filePathTarget*/);
|
|
|
|
|
|
2012-03-19 06:45:59 +04:00
|
|
|
|
void InitializeForInput( Sentence const& in );
|
|
|
|
|
|
2012-03-15 04:32:27 +04:00
|
|
|
|
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
|
|
|
|
|
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
|
2011-03-22 17:33:16 +03:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|