2011-08-13 06:40:54 +04:00
|
|
|
|
#ifndef moses_WordTranslationFeature_h
|
|
|
|
|
#define moses_WordTranslationFeature_h
|
|
|
|
|
|
|
|
|
|
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "FeatureFunction.h"
#include "FactorCollection.h"
#include "Sentence.h"
#include "FFState.h"
|
|
|
|
|
|
2011-08-13 06:40:54 +04:00
|
|
|
|
namespace Moses
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/** Sets the features for word translation
|
|
|
|
|
*/
|
2012-04-09 23:47:51 +04:00
|
|
|
|
class WordTranslationFeature : public StatelessFeatureFunction {
|
2012-02-29 00:22:09 +04:00
|
|
|
|
|
2012-03-20 17:45:25 +04:00
|
|
|
|
typedef std::map< char, short > CharHash;
|
2012-07-26 20:32:50 +04:00
|
|
|
|
typedef std::vector< std::set<std::string> > DocumentVector;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
|
2011-08-13 06:40:54 +04:00
|
|
|
|
private:
|
|
|
|
|
std::set<std::string> m_vocabSource;
|
|
|
|
|
std::set<std::string> m_vocabTarget;
|
2012-07-26 20:32:50 +04:00
|
|
|
|
DocumentVector m_vocabDomain;
|
2011-08-13 06:40:54 +04:00
|
|
|
|
FactorType m_factorTypeSource;
|
|
|
|
|
FactorType m_factorTypeTarget;
|
|
|
|
|
bool m_unrestricted;
|
2012-03-07 18:04:25 +04:00
|
|
|
|
bool m_simple;
|
2012-02-27 00:14:49 +04:00
|
|
|
|
bool m_sourceContext;
|
2012-03-07 18:04:25 +04:00
|
|
|
|
bool m_targetContext;
|
2012-07-26 20:32:50 +04:00
|
|
|
|
bool m_domainTrigger;
|
2012-03-15 04:32:27 +04:00
|
|
|
|
float m_sparseProducerWeight;
|
2012-03-20 17:45:25 +04:00
|
|
|
|
bool m_ignorePunctuation;
|
|
|
|
|
CharHash m_punctuationHash;
|
2012-03-15 04:32:27 +04:00
|
|
|
|
|
2011-08-13 06:40:54 +04:00
|
|
|
|
public:
|
2012-10-03 21:53:55 +04:00
|
|
|
|
WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
|
|
|
|
|
bool simple, bool sourceContext, bool targetContext, bool ignorePunctuation,
|
|
|
|
|
bool domainTrigger):
|
|
|
|
|
StatelessFeatureFunction("wt", ScoreProducer::unlimited),
|
|
|
|
|
m_factorTypeSource(factorTypeSource),
|
|
|
|
|
m_factorTypeTarget(factorTypeTarget),
|
|
|
|
|
m_unrestricted(true),
|
|
|
|
|
m_simple(simple),
|
|
|
|
|
m_sourceContext(sourceContext),
|
|
|
|
|
m_targetContext(targetContext),
|
|
|
|
|
m_domainTrigger(domainTrigger),
|
|
|
|
|
m_sparseProducerWeight(1),
|
|
|
|
|
m_ignorePunctuation(ignorePunctuation)
|
|
|
|
|
{
|
|
|
|
|
std::cerr << "Initializing word translation feature.. ";
|
|
|
|
|
if (m_simple == 1) std::cerr << "using simple word translations.. ";
|
|
|
|
|
if (m_sourceContext == 1) std::cerr << "using source context.. ";
|
|
|
|
|
if (m_targetContext == 1) std::cerr << "using target context.. ";
|
|
|
|
|
if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
|
2011-08-13 06:40:54 +04:00
|
|
|
|
|
2012-10-03 21:53:55 +04:00
|
|
|
|
// compile a list of punctuation characters
|
|
|
|
|
if (m_ignorePunctuation) {
|
|
|
|
|
std::cerr << "ignoring punctuation for triggers.. ";
|
|
|
|
|
char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
|
|
|
|
|
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
|
|
|
|
|
m_punctuationHash[punctuation[i]] = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::cerr << "done." << std::endl;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool Load(const std::string &filePathSource, const std::string &filePathTarget);
|
|
|
|
|
|
2012-02-29 00:22:09 +04:00
|
|
|
|
const FFState* EmptyHypothesisState(const InputType &) const {
|
2012-10-03 21:53:55 +04:00
|
|
|
|
return new DummyState();
|
2012-02-29 00:22:09 +04:00
|
|
|
|
}
|
2012-10-03 21:53:55 +04:00
|
|
|
|
|
|
|
|
|
void Evaluate(const PhraseBasedFeatureContext& context,
|
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
2012-02-29 00:22:09 +04:00
|
|
|
|
|
2012-09-21 14:56:01 +04:00
|
|
|
|
void EvaluateChart(const ChartBasedFeatureContext& context,
|
2012-04-09 23:47:51 +04:00
|
|
|
|
ScoreComponentCollection* accumulator) const;
|
2011-08-13 06:40:54 +04:00
|
|
|
|
|
|
|
|
|
// basic properties
|
2012-10-03 21:53:55 +04:00
|
|
|
|
std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; }
|
|
|
|
|
size_t GetNumInputScores() const { return 0; }
|
2012-09-14 00:23:37 +04:00
|
|
|
|
|
|
|
|
|
bool ComputeValueInTranslationOption() const {return true;}
|
2012-10-03 21:53:55 +04:00
|
|
|
|
|
|
|
|
|
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
|
|
|
|
|
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
|
2011-08-13 06:40:54 +04:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif // moses_WordTranslationFeature_h
|