2012-09-03 10:23:32 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
#ifndef _DOMAIN_H
|
|
|
|
#define _DOMAIN_H
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
|
|
|
#include <assert.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string>
|
|
|
|
#include <queue>
|
|
|
|
#include <map>
|
|
|
|
#include <cmath>
|
|
|
|
|
2012-11-03 03:30:51 +04:00
|
|
|
#include "ScoreFeature.h"
|
|
|
|
|
2012-09-03 10:23:32 +04:00
|
|
|
// Declaration only: no definition of tokenize() appears in this header.
// It is declared at global scope, i.e. outside namespace MosesTraining.
// NOTE(review): presumably splits the given C string into tokens -- confirm
// the delimiter rules against the definition.
extern std::vector<std::string> tokenize(const char* input);
|
|
|
|
|
|
|
|
namespace MosesTraining
|
|
|
|
{
|
|
|
|
|
|
|
|
/**
 * Container for domain information.
 *
 * All data members are public. The two member functions are only declared
 * here; their definitions are not part of this header.
 */
class Domain
{
public:
  /**
   * Sequence of (int, string) pairs.
   * NOTE(review): presumably a sentence-number boundary paired with the
   * name of the domain it belongs to -- confirm against load().
   */
  std::vector< std::pair< int, std::string > > spec;

  /**
   * Sequence of strings.
   * NOTE(review): presumably the known domain names, positioned by the ids
   * stored in name2id -- confirm against load().
   */
  std::vector< std::string > list;

  /** Lookup table from a string key to an integer id. */
  std::map< std::string, int > name2id;

  /**
   * @param fileName  name of the file to process.
   * NOTE(review): presumably populates spec, list and name2id from that
   * file -- the file format is defined by the out-of-line implementation.
   */
  void load( const std::string &fileName );

  /**
   * Const query, so it does not modify this object.
   *
   * @param sentenceId  sentence identifier to look up.
   * @return a string by value; presumably the domain name for that
   *         sentence -- TODO confirm behaviour for ids not covered by spec.
   */
  std::string getDomainOfSentence( int sentenceId ) const;
};
|
|
|
|
|
|
|
|
class DomainFeature : public ScoreFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
|
|
|
|
DomainFeature(const std::string& domainFile);
|
2014-01-29 22:37:42 +04:00
|
|
|
|
2014-05-19 17:35:08 +04:00
|
|
|
void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
|
|
|
|
float count,
|
2014-01-29 22:37:42 +04:00
|
|
|
int sentenceId) const;
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
void add(const ScoreFeatureContext& context,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
protected:
|
2014-01-29 22:37:42 +04:00
|
|
|
/** Overridden in subclass */
|
2013-05-29 21:16:15 +04:00
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const = 0;
|
2012-11-03 03:30:51 +04:00
|
|
|
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
Domain m_domain;
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2014-01-29 22:37:42 +04:00
|
|
|
const std::string m_propertyKey;
|
|
|
|
|
2012-09-03 10:23:32 +04:00
|
|
|
};
|
|
|
|
|
2012-11-03 03:30:51 +04:00
|
|
|
class SubsetDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
SubsetDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
class SparseSubsetDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
SparseSubsetDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
2012-11-03 03:30:51 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
class IndicatorDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
IndicatorDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class SparseIndicatorDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
SparseIndicatorDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class RatioDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
RatioDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class SparseRatioDomainFeature : public DomainFeature
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
|
|
|
SparseRatioDomainFeature(const std::string& domainFile) :
|
|
|
|
DomainFeature(domainFile) {}
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual void add(const std::map<std::string,float>& domainCounts, float count,
|
|
|
|
const MaybeLog& maybeLog,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const;
|
2012-11-03 03:30:51 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2012-09-03 10:23:32 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|