mosesdecoder/phrase-extract/DomainFeature.h
Jeroen Vermeulen b2d821a141 Unify tokenize() into util, and unit-test it.
The duplicate definition works fine in environments where the inline
definition becomes a weak symbol in the object file, but if it gets
generated as a regular definition, the duplicate definition causes link
problems.

In most call sites the return value could easily be made const, which
gives both the reader and the compiler a bit more certainty about the code's
intentions.  In theory this may help performance, but it's mainly for clarity.

The comments are based on reverse-engineering, and the unit tests are based
on the comments.  It's possible that some of what's in there is not essential,
in which case, don't feel bad about changing it!

I left a third identical definition in place, though I updated it with my
changes to avoid creeping divergence, and noted the duplication in a comment.
It would be nice to get rid of this definition as well, but it'd introduce
headers from the main Moses tree into biconcor, which may be against policy.
2015-04-22 09:59:05 +07:00

144 lines
3.7 KiB
C++

// $Id$
#ifndef _DOMAIN_H
#define _DOMAIN_H
#include <iostream>
#include <fstream>
#include <cassert>
#include <cstdlib>
#include <string>
#include <queue>
#include <map>
#include <cmath>
#include "ScoreFeature.h"
namespace MosesTraining
{
/**
 * Plain holder for domain information.
 *
 * Every member is public. Both member functions are only declared in this
 * header; their definitions live elsewhere.
 *
 * NOTE(review): the meaning of each container below is inferred from its name
 * and type only -- confirm against the definition of load().
 */
class Domain
{
public:
  /** (int, string) pairs; presumably a sentence boundary paired with a
   *  domain name -- TODO confirm. */
  std::vector< std::pair< int, std::string > > spec;

  /** Sequence of strings; presumably the known domain names -- TODO confirm. */
  std::vector< std::string > list;

  /** Lookup from string to int; presumably domain name to its id -- TODO confirm. */
  std::map< std::string, int > name2id;

  /**
   * @param fileName  name of the file to read; presumably it populates the
   *                  containers above -- TODO confirm.
   */
  void load( const std::string &fileName );

  /**
   * @param sentenceId  integer id of a sentence.
   * @return a string; going by the function name, the domain that sentence
   *         belongs to.
   */
  std::string getDomainOfSentence( int sentenceId ) const;
};
/**
 * Common base class of the domain score features declared in this header.
 *
 * The class is abstract: the protected add() overload is pure virtual and is
 * supplied by each concrete subclass.
 *
 * Note: every subclass in this header re-declares add(), which hides the
 * public add() overload below for calls made through the subclass type. It
 * stays reachable through a DomainFeature reference or pointer.
 */
class DomainFeature : public ScoreFeature
{
public:
  /**
   * @param domainFile  name of the domain file (presumably loaded into
   *                    m_domain -- TODO confirm in the definition).
   */
  DomainFeature(const std::string& domainFile);

  /**
   * Const operation taking a phrase pair, a count and a sentence id.
   * Going by the name, it attaches domain properties to @p phrasePair;
   * the definition is not part of this header.
   */
  void addPropertiesToPhrasePair(ExtractionPhrasePair &phrasePair,
                                 float count,
                                 int sentenceId) const;

  /**
   * Public scoring entry point.
   *
   * @param context       scoring context supplied by the caller.
   * @param denseValues   output vector of dense feature values.
   * @param sparseValues  output map of named sparse feature values.
   */
  void add(const ScoreFeatureContext& context,
           std::vector<float>& denseValues,
           std::map<std::string,float>& sparseValues) const;

protected:
  /**
   * Hook overridden in each subclass.
   *
   * @param domainCounts  string-keyed float values (by name: counts per domain).
   * @param count         a float count.
   * @param maybeLog      MaybeLog helper passed through to the implementation.
   * @param denseValues   output vector of dense feature values.
   * @param sparseValues  output map of named sparse feature values.
   */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const = 0;

  /** Domain data held by this feature. */
  Domain m_domain;

  /** Constant string key; by name, the key under which the property is stored. */
  const std::string m_propertyKey;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably emits features for subsets of domains into the
 * dense values -- confirm against the out-of-line definition of add().
 */
class SubsetDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  SubsetDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably the sparse-valued counterpart of
 * SubsetDomainFeature -- confirm against the out-of-line definition of add().
 */
class SparseSubsetDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  SparseSubsetDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably emits per-domain indicator values into the dense
 * values -- confirm against the out-of-line definition of add().
 */
class IndicatorDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  IndicatorDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably the sparse-valued counterpart of
 * IndicatorDomainFeature -- confirm against the out-of-line definition of add().
 */
class SparseIndicatorDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  SparseIndicatorDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably emits per-domain ratio values into the dense
 * values -- confirm against the out-of-line definition of add().
 */
class RatioDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  RatioDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
/**
 * Concrete DomainFeature; construction simply forwards the domain file name
 * to the base class.
 *
 * NOTE(review): presumably the sparse-valued counterpart of
 * RatioDomainFeature -- confirm against the out-of-line definition of add().
 */
class SparseRatioDomainFeature : public DomainFeature
{
public:
  /** @param domainFile  passed unchanged to DomainFeature. */
  SparseRatioDomainFeature(const std::string& domainFile)
    : DomainFeature(domainFile)
  {}

protected:
  /** Implementation of the DomainFeature::add hook; defined outside this header. */
  virtual void add(const std::map<std::string,float>& domainCounts,
                   float count,
                   const MaybeLog& maybeLog,
                   std::vector<float>& denseValues,
                   std::map<std::string,float>& sparseValues) const;
};
}
#endif