Added Dense and Sparse features to scorer

This commit is contained in:
maria nadejde 2013-09-13 12:45:46 +02:00
parent 43a9323d0f
commit bff123635e
4 changed files with 38 additions and 17 deletions

View File

@ -7,7 +7,7 @@ namespace MosesTraining
/**
 * Default constructor: initialises m_type to 0.
 *
 * As shown here it also writes a one-line construction trace to cout.
 * NOTE(review): the same trace statement appears again, commented out, on the
 * following line — confirm which of the two is meant to be live.
 */
InternalStructFeature::InternalStructFeature()
:m_type(0){
cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
//cout<<"InternalStructFeature: Construct "<<m_type<<"\n";
}
bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlignment& rhs) const{
@ -25,21 +25,37 @@ bool InternalStructFeature::equals(const PhraseAlignment& lhs, const PhraseAlign
/**
 * Context-level entry point: forwards tree-structure strings to the
 * string-based add() overload, which fills denseValues / sparseValues.
 *
 * @param context       Scoring context; only context.phrasePair is read here.
 * @param denseValues   Passed through unchanged to the string-based add().
 * @param sparseValues  Passed through unchanged to the string-based add().
 */
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
// Hard-coded placeholder structure, scored before the real phrase pairs.
// NOTE(review): allocated with new and never deleted in this function, and the
// Dense/Sparse add() bodies visible in this file do not free it either — looks
// like a leak on every call. Presumably leftover test scaffolding; confirm.
std::string *internalStruct=new string("(NP((DT)(NN)))");
add(internalStruct, denseValues, sparseValues);
// Score the ghkmParse member of each entry in context.phrasePair, in index order.
for(size_t i=0; i<context.phrasePair.size(); i++) {
add(&context.phrasePair[i]->ghkmParse, denseValues, sparseValues);
}
}
/**
 * Dense feature: counts occurrences of the substring "NP" in *internalStruct
 * and appends exp(count) to denseValues.
 *
 * @param internalStruct  Pointer to the structure string to scan; dereferenced
 *                        without a null check.
 * @param denseValues     Receives exactly one new element per call.
 * @param sparseValues    Not used by this implementation.
 */
void InternalStructFeatureDense::add(std::string *internalStruct,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
// NOTE(review): internalStruct is a std::string*, so this streams the pointer
// value (an address), not the string contents; the commented-out line below
// dereferences it instead.
cout<<internalStruct<<endl;
//cout<<"Dense: "<<*internalStruct<<endl;
size_t start=0;
int countNP=0;
// Non-overlapping scan: after each hit, resume searching just past the match.
// NOTE(review): find("NP") matches the two characters anywhere, so longer
// labels containing them (e.g. "NNP", "NPS") are counted too — confirm intended.
while((start = internalStruct->find("NP", start)) != string::npos) {
countNP++;
start+=2; //length of "NP"
}
// Original author's open questions, kept for context:
//should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
//should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
// exp() is the natural exponential, so a natural log downstream would recover countNP.
denseValues.push_back(exp(countNP));
}
/**
 * Sparse feature: sets indicator entries in sparseValues depending on which
 * tags occur as substrings of *internalStruct.
 *
 *   contains "VBZ"  ->  sparseValues["NT_VBZ"] = 1
 *   contains "VBD"  ->  sparseValues["NT_VBD"] = 1
 *
 * @param internalStruct  Pointer to the structure string to scan; dereferenced
 *                        without a null check.
 * @param denseValues     Not used by this implementation.
 * @param sparseValues    Map updated in place; existing entries for the two
 *                        keys are overwritten with 1, never incremented.
 */
void InternalStructFeatureSparse::add(std::string *internalStruct,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const{
// NOTE(review): streams the pointer value (an address), not the string
// contents; the commented-out line below dereferences it instead.
cout<<internalStruct<<endl;
//cout<<"Sparse: "<<*internalStruct<<endl;
// The two checks are independent: both keys are set if both tags are present.
if(internalStruct->find("VBZ")!=std::string::npos)
sparseValues["NT_VBZ"] = 1;
if(internalStruct->find("VBD")!=std::string::npos)
sparseValues["NT_VBD"] = 1;
}

View File

@ -36,7 +36,7 @@ protected:
/** Overridden in subclass */
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,
std::map<std::string,float>& sparseValues) const;
std::map<std::string,float>& sparseValues) const = 0;
int m_type;
};
@ -45,7 +45,7 @@ class InternalStructFeatureDense : public InternalStructFeature
{
public:
InternalStructFeatureDense()
:InternalStructFeature(){m_type=1; std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
:InternalStructFeature(){m_type=1;} //std::cout<<"InternalStructFeatureDense: Construct "<<m_type<<"\n";}
protected:
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,
@ -56,7 +56,7 @@ class InternalStructFeatureSparse : public InternalStructFeature
{
public:
InternalStructFeatureSparse()
:InternalStructFeature(){m_type=2; std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
:InternalStructFeature(){m_type=2;}// std::cout<<"InternalStructFeatureSparse: Construct "<<m_type<<"\n";}
protected:
virtual void add(std::string *internalStruct,
std::vector<float>& denseValues,

View File

@ -19,7 +19,7 @@
#include "ScoreFeature.h"
#include "domain.h"
//#include "InternalStructFeature.h"
#include "InternalStructFeature.h"
using namespace std;
@ -37,8 +37,9 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
{
bool domainAdded = false;
bool sparseDomainAdded = false;
for (size_t i = 0; i < args.size(); ++i) {
if (args[i] == "--IgnoreSentenceId") {
if (args[i] == "--IgnoreSentenceId") {
m_includeSentenceId = true;
} else if (args[i].substr(0,8) == "--Domain") {
string type = args[i].substr(8);
@ -76,15 +77,17 @@ void ScoreFeatureManager::configure(const std::vector<std::string> args)
}
sparseDomainAdded = true;
m_includeSentenceId = true;
} else if(args[i] == "--GHKMFeatureSparse"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
} else if(args[i] == "--GHKMFeatureDense"){
//MARIA
m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureDense()));
} else {
UTIL_THROW(ScoreFeatureArgumentException,"Unknown score argument " << args[i]);
}
//MARIA
//m_features.push_back(ScoreFeaturePtr(new InternalStructSparseFeature()));
}
}
//MARIA
//m_features.push_back(ScoreFeaturePtr(new InternalStructFeatureSparse()));
//InternalStructFeatureSparse *test=new InternalStructFeatureSparse();
}

View File

@ -69,7 +69,9 @@ static void checkDomainConfigured(
ScoreFeatureManager manager;
manager.configure(args);
const std::vector<ScoreFeaturePtr>& features = manager.getFeatures();
BOOST_REQUIRE_EQUAL(features.size(), 1);
//BOOST_REQUIRE_EQUAL(features.size(), 2);
//if I add to features this check will fail?
BOOST_REQUIRE_EQUAL(features.size(), 1); //MARIA -> what is this check and why does it fail when I add my feature?
Expected* feature = dynamic_cast<Expected*>(features[0].get());
BOOST_REQUIRE(feature);
BOOST_CHECK(manager.includeSentenceId());