mosesdecoder/phrase-extract/InternalStructFeature.cpp

58 lines
2.1 KiB
C++
Raw Normal View History

2013-09-12 20:46:40 +04:00
#include "InternalStructFeature.h"
#include <map>
2013-09-12 20:46:40 +04:00
using namespace std;
namespace MosesTraining
{
void InternalStructFeature::add(const ScoreFeatureContext& context,
std::vector<float>& denseValues,
2014-05-19 17:35:08 +04:00
std::map<std::string,float>& sparseValues) const
{
const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
iter!=allTrees->end(); ++iter ) {
add(&(iter->first), iter->second, denseValues, sparseValues);
}
2013-09-12 20:46:40 +04:00
}
void InternalStructFeatureDense::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
2014-05-19 17:35:08 +04:00
std::map<std::string,float>& sparseValues) const
{
//cout<<"Dense: "<<*internalStruct<<endl;
size_t start=0;
int countNP=0;
while((start = treeFragment->find("NP", start)) != string::npos) {
countNP += count;
start+=2; //length of "NP"
}
//should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
//should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
denseValues.push_back(exp(countNP));
2013-09-12 20:46:40 +04:00
}
void InternalStructFeatureSparse::add(const std::string *treeFragment,
float count,
std::vector<float>& denseValues,
2014-05-19 17:35:08 +04:00
std::map<std::string,float>& sparseValues) const
{
//cout<<"Sparse: "<<*internalStruct<<endl;
if(treeFragment->find("VBZ")!=std::string::npos)
sparseValues["NTVBZ"] += count;
if(treeFragment->find("VBD")!=std::string::npos)
sparseValues["NTVBD"] += count;
if(treeFragment->find("VBP")!=std::string::npos)
sparseValues["NTVBP"] += count;
if(treeFragment->find("PP")!=std::string::npos)
sparseValues["NTPP"] += count;
if(treeFragment->find("SBAR")!=std::string::npos)
sparseValues["NTSBAR"] += count;
2013-09-12 20:46:40 +04:00
}
}