2013-09-12 20:46:40 +04:00
|
|
|
#include "InternalStructFeature.h"
|
2014-01-29 22:37:42 +04:00
|
|
|
#include <map>
|
2013-09-12 20:46:40 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
namespace MosesTraining
|
|
|
|
{
|
|
|
|
|
|
|
|
void InternalStructFeature::add(const ScoreFeatureContext& context,
|
2014-01-29 22:37:42 +04:00
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const {
|
|
|
|
const std::map<std::string,float> *allTrees = context.phrasePair.GetProperty("Tree"); // our would we rather want to take the most frequent one only?
|
|
|
|
for ( std::map<std::string,float>::const_iterator iter=allTrees->begin();
|
|
|
|
iter!=allTrees->end(); ++iter ) {
|
|
|
|
add(&(iter->first), iter->second, denseValues, sparseValues);
|
|
|
|
}
|
2013-09-12 20:46:40 +04:00
|
|
|
}
|
|
|
|
|
2014-01-29 22:37:42 +04:00
|
|
|
void InternalStructFeatureDense::add(const std::string *treeFragment,
|
|
|
|
float count,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const {
|
2013-09-13 14:45:46 +04:00
|
|
|
//cout<<"Dense: "<<*internalStruct<<endl;
|
|
|
|
size_t start=0;
|
|
|
|
int countNP=0;
|
2014-01-29 22:37:42 +04:00
|
|
|
while((start = treeFragment->find("NP", start)) != string::npos) {
|
|
|
|
countNP += count;
|
2013-09-13 14:45:46 +04:00
|
|
|
start+=2; //length of "NP"
|
|
|
|
}
|
|
|
|
//should add e^countNP so in the decoder I get log(e^countNP)=countNP -> but is log or ln?
|
|
|
|
//should use this but don't know what it does? -> maybeLog( (bitmap == i) ? 2.718 : 1 )
|
|
|
|
denseValues.push_back(exp(countNP));
|
2013-09-12 20:46:40 +04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-01-29 22:37:42 +04:00
|
|
|
void InternalStructFeatureSparse::add(const std::string *treeFragment,
|
|
|
|
float count,
|
|
|
|
std::vector<float>& denseValues,
|
|
|
|
std::map<std::string,float>& sparseValues) const {
|
|
|
|
//cout<<"Sparse: "<<*internalStruct<<endl;
|
|
|
|
if(treeFragment->find("VBZ")!=std::string::npos)
|
|
|
|
sparseValues["NTVBZ"] += count;
|
|
|
|
if(treeFragment->find("VBD")!=std::string::npos)
|
|
|
|
sparseValues["NTVBD"] += count;
|
|
|
|
if(treeFragment->find("VBP")!=std::string::npos)
|
|
|
|
sparseValues["NTVBP"] += count;
|
|
|
|
if(treeFragment->find("PP")!=std::string::npos)
|
|
|
|
sparseValues["NTPP"] += count;
|
|
|
|
if(treeFragment->find("SBAR")!=std::string::npos)
|
|
|
|
sparseValues["NTSBAR"] += count;
|
2013-09-12 20:46:40 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|