constants in namespace PSD instead of struct FeatureTypes

This commit is contained in:
Ales Tamchyna 2012-07-12 16:03:38 -04:00
parent c3c702ec20
commit ef050fec85
3 changed files with 20 additions and 38 deletions

View File

@ -22,6 +22,7 @@ using namespace std;
using namespace Moses;
using namespace MosesTraining;
using namespace boost::bimaps;
using namespace PSD;
#define LINE_MAX_LENGTH 10000
@ -115,24 +116,12 @@ int main(int argc,char* argv[]){
int i = 0;
int csid = 0;
// configure features
FeatureTypes ft;
ft.m_sourceExternal = true;
ft.m_sourceInternal = true;
ft.m_targetInternal = true;
ft.m_paired = false;
ft.m_bagOfWords = false;
ft.m_contextWindow = 2;
ft.m_factors.push_back(0);
ft.m_factors.push_back(1);
ft.m_factors.push_back(2);
// create target phrase index for feature extractor
TargetIndexType extractorTargetIndex;
for (size_t i = 0; i < tgtPhraseVoc.phraseTable.size(); i++) {
extractorTargetIndex.insert(TargetIndexType::value_type(getPhrase(i, tgtVocab, tgtPhraseVoc), i));
}
FeatureExtractor extractor(ft, extractorTargetIndex, true);
FeatureExtractor extractor(extractorTargetIndex, true);
// prep feature consumers for PHRASAL setting
map<PHRASE_ID, FeatureConsumer*> consumers;

View File

@ -11,7 +11,7 @@ namespace PSD
FeatureExtractor::FeatureExtractor(FeatureTypes ft,
const TargetIndexType &targetIndex,
bool train)
: m_ft(ft), m_targetIndex(targetIndex), m_train(train)
: m_targetIndex(targetIndex), m_train(train)
{
}
@ -23,11 +23,11 @@ void FeatureExtractor::GenerateFeatures(FeatureConsumer *fc,
vector<float> &losses)
{
fc->SetNamespace('s', true);
if (m_ft.m_sourceExternal) {
if (PSD_SOURCE_EXTERNAL) {
GenerateContextFeatures(context, spanStart, spanEnd, fc);
}
if (m_ft.m_sourceInternal) {
if (PSD_SOURCE_INTERNAL) {
vector<string> sourceForms(spanEnd - spanStart + 1);
for (size_t i = spanStart; i <= spanEnd; i++) {
sourceForms[i] = context[i][0]; // XXX assumes that form is the 0th factor
@ -40,7 +40,7 @@ void FeatureExtractor::GenerateFeatures(FeatureConsumer *fc,
for (; transIt != translations.end(); transIt++, lossIt++) {
assert(lossIt != losses.end());
fc->SetNamespace('t', false);
if (m_ft.m_targetInternal) {
if (PSD_TARGET_INTERNAL) {
GenerateInternalFeatures(Tokenize(" ", m_targetIndex.right.find(*transIt)->second), fc);
}
@ -66,13 +66,12 @@ void FeatureExtractor::GenerateContextFeatures(const ContextType &context,
size_t spanEnd,
FeatureConsumer *fc)
{
vector<size_t>::const_iterator factorIt;
for (factorIt = m_ft.m_factors.begin(); factorIt != m_ft.m_factors.end(); factorIt++) {
for (size_t i = 1; i <= m_ft.m_contextWindow; i++) {
for (size_t fact = 0; fact <= PSD_FACTOR_COUNT; fact++) {
for (size_t i = 1; i <= PSD_CONTEXT_WINDOW; i++) {
if (spanStart >= i)
fc->AddFeature(BuildContextFeature(*factorIt, i, context[spanStart - i][*factorIt]));
fc->AddFeature(BuildContextFeature(fact, i, context[spanStart - i][fact]));
if (spanEnd + i < context.size())
fc->AddFeature(BuildContextFeature(*factorIt, i, context[spanStart - i][*factorIt]));
fc->AddFeature(BuildContextFeature(fact, i, context[spanStart - i][fact]));
}
}
}

View File

@ -17,28 +17,23 @@ typedef std::vector<std::vector<std::string> > ContextType;
// index of possible target spans
typedef boost::bimaps::bimap<std::string, size_t> TargetIndexType;
// configuration of feature extractor
struct FeatureTypes
{
bool m_sourceExternal; // generate context features
bool m_sourceInternal; // generate source-side phrase-internal features
bool m_targetInternal; // generate target-side phrase-internal features
bool m_paired; // generate paired features
bool m_bagOfWords; // generate bag-of-words features
// configuration of feature extraction, shared, global
const bool PSD_SOURCE_EXTERNAL = true; // generate context features
const bool PSD_SOURCE_INTERNAL = true; // generate source-side phrase-internal features
const bool PSD_TARGET_INTERNAL = true; // generate target-side phrase-internal features
const bool PSD_PAIRED = false; // generate paired features
const bool PSD_BAG_OF_wORDS = false; // generate bag-of-words features
size_t m_contextWindow; // window size for context features
const size_t PSD_CONTEXT_WINDOW = 2; // window size for context features
// list of factors that should be extracted from context (e.g. 0,1,2)
std::vector<size_t> m_factors;
};
const size_t[] PSD_FACTORS = { 0, 1, 2 };
const size_t PSD_FACTOR_COUNT = 3;
// extract features
class FeatureExtractor
{
public:
FeatureExtractor(FeatureTypes ft,
const TargetIndexType &targetIndex,
bool train);
FeatureExtractor(const TargetIndexType &targetIndex, bool train);
void GenerateFeatures(FeatureConsumer *fc,
const ContextType &context,
@ -48,7 +43,6 @@ public:
std::vector<float> &losses);
private:
FeatureTypes m_ft;
const TargetIndexType &m_targetIndex;
bool m_train;