calculate baseline score without optimisation

This commit is contained in:
Hieu Hoang 2014-07-11 16:26:48 +01:00
parent 8f5dc2b9e7
commit a402523ef5
7 changed files with 250 additions and 6 deletions

View File

@ -1671,6 +1671,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/Factory.h</locationURI>
</link>
<link>
<name>PP/NonTermContextProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.cpp</locationURI>
</link>
<link>
<name>PP/NonTermContextProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/NonTermContextProperty.h</locationURI>
</link>
<link>
<name>PP/PhraseProperty.cpp</name>
<type>1</type>

View File

@ -151,24 +151,26 @@ Phrase ChartHypothesis::GetOutputPhrase() const
void ChartHypothesis::GetOutputPhrase(int leftRightMost, int numWords, Phrase &outPhrase) const
{
int targetSize = GetCurrTargetPhrase().GetSize();
const TargetPhrase &tp = GetCurrTargetPhrase();
int targetSize = tp.GetSize();
for (int i = 0; i < targetSize; ++i) {
int pos;
if (leftRightMost == 1) {
pos = i;
}
else if (leftRightMost == 2) {
pos = targetSize - i;
pos = targetSize - i - 1;
}
else {
abort();
}
const Word &word = GetCurrTargetPhrase().GetWord(pos);
const Word &word = tp.GetWord(pos);
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = GetCurrTargetPhrase().GetAlignNonTerm().GetNonTermIndexMap()[pos];
size_t nonTermInd = tp.GetAlignNonTerm().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->GetOutputPhrase(outPhrase);
} else {

View File

@ -128,6 +128,7 @@ public:
int featureID,
ScoreComponentCollection* accumulator) const
{
/*
std::vector<int> leftIds, rightIds;
Phrase leftPhrase, rightPhrase;
hypo.GetOutputPhrase(1, m_order, leftPhrase);
@ -147,6 +148,26 @@ public:
LBLLMState *state = new LBLLMState(leftIds, rightIds);
return state;
*/
// baseline non-optimized scoring
Phrase phrase;
hypo.GetOutputPhrase(phrase);
std::cerr << "phrase=" << phrase << std::endl;
std::vector<int> ids;
ids = mapper->convert(phrase);
LBLFeatures leftScores = scoreFullContexts(ids);
std::vector<float> scores(2);
scores[0] = leftScores.LMScore;
scores[1] = leftScores.OOVScore;
accumulator->Assign(this, scores);
LBLLMState *state = new LBLLMState();
return state;
}
void SetParameter(const std::string& key, const std::string& value)

View File

@ -8,6 +8,7 @@
#include "moses/PP/SourceLabelsPhraseProperty.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "moses/PP/SpanLengthPhraseProperty.h"
#include "moses/PP/NonTermContextProperty.h"
namespace Moses
{
@ -57,6 +58,7 @@ PhrasePropertyFactory::PhrasePropertyFactory()
MOSES_PNAME2("SourceLabels", SourceLabelsPhraseProperty);
MOSES_PNAME2("Tree",TreeStructurePhraseProperty);
MOSES_PNAME2("SpanLength", SpanLengthPhraseProperty);
MOSES_PNAME2("NonTermContext", NonTermContextProperty);
}
PhrasePropertyFactory::~PhrasePropertyFactory()

View File

@ -0,0 +1,137 @@
#include "moses/PP/NonTermContextProperty.h"
#include <string>
#include <assert.h>
#include "moses/Util.h"
#include "moses/FactorCollection.h"
using namespace std;
namespace Moses
{
// Default constructor. Performs no explicit initialisation; the per-non-terminal
// stores are populated later by ProcessValue().
NonTermContextProperty::NonTermContextProperty()
{
}
// Destructor. Performs no explicit cleanup: m_probStores holds its ProbStore
// elements by value, and the pointer-collection cleanup below is disabled.
NonTermContextProperty::~NonTermContextProperty()
{
//RemoveAllInColl(m_probStores);
}
void NonTermContextProperty::ProcessValue(const std::string &value)
{
vector<string> toks;
Tokenize(toks, value);
FactorCollection &fc = FactorCollection::Instance();
size_t numNT = Scan<size_t>(toks[0]);
m_probStores.resize(numNT);
size_t ind = 1;
while (ind < toks.size()) {
vector<const Factor *> factors;
for (size_t nt = 0; nt < numNT; ++nt) {
size_t ntInd = Scan<size_t>(toks[ind]);
assert(nt == ntInd);
++ind;
for (size_t contextInd = 0; contextInd < 4; ++contextInd) {
//cerr << "toks[" << ind << "]=" << toks[ind] << endl;
const Factor *factor = fc.AddFactor(toks[ind], false);
factors.push_back(factor);
++ind;
}
}
// done with the context. Just get the count and put it all into data structures
// cerr << "count=" << toks[ind] << endl;
float count = Scan<float>(toks[ind]);
++ind;
for (size_t i = 0; i < factors.size(); ++i) {
size_t ntInd = i / 4;
size_t contextInd = i % 4;
const Factor *factor = factors[i];
AddToMap(ntInd, contextInd, factor, count);
}
}
}
/**
 * Add a count for one context word of one non-terminal.
 *
 * @param ntIndex index of the non-terminal whose store is updated.
 * @param index   context position within that store.
 * @param factor  the context word.
 * @param count   count to add.
 */
void NonTermContextProperty::AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count)
{
  // Grow (never shrink) the store vector so that ntIndex is addressable.
  // The comparison must be >=: with <= an in-range index would shrink the
  // vector and discard the stores of higher non-terminals, while an index
  // past the end would not be resized and would be accessed out of bounds.
  if (ntIndex >= m_probStores.size()) {
    m_probStores.resize(ntIndex + 1);
  }

  ProbStore &probStore = m_probStores[ntIndex];
  probStore.AddToMap(index, factor, count);
}
/**
 * Smoothed probability of a context word for a given non-terminal.
 *
 * @param ntInd          index of the non-terminal; must be a valid index into
 *                       the per-non-terminal stores.
 * @param contextInd     context position to query.
 * @param factor         the context word.
 * @param smoothConstant smoothing constant forwarded to the store.
 * @return the value computed by ProbStore::GetProb for that store.
 * @throws util::Exception (via UTIL_THROW_IF2) if ntInd is out of range.
 */
float NonTermContextProperty::GetProb(size_t ntInd,
                                      size_t contextInd,
                                      const Factor *factor,
                                      float smoothConstant) const
{
  UTIL_THROW_IF2(ntInd >= m_probStores.size(), "Invalid nt index=" << ntInd);

  return m_probStores[ntInd].GetProb(contextInd, factor, smoothConstant);
}
//////////////////////////////////////////
void NonTermContextProperty::ProbStore::AddToMap(size_t index, const Factor *factor, float count)
{
Map &map = m_vec[index];
Map::iterator iter = map.find(factor);
if (iter == map.end()) {
map[factor] = count;
}
else {
float &currCount = iter->second;
currCount += count;
}
m_totalCount += count;
}
/**
 * Smoothed relative frequency of a word at a context position.
 *
 * @param contextInd     context position to query.
 * @param factor         the context word.
 * @param smoothConstant smoothing constant applied to both numerator and
 *                       denominator (see GetCount / GetTotalCount).
 * @return GetCount(...) divided by GetTotalCount(...).
 */
float NonTermContextProperty::ProbStore::GetProb(size_t contextInd,
    const Factor *factor,
    float smoothConstant) const
{
  const float numerator = GetCount(contextInd, factor, smoothConstant);
  const float denominator = GetTotalCount(contextInd, smoothConstant);
  return numerator / denominator;
}
float NonTermContextProperty::ProbStore::GetCount(size_t contextInd,
const Factor *factor,
float smoothConstant) const
{
const Map &map = m_vec[contextInd];
float count = smoothConstant;
Map::const_iterator iter = map.find(factor);
if (iter == map.end()) {
// nothing
}
else {
count += iter->second;
}
return count;
}
float NonTermContextProperty::ProbStore::GetTotalCount(size_t contextInd, float smoothConstant) const
{
const Map &map = m_vec[contextInd];
return m_totalCount + smoothConstant * map.size();
}
} // namespace Moses

View File

@ -0,0 +1,73 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include "util/exception.hh"
#include <string>
#include <list>
#include <map>
#include <vector>
namespace Moses
{
class Factor;
/**
 * Phrase property that stores, per non-terminal, counts of the words seen at
 * four context positions, and exposes them as smoothed probabilities.
 * The stores are filled by ProcessValue() from the property's string value.
 */
class NonTermContextProperty : public PhraseProperty
{
public:
  NonTermContextProperty();
  ~NonTermContextProperty();

  /** Parse the property string and populate the per-non-terminal stores. */
  virtual void ProcessValue(const std::string &value);

  /** Not supported for this property: always throws. */
  virtual const std::string *GetValueString() const {
    UTIL_THROW2("NonTermContextProperty: value string not available in this phrase property");
    return NULL;
  }

  /**
   * Smoothed probability of @p factor at context position @p contextInd of
   * non-terminal @p ntInd.
   */
  float GetProb(size_t ntInd,
                size_t contextInd,
                const Factor *factor,
                float smoothConstant) const;

protected:
  /** Count storage for a single non-terminal. */
  class ProbStore
  {
  private:
    // word -> accumulated count
    typedef std::map<const Factor*, float> Map;
    // one map per context position:
    // left outside, left inside, right inside, right outside
    typedef std::vector<Map> Vec;

    Vec m_vec;
    float m_totalCount;

    float GetCount(size_t contextInd,
                   const Factor *factor,
                   float smoothConstant) const;
    float GetTotalCount(size_t contextInd, float smoothConstant) const;

  public:
    /** Starts with four empty context maps and a zero total. */
    ProbStore()
      : m_vec(4)
      , m_totalCount(0) {
    }

    float GetProb(size_t contextInd,
                  const Factor *factor,
                  float smoothConstant) const;

    /** Number of distinct words stored at context position @p index. */
    float GetSize(size_t index) const {
      return m_vec[index].size();
    }

    void AddToMap(size_t index, const Factor *factor, float count);
  };

  // by nt index
  std::vector<ProbStore> m_probStores;

  void AddToMap(size_t ntIndex, size_t index, const Factor *factor, float count);
};
} // namespace Moses

View File

@ -139,8 +139,7 @@ CreateFromString(FactorDirection direction
<< " contains factor delimiter "
<< StaticData::Instance().GetFactorDelimiter()
<< " too many times.");
UTIL_THROW_IF(i < factorOrder.size(),util::Exception,
UTIL_THROW_IF(!isNonTerminal && i < factorOrder.size(),util::Exception,
"Too few factors in string '" << str << "'.");
}
else