Code design debizarrification: Indexes of feature functions into the dense vector of all feature
values are now stored on the feature function instead of in a global map that is a static
member of ScoreComponentCollection.
This commit is contained in:
Ulrich Germann 2015-04-26 16:46:36 +01:00
parent e63561ae7f
commit fbf8b1f8b8
8 changed files with 93 additions and 71 deletions

View File

@ -64,6 +64,7 @@ FeatureFunction(const std::string& line)
, m_requireSortingAfterSourceContext(false) , m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max()) , m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(1) , m_numScoreComponents(1)
, m_index(0)
{ {
m_numTuneableComponents = m_numScoreComponents; m_numTuneableComponents = m_numScoreComponents;
Initialize(line); Initialize(line);
@ -76,6 +77,7 @@ FeatureFunction(size_t numScoreComponents,
, m_requireSortingAfterSourceContext(false) , m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max()) , m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(numScoreComponents) , m_numScoreComponents(numScoreComponents)
, m_index(0)
{ {
m_numTuneableComponents = m_numScoreComponents; m_numTuneableComponents = m_numScoreComponents;
Initialize(line); Initialize(line);
@ -197,5 +199,21 @@ FeatureFunction
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask) ::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); } { CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
size_t
FeatureFunction
::GetIndex() const
{ return m_index; }
/// Store the position of this feature function's first score within the
/// vector covering ALL feature function values.
/// @param idx index to store for this feature function
/// @return idx plus GetNumScoreComponents(), i.e. the index of the next FF
size_t
FeatureFunction
::SetIndex(size_t const idx)
{
  m_index = idx;
  size_t const nextIndex = idx + this->GetNumScoreComponents();
  return nextIndex;
}
} }

View File

@ -43,6 +43,7 @@ protected:
bool m_requireSortingAfterSourceContext; bool m_requireSortingAfterSourceContext;
size_t m_verbosity; size_t m_verbosity;
size_t m_numScoreComponents; size_t m_numScoreComponents;
size_t m_index; // index into vector covering ALL feature function values
std::vector<bool> m_tuneableComponents; std::vector<bool> m_tuneableComponents;
size_t m_numTuneableComponents; size_t m_numTuneableComponents;
//In case there's multiple producers with the same description //In case there's multiple producers with the same description
@ -114,6 +115,8 @@ public:
virtual std::vector<float> DefaultWeights() const; virtual std::vector<float> DefaultWeights() const;
size_t GetIndex() const;
size_t SetIndex(size_t const idx);
protected: protected:
virtual void virtual void

View File

@ -221,7 +221,7 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
ScoreComponentCollection &scoreBreakdown, ScoreComponentCollection &scoreBreakdown,
bool subtract) const bool subtract) const
{ {
size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first; size_t ffScoreIndex = m_index;
std::vector<float> scoresL2R; std::vector<float> scoresL2R;
scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) ); scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );

View File

@ -23,8 +23,6 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
, ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const , ScoreComponentCollection &estimatedFutureScore) const
{ {
const size_t ffScoreIndex(scoreBreakdown.GetIndexes(this).first);
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm(); const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
const size_t sourceLength = source.GetSize(); const size_t sourceLength = source.GetSize();
const size_t targetLength = targetPhrase.GetSize(); const size_t targetLength = targetPhrase.GetSize();
@ -57,8 +55,8 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
} }
} }
scoreBreakdown.PlusEquals(ffScoreIndex, sourceUnalignedCount); scoreBreakdown.PlusEquals(m_index, sourceUnalignedCount);
scoreBreakdown.PlusEquals(ffScoreIndex+1, targetUnalignedCount); scoreBreakdown.PlusEquals(m_index+1, targetUnalignedCount);
IFFEATUREVERBOSE(2) { IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, source << std::endl); FEATUREVERBOSE(2, source << std::endl);

View File

@ -790,7 +790,7 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
prev_approx_label -= prev->GetApproximateScoreLabel(); prev_approx_label -= prev->GetApproximateScoreLabel();
} }
} }
size_t ff_idx = accumulator->GetIndexes(this).first; size_t ff_idx = m_index; // accumulator->GetIndexes(this).first;
accumulator->PlusEquals(ff_idx, prev_approx_head); accumulator->PlusEquals(ff_idx, prev_approx_head);
accumulator->PlusEquals(ff_idx+1, prev_approx_label); accumulator->PlusEquals(ff_idx+1, prev_approx_label);

View File

@ -1,6 +1,8 @@
// $Id$ // $Id$
#include <vector> #include <vector>
#include <boost/algorithm/string/predicate.hpp> #include <boost/algorithm/string/predicate.hpp>
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include "util/exception.hh" #include "util/exception.hh"
#include "ScoreComponentCollection.h" #include "ScoreComponentCollection.h"
#include "StaticData.h" #include "StaticData.h"
@ -47,7 +49,7 @@ std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
return os; return os;
} }
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes; //ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
size_t ScoreComponentCollection::s_denseVectorSize = 0; size_t ScoreComponentCollection::s_denseVectorSize = 0;
ScoreComponentCollection:: ScoreComponentCollection::
@ -58,15 +60,14 @@ ScoreComponentCollection()
void void
ScoreComponentCollection:: ScoreComponentCollection::
RegisterScoreProducer(const FeatureFunction* scoreProducer) RegisterScoreProducer(FeatureFunction* scoreProducer)
{ {
size_t start = s_denseVectorSize; size_t start = s_denseVectorSize;
size_t end = start + scoreProducer->GetNumScoreComponents(); s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
VERBOSE(1, "FeatureFunction: " VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription() << scoreProducer->GetScoreProducerDescription()
<< " start: " << start << " end: " << (end-1) << endl); << " start: " << start
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end); << " end: " << (s_denseVectorSize-1) << endl);
s_denseVectorSize = end;
} }
@ -191,21 +192,23 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
sep = "="; sep = "=";
linesep = " "; linesep = " ";
} }
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {
string name = iter->first->GetScoreProducerDescription();
IndexPair ip = iter->second; // feature indices
if (ip.second-ip.first == 1) {
out << name << sep << m_scores[ip.first] << linesep;
} else {
for (size_t i=ip.first; i < ip.second; ++i) {
ostringstream fullname;
fullname << name << "_" << (i + 1 - ip.first);
out << fullname.str() << sep << m_scores[i] << linesep;
}
}
}
std::vector<FeatureFunction*> const& all_ff
= FeatureFunction::GetFeatureFunctions();
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
{
string name = ff->GetScoreProducerDescription();
size_t i = ff->GetIndex();
if (ff->GetNumScoreComponents() == 1)
out << name << sep << m_scores[i] << linesep;
else
{
size_t stop = i + ff->GetNumScoreComponents();
boost::format fmt("%s_%d");
for (size_t k = 1; i < stop; ++i, ++k)
out << fmt % name % k << sep << m_scores[i] << linesep;
}
}
// write sparse features // write sparse features
m_scores.write(out,sep,linesep); m_scores.write(out,sep,linesep);
} }
@ -242,8 +245,8 @@ void
ScoreComponentCollection:: ScoreComponentCollection::
Assign(const FeatureFunction* sp, const std::vector<float>& scores) Assign(const FeatureFunction* sp, const std::vector<float>& scores)
{ {
IndexPair indexes = GetIndexes(sp); size_t numScores = sp->GetNumScoreComponents();
size_t numScores = indexes.second - indexes.first; size_t offset = sp->GetIndex();
if (scores.size() != numScores) { if (scores.size() != numScores) {
UTIL_THROW(util::Exception, "Feature function " UTIL_THROW(util::Exception, "Feature function "
@ -253,7 +256,7 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores)
} }
for (size_t i = 0; i < scores.size(); ++i) { for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] = scores[i]; m_scores[i + offset] = scores[i];
} }
} }

View File

@ -95,24 +95,24 @@ private:
FVector m_scores; FVector m_scores;
public: public:
typedef std::pair<size_t,size_t> IndexPair; // typedef std::pair<size_t,size_t> IndexPair;
private: private:
typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap; // typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
static ScoreIndexMap s_scoreIndexes; // static ScoreIndexMap s_scoreIndexes;
static size_t s_denseVectorSize; static size_t s_denseVectorSize;
public: public:
static IndexPair GetIndexes(const FeatureFunction* sp) { // static IndexPair GetIndexes(const FeatureFunction* sp) {
ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp); // ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
if (indexIter == s_scoreIndexes.end()) { // if (indexIter == s_scoreIndexes.end()) {
std::stringstream strme; // std::stringstream strme;
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() << // strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
" not registered with ScoreIndexMap" << std::endl; // " not registered with ScoreIndexMap" << std::endl;
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " << // strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
" for every FeatureFunction" << std::endl; // " for every FeatureFunction" << std::endl;
UTIL_THROW2(strme.str()); // UTIL_THROW2(strme.str());
} // }
return indexIter->second; // return indexIter->second;
} // }
public: public:
static void ResetCounter() { static void ResetCounter() {
@ -136,7 +136,7 @@ public:
* Register a ScoreProducer with a fixed number of scores, so that it can * Register a ScoreProducer with a fixed number of scores, so that it can
* be allocated space in the dense part of the feature vector. * be allocated space in the dense part of the feature vector.
**/ **/
static void RegisterScoreProducer(const FeatureFunction* scoreProducer); static void RegisterScoreProducer(FeatureFunction* scoreProducer);
/** Load from file */ /** Load from file */
bool Load(const std::string& filename) { bool Load(const std::string& filename) {
@ -229,22 +229,23 @@ public:
//! Add scores from a single ScoreProducer only //! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components //! The length of scores must be equal to the number of score components
//! produced by sp //! produced by sp
void PlusEquals(const FeatureFunction* sp, const ScoreComponentCollection& scores) { void
IndexPair indexes = GetIndexes(sp); PlusEquals(const FeatureFunction* sp,
for (size_t i = indexes.first; i < indexes.second; ++i) { const ScoreComponentCollection& scores) {
m_scores[i] += scores.m_scores[i]; size_t i = sp->GetIndex();
} size_t stop = i + sp->GetNumScoreComponents();
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
} }
//! Add scores from a single FeatureFunction only //! Add scores from a single FeatureFunction only
//! The length of scores must be equal to the number of score components //! The length of scores must be equal to the number of score components
//! produced by sp //! produced by sp
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) { void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
IndexPair indexes = GetIndexes(sp); UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
"Number of scores is incorrect"); "Number of scores is incorrect");
size_t offset = sp->GetIndex();
for (size_t i = 0; i < scores.size(); ++i) { for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] += scores[i]; m_scores[i + offset] += scores[i];
} }
} }
@ -252,10 +253,9 @@ public:
//! to add the score from a single ScoreProducer that produces //! to add the score from a single ScoreProducer that produces
//! a single value //! a single value
void PlusEquals(const FeatureFunction* sp, float score) { void PlusEquals(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp); UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
"Number of scores is incorrect"); "Number of scores is incorrect");
m_scores[indexes.first] += score; m_scores[sp->GetIndex()] += score;
} }
//For features which have an unbounded number of components //For features which have an unbounded number of components
@ -287,10 +287,10 @@ public:
//! to add the score from a single ScoreProducer that produces //! to add the score from a single ScoreProducer that produces
//! a single value //! a single value
void Assign(const FeatureFunction* sp, float score) { void Assign(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first, UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
"Feature function must must only contain 1 score"); "Feature function must must only contain 1 score");
m_scores[indexes.first] = score; m_scores[sp->GetIndex()] = score;
} }
// Assign score by index // Assign score by index
@ -329,9 +329,9 @@ public:
size_t components = sp->GetNumScoreComponents(); size_t components = sp->GetNumScoreComponents();
std::vector<float> res(components); std::vector<float> res(components);
IndexPair indexes = GetIndexes(sp); size_t offset = sp->GetIndex();
for (size_t i = 0; i < res.size(); ++i) { for (size_t i = 0; i < res.size(); ++i) {
res[i] = m_scores[i + indexes.first]; res[i] = m_scores[i + offset];
} }
return res; return res;
} }
@ -364,18 +364,17 @@ public:
m_scores.capMin(minValue); m_scores.capMin(minValue);
} }
std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const { // std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
IndexPair indexPair = GetIndexes(sp); // IndexPair indexPair = GetIndexes(sp);
return indexPair; // return indexPair;
} // }
//! if a FeatureFunction produces a single score (for example, a language model score) //! if a FeatureFunction produces a single score (for example, a language model score)
//! this will return it. If not, this method will throw //! this will return it. If not, this method will throw
float GetScoreForProducer(const FeatureFunction* sp) const { float GetScoreForProducer(const FeatureFunction* sp) const {
IndexPair indexes = GetIndexes(sp); UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
"Feature function must must only contain 1 score"); "Feature function must must only contain 1 score");
return m_scores[indexes.first]; return m_scores[sp->GetIndex()];
} }
//For features which have an unbounded number of components //For features which have an unbounded number of components

View File

@ -101,12 +101,13 @@ int main(int argc, char* argv[])
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]); Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
cout << setw(3) << ++k << " " << phr << endl; cout << setw(3) << ++k << " " << phr << endl;
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown(); ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); size_t start = PT->GetIndex();
size_t stop = start + PT->GetNumScoreComponents();
FVector const& scores = scc.GetScoresVector(); FVector const& scores = scc.GetScoresVector();
cout << " "; cout << " ";
for (size_t k = idx.first; k < idx.second; ++k) for (size_t k = start; k < stop; ++k)
{ {
size_t j = k-idx.first; size_t j = k-start;
float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k] float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
: scores[k] < 0 ? exp(scores[k]) : scores[k]); : scores[k] < 0 ? exp(scores[k]) : scores[k]);
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f"; string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";