Code design debizarrification: Indexes of feature functions into the dense vector of all feature
values are now stored on the feature function itself instead of in a global map that is a static
member of ScoreComponentCollection.
This commit is contained in:
Ulrich Germann 2015-04-26 16:46:36 +01:00
parent e63561ae7f
commit fbf8b1f8b8
8 changed files with 93 additions and 71 deletions

View File

@ -64,6 +64,7 @@ FeatureFunction(const std::string& line)
, m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(1)
, m_index(0)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
@ -76,6 +77,7 @@ FeatureFunction(size_t numScoreComponents,
, m_requireSortingAfterSourceContext(false)
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(numScoreComponents)
, m_index(0)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
@ -197,5 +199,21 @@ FeatureFunction
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
/// Accessor for the stored index (m_index) of this feature function.
/// @return the current value of m_index, as last assigned by SetIndex()
size_t
FeatureFunction
::GetIndex() const
{
  return m_index;
}
/// Store the index of this feature function.
/// @param idx value to record in m_index
/// @return idx advanced by GetNumScoreComponents(), i.e. the index
///         to hand to the next feature function
size_t
FeatureFunction
::SetIndex(size_t const idx)
{
  m_index = idx;
  size_t const nextIndex = idx + this->GetNumScoreComponents();
  return nextIndex;
}
}

View File

@ -43,6 +43,7 @@ protected:
bool m_requireSortingAfterSourceContext;
size_t m_verbosity;
size_t m_numScoreComponents;
size_t m_index; // index into vector covering ALL feature function values
std::vector<bool> m_tuneableComponents;
size_t m_numTuneableComponents;
//In case there's multiple producers with the same description
@ -114,6 +115,8 @@ public:
virtual std::vector<float> DefaultWeights() const;
size_t GetIndex() const;
size_t SetIndex(size_t const idx);
protected:
virtual void

View File

@ -221,7 +221,7 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
ScoreComponentCollection &scoreBreakdown,
bool subtract) const
{
size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first;
size_t ffScoreIndex = m_index;
std::vector<float> scoresL2R;
scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );

View File

@ -23,8 +23,6 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
const size_t ffScoreIndex(scoreBreakdown.GetIndexes(this).first);
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
const size_t sourceLength = source.GetSize();
const size_t targetLength = targetPhrase.GetSize();
@ -57,8 +55,8 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
}
}
scoreBreakdown.PlusEquals(ffScoreIndex, sourceUnalignedCount);
scoreBreakdown.PlusEquals(ffScoreIndex+1, targetUnalignedCount);
scoreBreakdown.PlusEquals(m_index, sourceUnalignedCount);
scoreBreakdown.PlusEquals(m_index+1, targetUnalignedCount);
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, source << std::endl);

View File

@ -790,7 +790,7 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
prev_approx_label -= prev->GetApproximateScoreLabel();
}
}
size_t ff_idx = accumulator->GetIndexes(this).first;
size_t ff_idx = m_index; // accumulator->GetIndexes(this).first;
accumulator->PlusEquals(ff_idx, prev_approx_head);
accumulator->PlusEquals(ff_idx+1, prev_approx_label);

View File

@ -1,6 +1,8 @@
// $Id$
#include <vector>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include "util/exception.hh"
#include "ScoreComponentCollection.h"
#include "StaticData.h"
@ -47,7 +49,7 @@ std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
return os;
}
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
//ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
size_t ScoreComponentCollection::s_denseVectorSize = 0;
ScoreComponentCollection::
@ -58,15 +60,14 @@ ScoreComponentCollection()
void
ScoreComponentCollection::
RegisterScoreProducer(const FeatureFunction* scoreProducer)
RegisterScoreProducer(FeatureFunction* scoreProducer)
{
size_t start = s_denseVectorSize;
size_t end = start + scoreProducer->GetNumScoreComponents();
s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
VERBOSE(1, "FeatureFunction: "
<< scoreProducer->GetScoreProducerDescription()
<< " start: " << start << " end: " << (end-1) << endl);
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
s_denseVectorSize = end;
<< " start: " << start
<< " end: " << (s_denseVectorSize-1) << endl);
}
@ -191,21 +192,23 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
sep = "=";
linesep = " ";
}
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
for (; iter != s_scoreIndexes.end(); ++iter ) {
string name = iter->first->GetScoreProducerDescription();
IndexPair ip = iter->second; // feature indices
if (ip.second-ip.first == 1) {
out << name << sep << m_scores[ip.first] << linesep;
} else {
for (size_t i=ip.first; i < ip.second; ++i) {
ostringstream fullname;
fullname << name << "_" << (i + 1 - ip.first);
out << fullname.str() << sep << m_scores[i] << linesep;
}
}
}
std::vector<FeatureFunction*> const& all_ff
= FeatureFunction::GetFeatureFunctions();
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
{
string name = ff->GetScoreProducerDescription();
size_t i = ff->GetIndex();
if (ff->GetNumScoreComponents() == 1)
out << name << sep << m_scores[i] << linesep;
else
{
size_t stop = i + ff->GetNumScoreComponents();
boost::format fmt("%s_%d");
for (size_t k = 1; i < stop; ++i, ++k)
out << fmt % name % k << sep << m_scores[i] << linesep;
}
}
// write sparse features
m_scores.write(out,sep,linesep);
}
@ -242,8 +245,8 @@ void
ScoreComponentCollection::
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
{
IndexPair indexes = GetIndexes(sp);
size_t numScores = indexes.second - indexes.first;
size_t numScores = sp->GetNumScoreComponents();
size_t offset = sp->GetIndex();
if (scores.size() != numScores) {
UTIL_THROW(util::Exception, "Feature function "
@ -253,7 +256,7 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores)
}
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] = scores[i];
m_scores[i + offset] = scores[i];
}
}

View File

@ -95,24 +95,24 @@ private:
FVector m_scores;
public:
typedef std::pair<size_t,size_t> IndexPair;
// typedef std::pair<size_t,size_t> IndexPair;
private:
typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
static ScoreIndexMap s_scoreIndexes;
// typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
// static ScoreIndexMap s_scoreIndexes;
static size_t s_denseVectorSize;
public:
static IndexPair GetIndexes(const FeatureFunction* sp) {
ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
if (indexIter == s_scoreIndexes.end()) {
std::stringstream strme;
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
" not registered with ScoreIndexMap" << std::endl;
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
" for every FeatureFunction" << std::endl;
UTIL_THROW2(strme.str());
}
return indexIter->second;
}
// static IndexPair GetIndexes(const FeatureFunction* sp) {
// ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
// if (indexIter == s_scoreIndexes.end()) {
// std::stringstream strme;
// strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
// " not registered with ScoreIndexMap" << std::endl;
// strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
// " for every FeatureFunction" << std::endl;
// UTIL_THROW2(strme.str());
// }
// return indexIter->second;
// }
public:
static void ResetCounter() {
@ -136,7 +136,7 @@ public:
* Register a ScoreProducer with a fixed number of scores, so that it can
* be allocated space in the dense part of the feature vector.
**/
static void RegisterScoreProducer(const FeatureFunction* scoreProducer);
static void RegisterScoreProducer(FeatureFunction* scoreProducer);
/** Load from file */
bool Load(const std::string& filename) {
@ -229,22 +229,23 @@ public:
//! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components
//! produced by sp
void PlusEquals(const FeatureFunction* sp, const ScoreComponentCollection& scores) {
IndexPair indexes = GetIndexes(sp);
for (size_t i = indexes.first; i < indexes.second; ++i) {
m_scores[i] += scores.m_scores[i];
}
void
PlusEquals(const FeatureFunction* sp,
const ScoreComponentCollection& scores) {
size_t i = sp->GetIndex();
size_t stop = i + sp->GetNumScoreComponents();
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
}
//! Add scores from a single FeatureFunction only
//! The length of scores must be equal to the number of score components
//! produced by sp
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
"Number of scores is incorrect");
size_t offset = sp->GetIndex();
for (size_t i = 0; i < scores.size(); ++i) {
m_scores[i + indexes.first] += scores[i];
m_scores[i + offset] += scores[i];
}
}
@ -252,10 +253,9 @@ public:
//! to add the score from a single ScoreProducer that produces
//! a single value
void PlusEquals(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
"Number of scores is incorrect");
m_scores[indexes.first] += score;
m_scores[sp->GetIndex()] += score;
}
//For features which have an unbounded number of components
@ -287,10 +287,10 @@ public:
//! to add the score from a single ScoreProducer that produces
//! a single value
void Assign(const FeatureFunction* sp, float score) {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
"Feature function must must only contain 1 score");
m_scores[indexes.first] = score;
m_scores[sp->GetIndex()] = score;
}
// Assign score by index
@ -329,9 +329,9 @@ public:
size_t components = sp->GetNumScoreComponents();
std::vector<float> res(components);
IndexPair indexes = GetIndexes(sp);
size_t offset = sp->GetIndex();
for (size_t i = 0; i < res.size(); ++i) {
res[i] = m_scores[i + indexes.first];
res[i] = m_scores[i + offset];
}
return res;
}
@ -364,18 +364,17 @@ public:
m_scores.capMin(minValue);
}
std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
IndexPair indexPair = GetIndexes(sp);
return indexPair;
}
// std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
// IndexPair indexPair = GetIndexes(sp);
// return indexPair;
// }
//! if a FeatureFunction produces a single score (for example, a language model score)
//! this will return it. If not, this method will throw
float GetScoreForProducer(const FeatureFunction* sp) const {
IndexPair indexes = GetIndexes(sp);
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
"Feature function must must only contain 1 score");
return m_scores[indexes.first];
return m_scores[sp->GetIndex()];
}
//For features which have an unbounded number of components

View File

@ -101,12 +101,13 @@ int main(int argc, char* argv[])
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
cout << setw(3) << ++k << " " << phr << endl;
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
size_t start = PT->GetIndex();
size_t stop = start + PT->GetNumScoreComponents();
FVector const& scores = scc.GetScoresVector();
cout << " ";
for (size_t k = idx.first; k < idx.second; ++k)
for (size_t k = start; k < stop; ++k)
{
size_t j = k-idx.first;
size_t j = k-start;
float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
: scores[k] < 0 ? exp(scores[k]) : scores[k]);
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";