mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-24 04:12:47 +03:00
Code design debizarrification: Indexes of feature functions into the dense vector of all feature
values are now stored on the feature function instead of in a global map that is a static member of ScoreComponentCollection.
This commit is contained in:
parent
e63561ae7f
commit
fbf8b1f8b8
@ -64,6 +64,7 @@ FeatureFunction(const std::string& line)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||
, m_numScoreComponents(1)
|
||||
, m_index(0)
|
||||
{
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
Initialize(line);
|
||||
@ -76,6 +77,7 @@ FeatureFunction(size_t numScoreComponents,
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||
, m_numScoreComponents(numScoreComponents)
|
||||
, m_index(0)
|
||||
{
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
Initialize(line);
|
||||
@ -197,5 +199,21 @@ FeatureFunction
|
||||
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
|
||||
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
|
||||
|
||||
/// Return the index stored in m_index, i.e. the value most recently passed
/// to SetIndex() (0 if SetIndex() has not been called, since the
/// constructors initialise m_index to 0).
/// Callers in this commit use it as the position of this feature function's
/// first score in the dense score vector and add 0..GetNumScoreComponents()-1
/// to reach the individual components.
size_t
|
||||
FeatureFunction
|
||||
::GetIndex() const
|
||||
{ return m_index; }
|
||||
|
||||
|
||||
/// Record idx as this feature function's index (m_index), which callers use
/// as the position of its first score in the vector of all feature values.
|
||||
/// @param idx index to assign to this feature function
/// @return idx + GetNumScoreComponents(), i.e. the first index after this
///         feature function's scores; RegisterScoreProducer() stores this
///         as the new dense vector size, so it is the index the next
///         registered feature function receives.
size_t
|
||||
FeatureFunction
|
||||
::SetIndex(size_t const idx)
|
||||
{
|
||||
m_index = idx;
|
||||
return this->GetNumScoreComponents() + idx;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@ protected:
|
||||
bool m_requireSortingAfterSourceContext;
|
||||
size_t m_verbosity;
|
||||
size_t m_numScoreComponents;
|
||||
size_t m_index; // index into vector covering ALL feature function values
|
||||
std::vector<bool> m_tuneableComponents;
|
||||
size_t m_numTuneableComponents;
|
||||
//In case there's multiple producers with the same description
|
||||
@ -114,6 +115,8 @@ public:
|
||||
|
||||
virtual std::vector<float> DefaultWeights() const;
|
||||
|
||||
/// Index of this feature function's first score in the vector covering all
/// feature function values (returns m_index).
size_t GetIndex() const;
|
||||
/// Store idx as this feature function's index and return
/// idx + GetNumScoreComponents() (the first index past its scores).
size_t SetIndex(size_t const idx);
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
|
@ -221,7 +221,7 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
|
||||
ScoreComponentCollection &scoreBreakdown,
|
||||
bool subtract) const
|
||||
{
|
||||
size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first;
|
||||
size_t ffScoreIndex = m_index;
|
||||
|
||||
std::vector<float> scoresL2R;
|
||||
scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
|
||||
|
@ -23,8 +23,6 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
const size_t ffScoreIndex(scoreBreakdown.GetIndexes(this).first);
|
||||
|
||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
||||
const size_t sourceLength = source.GetSize();
|
||||
const size_t targetLength = targetPhrase.GetSize();
|
||||
@ -57,8 +55,8 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
|
||||
}
|
||||
}
|
||||
|
||||
scoreBreakdown.PlusEquals(ffScoreIndex, sourceUnalignedCount);
|
||||
scoreBreakdown.PlusEquals(ffScoreIndex+1, targetUnalignedCount);
|
||||
scoreBreakdown.PlusEquals(m_index, sourceUnalignedCount);
|
||||
scoreBreakdown.PlusEquals(m_index+1, targetUnalignedCount);
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, source << std::endl);
|
||||
|
@ -790,7 +790,7 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
|
||||
prev_approx_label -= prev->GetApproximateScoreLabel();
|
||||
}
|
||||
}
|
||||
size_t ff_idx = accumulator->GetIndexes(this).first;
|
||||
size_t ff_idx = m_index; // accumulator->GetIndexes(this).first;
|
||||
|
||||
accumulator->PlusEquals(ff_idx, prev_approx_head);
|
||||
accumulator->PlusEquals(ff_idx+1, prev_approx_label);
|
||||
|
@ -1,6 +1,8 @@
|
||||
// $Id$
|
||||
#include <vector>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/format.hpp>
|
||||
#include "util/exception.hh"
|
||||
#include "ScoreComponentCollection.h"
|
||||
#include "StaticData.h"
|
||||
@ -47,7 +49,7 @@ std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
|
||||
return os;
|
||||
}
|
||||
|
||||
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
||||
//ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
||||
size_t ScoreComponentCollection::s_denseVectorSize = 0;
|
||||
|
||||
ScoreComponentCollection::
|
||||
@ -58,15 +60,14 @@ ScoreComponentCollection()
|
||||
|
||||
void
|
||||
ScoreComponentCollection::
|
||||
RegisterScoreProducer(const FeatureFunction* scoreProducer)
|
||||
RegisterScoreProducer(FeatureFunction* scoreProducer)
|
||||
{
|
||||
size_t start = s_denseVectorSize;
|
||||
size_t end = start + scoreProducer->GetNumScoreComponents();
|
||||
s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
|
||||
VERBOSE(1, "FeatureFunction: "
|
||||
<< scoreProducer->GetScoreProducerDescription()
|
||||
<< " start: " << start << " end: " << (end-1) << endl);
|
||||
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
|
||||
s_denseVectorSize = end;
|
||||
<< " start: " << start
|
||||
<< " end: " << (s_denseVectorSize-1) << endl);
|
||||
}
|
||||
|
||||
|
||||
@ -191,21 +192,23 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
|
||||
sep = "=";
|
||||
linesep = " ";
|
||||
}
|
||||
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
|
||||
for (; iter != s_scoreIndexes.end(); ++iter ) {
|
||||
string name = iter->first->GetScoreProducerDescription();
|
||||
IndexPair ip = iter->second; // feature indices
|
||||
if (ip.second-ip.first == 1) {
|
||||
out << name << sep << m_scores[ip.first] << linesep;
|
||||
} else {
|
||||
for (size_t i=ip.first; i < ip.second; ++i) {
|
||||
ostringstream fullname;
|
||||
fullname << name << "_" << (i + 1 - ip.first);
|
||||
out << fullname.str() << sep << m_scores[i] << linesep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<FeatureFunction*> const& all_ff
|
||||
= FeatureFunction::GetFeatureFunctions();
|
||||
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
|
||||
{
|
||||
string name = ff->GetScoreProducerDescription();
|
||||
size_t i = ff->GetIndex();
|
||||
if (ff->GetNumScoreComponents() == 1)
|
||||
out << name << sep << m_scores[i] << linesep;
|
||||
else
|
||||
{
|
||||
size_t stop = i + ff->GetNumScoreComponents();
|
||||
boost::format fmt("%s_%d");
|
||||
for (size_t k = 1; i < stop; ++i, ++k)
|
||||
out << fmt % name % k << sep << m_scores[i] << linesep;
|
||||
}
|
||||
}
|
||||
// write sparse features
|
||||
m_scores.write(out,sep,linesep);
|
||||
}
|
||||
@ -242,8 +245,8 @@ void
|
||||
ScoreComponentCollection::
|
||||
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
||||
{
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
size_t numScores = indexes.second - indexes.first;
|
||||
size_t numScores = sp->GetNumScoreComponents();
|
||||
size_t offset = sp->GetIndex();
|
||||
|
||||
if (scores.size() != numScores) {
|
||||
UTIL_THROW(util::Exception, "Feature function "
|
||||
@ -253,7 +256,7 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
m_scores[i + indexes.first] = scores[i];
|
||||
m_scores[i + offset] = scores[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -95,24 +95,24 @@ private:
|
||||
FVector m_scores;
|
||||
|
||||
public:
|
||||
typedef std::pair<size_t,size_t> IndexPair;
|
||||
// typedef std::pair<size_t,size_t> IndexPair;
|
||||
private:
|
||||
typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
|
||||
static ScoreIndexMap s_scoreIndexes;
|
||||
// typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
|
||||
// static ScoreIndexMap s_scoreIndexes;
|
||||
static size_t s_denseVectorSize;
|
||||
public:
|
||||
static IndexPair GetIndexes(const FeatureFunction* sp) {
|
||||
ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
|
||||
if (indexIter == s_scoreIndexes.end()) {
|
||||
std::stringstream strme;
|
||||
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
|
||||
" not registered with ScoreIndexMap" << std::endl;
|
||||
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
|
||||
" for every FeatureFunction" << std::endl;
|
||||
UTIL_THROW2(strme.str());
|
||||
}
|
||||
return indexIter->second;
|
||||
}
|
||||
// static IndexPair GetIndexes(const FeatureFunction* sp) {
|
||||
// ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
|
||||
// if (indexIter == s_scoreIndexes.end()) {
|
||||
// std::stringstream strme;
|
||||
// strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
|
||||
// " not registered with ScoreIndexMap" << std::endl;
|
||||
// strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
|
||||
// " for every FeatureFunction" << std::endl;
|
||||
// UTIL_THROW2(strme.str());
|
||||
// }
|
||||
// return indexIter->second;
|
||||
// }
|
||||
|
||||
public:
|
||||
static void ResetCounter() {
|
||||
@ -136,7 +136,7 @@ public:
|
||||
* Register a ScoreProducer with a fixed number of scores, so that it can
|
||||
* be allocated space in the dense part of the feature vector.
|
||||
**/
|
||||
static void RegisterScoreProducer(const FeatureFunction* scoreProducer);
|
||||
static void RegisterScoreProducer(FeatureFunction* scoreProducer);
|
||||
|
||||
/** Load from file */
|
||||
bool Load(const std::string& filename) {
|
||||
@ -229,22 +229,23 @@ public:
|
||||
//! Add scores from a single ScoreProducer only
|
||||
//! The length of scores must be equal to the number of score components
|
||||
//! produced by sp
|
||||
void PlusEquals(const FeatureFunction* sp, const ScoreComponentCollection& scores) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
for (size_t i = indexes.first; i < indexes.second; ++i) {
|
||||
m_scores[i] += scores.m_scores[i];
|
||||
}
|
||||
void
|
||||
PlusEquals(const FeatureFunction* sp,
|
||||
const ScoreComponentCollection& scores) {
|
||||
size_t i = sp->GetIndex();
|
||||
size_t stop = i + sp->GetNumScoreComponents();
|
||||
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
|
||||
}
|
||||
|
||||
//! Add scores from a single FeatureFunction only
|
||||
//! The length of scores must be equal to the number of score components
|
||||
//! produced by sp
|
||||
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
|
||||
UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
|
||||
"Number of scores is incorrect");
|
||||
size_t offset = sp->GetIndex();
|
||||
for (size_t i = 0; i < scores.size(); ++i) {
|
||||
m_scores[i + indexes.first] += scores[i];
|
||||
m_scores[i + offset] += scores[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -252,10 +253,9 @@ public:
|
||||
//! to add the score from a single ScoreProducer that produces
|
||||
//! a single value
|
||||
void PlusEquals(const FeatureFunction* sp, float score) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
|
||||
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||
"Number of scores is incorrect");
|
||||
m_scores[indexes.first] += score;
|
||||
m_scores[sp->GetIndex()] += score;
|
||||
}
|
||||
|
||||
//For features which have an unbounded number of components
|
||||
@ -287,10 +287,10 @@ public:
|
||||
//! to add the score from a single ScoreProducer that produces
|
||||
//! a single value
|
||||
void Assign(const FeatureFunction* sp, float score) {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
|
||||
|
||||
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||
"Feature function must must only contain 1 score");
|
||||
m_scores[indexes.first] = score;
|
||||
m_scores[sp->GetIndex()] = score;
|
||||
}
|
||||
|
||||
// Assign score by index
|
||||
@ -329,9 +329,9 @@ public:
|
||||
size_t components = sp->GetNumScoreComponents();
|
||||
|
||||
std::vector<float> res(components);
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
size_t offset = sp->GetIndex();
|
||||
for (size_t i = 0; i < res.size(); ++i) {
|
||||
res[i] = m_scores[i + indexes.first];
|
||||
res[i] = m_scores[i + offset];
|
||||
}
|
||||
return res;
|
||||
}
|
||||
@ -364,18 +364,17 @@ public:
|
||||
m_scores.capMin(minValue);
|
||||
}
|
||||
|
||||
std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
|
||||
IndexPair indexPair = GetIndexes(sp);
|
||||
return indexPair;
|
||||
}
|
||||
// std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
|
||||
// IndexPair indexPair = GetIndexes(sp);
|
||||
// return indexPair;
|
||||
// }
|
||||
|
||||
//! if a FeatureFunction produces a single score (for example, a language model score)
|
||||
//! this will return it. If not, this method will throw
|
||||
float GetScoreForProducer(const FeatureFunction* sp) const {
|
||||
IndexPair indexes = GetIndexes(sp);
|
||||
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
|
||||
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||
"Feature function must must only contain 1 score");
|
||||
return m_scores[indexes.first];
|
||||
return m_scores[sp->GetIndex()];
|
||||
}
|
||||
|
||||
//For features which have an unbounded number of components
|
||||
|
@ -101,12 +101,13 @@ int main(int argc, char* argv[])
|
||||
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
|
||||
cout << setw(3) << ++k << " " << phr << endl;
|
||||
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
|
||||
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
|
||||
size_t start = PT->GetIndex();
|
||||
size_t stop = start + PT->GetNumScoreComponents();
|
||||
FVector const& scores = scc.GetScoresVector();
|
||||
cout << " ";
|
||||
for (size_t k = idx.first; k < idx.second; ++k)
|
||||
for (size_t k = start; k < stop; ++k)
|
||||
{
|
||||
size_t j = k-idx.first;
|
||||
size_t j = k-start;
|
||||
float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
|
||||
: scores[k] < 0 ? exp(scores[k]) : scores[k]);
|
||||
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
|
||||
|
Loading…
Reference in New Issue
Block a user