mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
Code design debizarrification: Indexes of feature functions into the dense vector of all feature
values are now stored on the feature function instead of in a global map that is a static member of ScoreComponentCollection.
This commit is contained in:
parent
e63561ae7f
commit
fbf8b1f8b8
@ -64,6 +64,7 @@ FeatureFunction(const std::string& line)
|
|||||||
, m_requireSortingAfterSourceContext(false)
|
, m_requireSortingAfterSourceContext(false)
|
||||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||||
, m_numScoreComponents(1)
|
, m_numScoreComponents(1)
|
||||||
|
, m_index(0)
|
||||||
{
|
{
|
||||||
m_numTuneableComponents = m_numScoreComponents;
|
m_numTuneableComponents = m_numScoreComponents;
|
||||||
Initialize(line);
|
Initialize(line);
|
||||||
@ -76,6 +77,7 @@ FeatureFunction(size_t numScoreComponents,
|
|||||||
, m_requireSortingAfterSourceContext(false)
|
, m_requireSortingAfterSourceContext(false)
|
||||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||||
, m_numScoreComponents(numScoreComponents)
|
, m_numScoreComponents(numScoreComponents)
|
||||||
|
, m_index(0)
|
||||||
{
|
{
|
||||||
m_numTuneableComponents = m_numScoreComponents;
|
m_numTuneableComponents = m_numScoreComponents;
|
||||||
Initialize(line);
|
Initialize(line);
|
||||||
@ -197,5 +199,21 @@ FeatureFunction
|
|||||||
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
|
::CleanUpAfterSentenceProcessing(ttasksptr const& ttask)
|
||||||
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
|
{ CleanUpAfterSentenceProcessing(*(ttask->GetSource().get())); }
|
||||||
|
|
||||||
|
size_t
|
||||||
|
FeatureFunction
|
||||||
|
::GetIndex() const
|
||||||
|
{ return m_index; }
|
||||||
|
|
||||||
|
|
||||||
|
/// Set the index at which this FF's scores start in the dense vector of all feature values.
|
||||||
|
/// @return index of the next FF, i.e. idx plus this FF's number of score components
|
||||||
|
size_t
|
||||||
|
FeatureFunction
|
||||||
|
::SetIndex(size_t const idx)
|
||||||
|
{
|
||||||
|
m_index = idx;
|
||||||
|
return this->GetNumScoreComponents() + idx;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,6 +43,7 @@ protected:
|
|||||||
bool m_requireSortingAfterSourceContext;
|
bool m_requireSortingAfterSourceContext;
|
||||||
size_t m_verbosity;
|
size_t m_verbosity;
|
||||||
size_t m_numScoreComponents;
|
size_t m_numScoreComponents;
|
||||||
|
size_t m_index; // index into vector covering ALL feature function values
|
||||||
std::vector<bool> m_tuneableComponents;
|
std::vector<bool> m_tuneableComponents;
|
||||||
size_t m_numTuneableComponents;
|
size_t m_numTuneableComponents;
|
||||||
//In case there's multiple producers with the same description
|
//In case there's multiple producers with the same description
|
||||||
@ -114,6 +115,8 @@ public:
|
|||||||
|
|
||||||
virtual std::vector<float> DefaultWeights() const;
|
virtual std::vector<float> DefaultWeights() const;
|
||||||
|
|
||||||
|
size_t GetIndex() const;
|
||||||
|
size_t SetIndex(size_t const idx);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void
|
virtual void
|
||||||
|
@ -221,7 +221,7 @@ void PhraseOrientationFeature::LookaheadScore(const OrientationPhraseProperty *o
|
|||||||
ScoreComponentCollection &scoreBreakdown,
|
ScoreComponentCollection &scoreBreakdown,
|
||||||
bool subtract) const
|
bool subtract) const
|
||||||
{
|
{
|
||||||
size_t ffScoreIndex = scoreBreakdown.GetIndexes(this).first;
|
size_t ffScoreIndex = m_index;
|
||||||
|
|
||||||
std::vector<float> scoresL2R;
|
std::vector<float> scoresL2R;
|
||||||
scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
|
scoresL2R.push_back( TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono()) );
|
||||||
|
@ -23,8 +23,6 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
|
|||||||
, ScoreComponentCollection &scoreBreakdown
|
, ScoreComponentCollection &scoreBreakdown
|
||||||
, ScoreComponentCollection &estimatedFutureScore) const
|
, ScoreComponentCollection &estimatedFutureScore) const
|
||||||
{
|
{
|
||||||
const size_t ffScoreIndex(scoreBreakdown.GetIndexes(this).first);
|
|
||||||
|
|
||||||
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignTerm();
|
||||||
const size_t sourceLength = source.GetSize();
|
const size_t sourceLength = source.GetSize();
|
||||||
const size_t targetLength = targetPhrase.GetSize();
|
const size_t targetLength = targetPhrase.GetSize();
|
||||||
@ -57,8 +55,8 @@ void UnalignedWordCountFeature::EvaluateInIsolation(const Phrase &source
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scoreBreakdown.PlusEquals(ffScoreIndex, sourceUnalignedCount);
|
scoreBreakdown.PlusEquals(m_index, sourceUnalignedCount);
|
||||||
scoreBreakdown.PlusEquals(ffScoreIndex+1, targetUnalignedCount);
|
scoreBreakdown.PlusEquals(m_index+1, targetUnalignedCount);
|
||||||
|
|
||||||
IFFEATUREVERBOSE(2) {
|
IFFEATUREVERBOSE(2) {
|
||||||
FEATUREVERBOSE(2, source << std::endl);
|
FEATUREVERBOSE(2, source << std::endl);
|
||||||
|
@ -790,7 +790,7 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
|
|||||||
prev_approx_label -= prev->GetApproximateScoreLabel();
|
prev_approx_label -= prev->GetApproximateScoreLabel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
size_t ff_idx = accumulator->GetIndexes(this).first;
|
size_t ff_idx = m_index; // accumulator->GetIndexes(this).first;
|
||||||
|
|
||||||
accumulator->PlusEquals(ff_idx, prev_approx_head);
|
accumulator->PlusEquals(ff_idx, prev_approx_head);
|
||||||
accumulator->PlusEquals(ff_idx+1, prev_approx_label);
|
accumulator->PlusEquals(ff_idx+1, prev_approx_label);
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
// $Id$
|
// $Id$
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <boost/algorithm/string/predicate.hpp>
|
#include <boost/algorithm/string/predicate.hpp>
|
||||||
|
#include <boost/foreach.hpp>
|
||||||
|
#include <boost/format.hpp>
|
||||||
#include "util/exception.hh"
|
#include "util/exception.hh"
|
||||||
#include "ScoreComponentCollection.h"
|
#include "ScoreComponentCollection.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
@ -47,7 +49,7 @@ std::ostream& operator<<(std::ostream& os, const ScorePair& rhs)
|
|||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
//ScoreComponentCollection::ScoreIndexMap ScoreComponentCollection::s_scoreIndexes;
|
||||||
size_t ScoreComponentCollection::s_denseVectorSize = 0;
|
size_t ScoreComponentCollection::s_denseVectorSize = 0;
|
||||||
|
|
||||||
ScoreComponentCollection::
|
ScoreComponentCollection::
|
||||||
@ -58,15 +60,14 @@ ScoreComponentCollection()
|
|||||||
|
|
||||||
void
|
void
|
||||||
ScoreComponentCollection::
|
ScoreComponentCollection::
|
||||||
RegisterScoreProducer(const FeatureFunction* scoreProducer)
|
RegisterScoreProducer(FeatureFunction* scoreProducer)
|
||||||
{
|
{
|
||||||
size_t start = s_denseVectorSize;
|
size_t start = s_denseVectorSize;
|
||||||
size_t end = start + scoreProducer->GetNumScoreComponents();
|
s_denseVectorSize = scoreProducer->SetIndex(s_denseVectorSize);
|
||||||
VERBOSE(1, "FeatureFunction: "
|
VERBOSE(1, "FeatureFunction: "
|
||||||
<< scoreProducer->GetScoreProducerDescription()
|
<< scoreProducer->GetScoreProducerDescription()
|
||||||
<< " start: " << start << " end: " << (end-1) << endl);
|
<< " start: " << start
|
||||||
s_scoreIndexes[scoreProducer] = pair<size_t,size_t>(start,end);
|
<< " end: " << (s_denseVectorSize-1) << endl);
|
||||||
s_denseVectorSize = end;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -191,21 +192,23 @@ void ScoreComponentCollection::Save(ostream& out, bool multiline) const
|
|||||||
sep = "=";
|
sep = "=";
|
||||||
linesep = " ";
|
linesep = " ";
|
||||||
}
|
}
|
||||||
ScoreIndexMap::const_iterator iter = s_scoreIndexes.begin();
|
|
||||||
for (; iter != s_scoreIndexes.end(); ++iter ) {
|
|
||||||
string name = iter->first->GetScoreProducerDescription();
|
|
||||||
IndexPair ip = iter->second; // feature indices
|
|
||||||
if (ip.second-ip.first == 1) {
|
|
||||||
out << name << sep << m_scores[ip.first] << linesep;
|
|
||||||
} else {
|
|
||||||
for (size_t i=ip.first; i < ip.second; ++i) {
|
|
||||||
ostringstream fullname;
|
|
||||||
fullname << name << "_" << (i + 1 - ip.first);
|
|
||||||
out << fullname.str() << sep << m_scores[i] << linesep;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
std::vector<FeatureFunction*> const& all_ff
|
||||||
|
= FeatureFunction::GetFeatureFunctions();
|
||||||
|
BOOST_FOREACH(FeatureFunction const* ff, all_ff)
|
||||||
|
{
|
||||||
|
string name = ff->GetScoreProducerDescription();
|
||||||
|
size_t i = ff->GetIndex();
|
||||||
|
if (ff->GetNumScoreComponents() == 1)
|
||||||
|
out << name << sep << m_scores[i] << linesep;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
size_t stop = i + ff->GetNumScoreComponents();
|
||||||
|
boost::format fmt("%s_%d");
|
||||||
|
for (size_t k = 1; i < stop; ++i, ++k)
|
||||||
|
out << fmt % name % k << sep << m_scores[i] << linesep;
|
||||||
|
}
|
||||||
|
}
|
||||||
// write sparse features
|
// write sparse features
|
||||||
m_scores.write(out,sep,linesep);
|
m_scores.write(out,sep,linesep);
|
||||||
}
|
}
|
||||||
@ -242,8 +245,8 @@ void
|
|||||||
ScoreComponentCollection::
|
ScoreComponentCollection::
|
||||||
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
||||||
{
|
{
|
||||||
IndexPair indexes = GetIndexes(sp);
|
size_t numScores = sp->GetNumScoreComponents();
|
||||||
size_t numScores = indexes.second - indexes.first;
|
size_t offset = sp->GetIndex();
|
||||||
|
|
||||||
if (scores.size() != numScores) {
|
if (scores.size() != numScores) {
|
||||||
UTIL_THROW(util::Exception, "Feature function "
|
UTIL_THROW(util::Exception, "Feature function "
|
||||||
@ -253,7 +256,7 @@ Assign(const FeatureFunction* sp, const std::vector<float>& scores)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < scores.size(); ++i) {
|
for (size_t i = 0; i < scores.size(); ++i) {
|
||||||
m_scores[i + indexes.first] = scores[i];
|
m_scores[i + offset] = scores[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,24 +95,24 @@ private:
|
|||||||
FVector m_scores;
|
FVector m_scores;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef std::pair<size_t,size_t> IndexPair;
|
// typedef std::pair<size_t,size_t> IndexPair;
|
||||||
private:
|
private:
|
||||||
typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
|
// typedef std::map<const FeatureFunction*,IndexPair> ScoreIndexMap;
|
||||||
static ScoreIndexMap s_scoreIndexes;
|
// static ScoreIndexMap s_scoreIndexes;
|
||||||
static size_t s_denseVectorSize;
|
static size_t s_denseVectorSize;
|
||||||
public:
|
public:
|
||||||
static IndexPair GetIndexes(const FeatureFunction* sp) {
|
// static IndexPair GetIndexes(const FeatureFunction* sp) {
|
||||||
ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
|
// ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
|
||||||
if (indexIter == s_scoreIndexes.end()) {
|
// if (indexIter == s_scoreIndexes.end()) {
|
||||||
std::stringstream strme;
|
// std::stringstream strme;
|
||||||
strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
|
// strme << "ERROR: FeatureFunction: " << sp->GetScoreProducerDescription() <<
|
||||||
" not registered with ScoreIndexMap" << std::endl;
|
// " not registered with ScoreIndexMap" << std::endl;
|
||||||
strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
|
// strme << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
|
||||||
" for every FeatureFunction" << std::endl;
|
// " for every FeatureFunction" << std::endl;
|
||||||
UTIL_THROW2(strme.str());
|
// UTIL_THROW2(strme.str());
|
||||||
}
|
// }
|
||||||
return indexIter->second;
|
// return indexIter->second;
|
||||||
}
|
// }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static void ResetCounter() {
|
static void ResetCounter() {
|
||||||
@ -136,7 +136,7 @@ public:
|
|||||||
* Register a ScoreProducer with a fixed number of scores, so that it can
|
* Register a ScoreProducer with a fixed number of scores, so that it can
|
||||||
* be allocated space in the dense part of the feature vector.
|
* be allocated space in the dense part of the feature vector.
|
||||||
**/
|
**/
|
||||||
static void RegisterScoreProducer(const FeatureFunction* scoreProducer);
|
static void RegisterScoreProducer(FeatureFunction* scoreProducer);
|
||||||
|
|
||||||
/** Load from file */
|
/** Load from file */
|
||||||
bool Load(const std::string& filename) {
|
bool Load(const std::string& filename) {
|
||||||
@ -229,22 +229,23 @@ public:
|
|||||||
//! Add scores from a single ScoreProducer only
|
//! Add scores from a single ScoreProducer only
|
||||||
//! The length of scores must be equal to the number of score components
|
//! The length of scores must be equal to the number of score components
|
||||||
//! produced by sp
|
//! produced by sp
|
||||||
void PlusEquals(const FeatureFunction* sp, const ScoreComponentCollection& scores) {
|
void
|
||||||
IndexPair indexes = GetIndexes(sp);
|
PlusEquals(const FeatureFunction* sp,
|
||||||
for (size_t i = indexes.first; i < indexes.second; ++i) {
|
const ScoreComponentCollection& scores) {
|
||||||
m_scores[i] += scores.m_scores[i];
|
size_t i = sp->GetIndex();
|
||||||
}
|
size_t stop = i + sp->GetNumScoreComponents();
|
||||||
|
for (;i < stop; ++i) m_scores[i] += scores.m_scores[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
//! Add scores from a single FeatureFunction only
|
//! Add scores from a single FeatureFunction only
|
||||||
//! The length of scores must be equal to the number of score components
|
//! The length of scores must be equal to the number of score components
|
||||||
//! produced by sp
|
//! produced by sp
|
||||||
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
|
void PlusEquals(const FeatureFunction* sp, const std::vector<float>& scores) {
|
||||||
IndexPair indexes = GetIndexes(sp);
|
UTIL_THROW_IF2(scores.size() != sp->GetNumScoreComponents(),
|
||||||
UTIL_THROW_IF2(scores.size() != indexes.second - indexes.first,
|
|
||||||
"Number of scores is incorrect");
|
"Number of scores is incorrect");
|
||||||
|
size_t offset = sp->GetIndex();
|
||||||
for (size_t i = 0; i < scores.size(); ++i) {
|
for (size_t i = 0; i < scores.size(); ++i) {
|
||||||
m_scores[i + indexes.first] += scores[i];
|
m_scores[i + offset] += scores[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,10 +253,9 @@ public:
|
|||||||
//! to add the score from a single ScoreProducer that produces
|
//! to add the score from a single ScoreProducer that produces
|
||||||
//! a single value
|
//! a single value
|
||||||
void PlusEquals(const FeatureFunction* sp, float score) {
|
void PlusEquals(const FeatureFunction* sp, float score) {
|
||||||
IndexPair indexes = GetIndexes(sp);
|
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||||
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
|
|
||||||
"Number of scores is incorrect");
|
"Number of scores is incorrect");
|
||||||
m_scores[indexes.first] += score;
|
m_scores[sp->GetIndex()] += score;
|
||||||
}
|
}
|
||||||
|
|
||||||
//For features which have an unbounded number of components
|
//For features which have an unbounded number of components
|
||||||
@ -287,10 +287,10 @@ public:
|
|||||||
//! to add the score from a single ScoreProducer that produces
|
//! to add the score from a single ScoreProducer that produces
|
||||||
//! a single value
|
//! a single value
|
||||||
void Assign(const FeatureFunction* sp, float score) {
|
void Assign(const FeatureFunction* sp, float score) {
|
||||||
IndexPair indexes = GetIndexes(sp);
|
|
||||||
UTIL_THROW_IF2(1 != indexes.second - indexes.first,
|
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||||
"Feature function must must only contain 1 score");
|
"Feature function must must only contain 1 score");
|
||||||
m_scores[indexes.first] = score;
|
m_scores[sp->GetIndex()] = score;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assign score by index
|
// Assign score by index
|
||||||
@ -329,9 +329,9 @@ public:
|
|||||||
size_t components = sp->GetNumScoreComponents();
|
size_t components = sp->GetNumScoreComponents();
|
||||||
|
|
||||||
std::vector<float> res(components);
|
std::vector<float> res(components);
|
||||||
IndexPair indexes = GetIndexes(sp);
|
size_t offset = sp->GetIndex();
|
||||||
for (size_t i = 0; i < res.size(); ++i) {
|
for (size_t i = 0; i < res.size(); ++i) {
|
||||||
res[i] = m_scores[i + indexes.first];
|
res[i] = m_scores[i + offset];
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -364,18 +364,17 @@ public:
|
|||||||
m_scores.capMin(minValue);
|
m_scores.capMin(minValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
|
// std::pair<size_t,size_t> GetIndexesForProducer(const FeatureFunction* sp) const {
|
||||||
IndexPair indexPair = GetIndexes(sp);
|
// IndexPair indexPair = GetIndexes(sp);
|
||||||
return indexPair;
|
// return indexPair;
|
||||||
}
|
// }
|
||||||
|
|
||||||
//! if a FeatureFunction produces a single score (for example, a language model score)
|
//! if a FeatureFunction produces a single score (for example, a language model score)
|
||||||
//! this will return it. If not, this method will throw
|
//! this will return it. If not, this method will throw
|
||||||
float GetScoreForProducer(const FeatureFunction* sp) const {
|
float GetScoreForProducer(const FeatureFunction* sp) const {
|
||||||
IndexPair indexes = GetIndexes(sp);
|
UTIL_THROW_IF2(sp->GetNumScoreComponents() != 1,
|
||||||
UTIL_THROW_IF2(indexes.second - indexes.first != 1,
|
|
||||||
"Feature function must must only contain 1 score");
|
"Feature function must must only contain 1 score");
|
||||||
return m_scores[indexes.first];
|
return m_scores[sp->GetIndex()];
|
||||||
}
|
}
|
||||||
|
|
||||||
//For features which have an unbounded number of components
|
//For features which have an unbounded number of components
|
||||||
|
@ -101,12 +101,13 @@ int main(int argc, char* argv[])
|
|||||||
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
|
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[i]);
|
||||||
cout << setw(3) << ++k << " " << phr << endl;
|
cout << setw(3) << ++k << " " << phr << endl;
|
||||||
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
|
ScoreComponentCollection const& scc = (*trg)[i]->GetScoreBreakdown();
|
||||||
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT);
|
size_t start = PT->GetIndex();
|
||||||
|
size_t stop = start + PT->GetNumScoreComponents();
|
||||||
FVector const& scores = scc.GetScoresVector();
|
FVector const& scores = scc.GetScoresVector();
|
||||||
cout << " ";
|
cout << " ";
|
||||||
for (size_t k = idx.first; k < idx.second; ++k)
|
for (size_t k = start; k < stop; ++k)
|
||||||
{
|
{
|
||||||
size_t j = k-idx.first;
|
size_t j = k-start;
|
||||||
float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
|
float f = (mmsapt ? mmsapt->isLogVal(j) ? exp(scores[k]) : scores[k]
|
||||||
: scores[k] < 0 ? exp(scores[k]) : scores[k]);
|
: scores[k] < 0 ? exp(scores[k]) : scores[k]);
|
||||||
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
|
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f";
|
||||||
|
Loading…
Reference in New Issue
Block a user