mosesdecoder/moses/src/ScoreComponentCollection.h

211 lines
7.0 KiB
C
Raw Normal View History

// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_ScoreComponentCollection_h
#define moses_ScoreComponentCollection_h
#include <numeric>
#include <cassert>
#include "LMList.h"
#include "ScoreProducer.h"
#include "ScoreIndexManager.h"
#include "FeatureVector.h"
#include "TypeDef.h"
#include "Util.h"
namespace Moses
{
/*** An unweighted collection of scores for a translation or step in a translation.
*
* In the factored phrase-based models that are implemented by moses, there is a set of
* scores that come from a variety of sources (translation probabilities, language model
* probabilities, distortion probabilities, generation probabilities). Furthermore, while
* some of these scores may be 0, the number of scores is fixed (and generally quite small,
* i.e., less than 15) for a given model.
*
* The values contained in ScoreComponentCollection objects are unweighted scores (log-probs).
*
* ScoreComponentCollection objects can be added and subtracted, which makes them appropriate
* to be the datatype used to return the result of a score computation (in this case they will
* have most values set to zero, except for the ones that are results of the individual
* computation); this result will then be added into the "running total" in the Hypothesis.
* In fact, for a score to be tracked in the hypothesis (and thus to participate in the
* decoding process), a class representing that score must extend the ScoreProducer abstract
* base class. For an example refer to the DistortionScoreProducer class.
*/
class ScoreComponentCollection {
friend std::ostream& operator<<(std::ostream& os, const ScoreComponentCollection& rhs);
friend class ScoreIndexManager;
private:
FVector m_scores;
const ScoreIndexManager* m_sim;
public:
//! Create a new score collection with all values set to 0.0
ScoreComponentCollection();
//! Clone a score collection
ScoreComponentCollection(const ScoreComponentCollection& rhs)
: m_scores(rhs.m_scores)
, m_sim(rhs.m_sim)
{}
inline size_t size() const { return m_scores.size(); }
float operator[](size_t x) const { return m_scores[m_sim->GetFeatureName(x)]; }
//! Set all values to 0.0
void ZeroAll()
{
m_scores.clear();
}
void MultiplyEquals(float scalar);
void MultiplyEquals(const ScoreComponentCollection& rhs);
//! add the score in rhs
void PlusEquals(const ScoreComponentCollection& rhs)
{
m_scores += rhs.m_scores;
}
//! subtract the score in rhs
void MinusEquals(const ScoreComponentCollection& rhs)
{
m_scores -= rhs.m_scores;
}
//! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components
//! produced by sp
void PlusEquals(const ScoreProducer* sp, const std::vector<float>& scores);
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
2009-02-06 18:43:06 +03:00
//! Add scores from a single ScoreProducer only
//! The length of scores must be equal to the number of score components
//! produced by sp
void PlusEquals(const ScoreProducer* sp, const ScoreComponentCollection& scores)
{
size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
const size_t end = m_sim->GetEndIndex(sp->GetScoreBookkeepingID());
for (; i < end; ++i)
{
m_scores[m_sim->GetFeatureName(i)] += scores.m_scores[m_sim->GetFeatureName(i)];
Feature function overhaul. Each feature function is computed in one of three ways: 1) Stateless feature functions from the phrase table/generation table: these are computed when the TranslationOption is created. They become part of the ScoreBreakdown object contained in the TranslationOption and are added to the feature value vector when a hypothesis is extended. 2) Stateless feature functions that are computed during state exploration. Currently, only WordPenalty falls into this category, but these functions implement a method Evaluate which do does not receive a Hypothesis or any contextual information. 3) Stateful feature functions: these features receive the arc information (translation option), compute some value and then return some context information. The context information created by a particular feature function is passed back to it as the previous context when a hypothesis originating at the node where the previous edge terminates is created. States in the search space may be recombined if the context information is identical. The context information must be stored in an object implementing the FFState interface. TODO: 1) the command line interface / MERT interface needs to go to named parameters that are otherwise opaque 2) StatefulFeatureFunction's Evaluate method should just take a TranslationOption and a context object. It is not good that it takes a hypothesis, because then people may be tempted to access information about the "previous" hypothesis without "declaring" this dependency. 3) Future cost estimates should be handled using feature functions. All stateful feature functions need some kind of future cost estimate. 4) Philipp's poor-man's cube pruning is broken. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2087 1f5c12ca-751b-0410-a591-d2e778427230
2009-02-06 18:43:06 +03:00
}
}
//! Special version PlusEquals(ScoreProducer, vector<float>)
//! to add the score from a single ScoreProducer that produces
//! a single value
void PlusEquals(const ScoreProducer* sp, float score)
{
assert(1 == sp->GetNumScoreComponents());
const size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
m_scores[m_sim->GetFeatureName(i)] += score;
}
void Assign(const ScoreProducer* sp, const std::vector<float>& scores)
{
assert(scores.size() == sp->GetNumScoreComponents());
size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
for (std::vector<float>::const_iterator vi = scores.begin();
vi != scores.end(); ++vi)
{
m_scores[m_sim->GetFeatureName(i++)] = *vi;
}
}
void Assign(const ScoreComponentCollection &copy)
{
m_scores = copy.m_scores;
}
//! Special version PlusEquals(ScoreProducer, vector<float>)
//! to add the score from a single ScoreProducer that produces
//! a single value
void Assign(const ScoreProducer* sp, float score)
{
assert(1 == sp->GetNumScoreComponents());
const size_t i = m_sim->GetBeginIndex(sp->GetScoreBookkeepingID());
m_scores[m_sim->GetFeatureName(i)] = score;
}
void Assign(size_t index, float score)
{
m_scores[m_sim->GetFeatureName(index)] = score;
}
//! Used to find the weighted total of scores. rhs should contain a vector of weights
//! of the same length as the number of scores.
float InnerProduct(const std::vector<float>& rhs) const
{
return m_scores.inner_product(rhs);
}
float InnerProduct(const ScoreComponentCollection& rhs) const
{
return m_scores.inner_product(rhs.m_scores);
}
float PartialInnerProduct(const ScoreProducer* sp, const std::vector<float>& rhs) const
{
std::vector<float> lhs = GetScoresForProducer(sp);
assert(lhs.size() == rhs.size());
return std::inner_product(lhs.begin(), lhs.end(), rhs.begin(), 0.0f);
}
//! return a vector of all the scores associated with a certain ScoreProducer
std::vector<float> GetScoresForProducer(const ScoreProducer* sp) const
{
size_t id = sp->GetScoreBookkeepingID();
const size_t begin = m_sim->GetBeginIndex(id);
const size_t end = m_sim->GetEndIndex(id);
std::vector<float> res(end-begin);
size_t j = 0;
for (size_t i = begin; i < end; i++) {
res[j++] = m_scores[m_sim->GetFeatureName(i)];
}
return res;
}
//! if a ScoreProducer produces a single score (for example, a language model score)
//! this will return it. If not, this method will throw
float GetScoreForProducer(const ScoreProducer* sp) const
{
size_t id = sp->GetScoreBookkeepingID();
const size_t begin = m_sim->GetBeginIndex(id);
#ifndef NDEBUG
const size_t end = m_sim->GetEndIndex(id);
assert(end-begin == 1);
#endif
return m_scores[m_sim->GetFeatureName(begin)];
}
float GetWeightedScore() const;
void ZeroAllLM(const LMList& lmList);
void PlusEqualsAllLM(const LMList& lmList, const ScoreComponentCollection& rhs);
};
}
#endif