added reporting of sparse features in n-best list

git-svn-id: http://svn.statmt.org/repository/mira@3926 cc96ff50-19ce-11e0-b349-13d7f0bd23df
This commit is contained in:
pkoehn 2011-08-07 00:58:56 +00:00 committed by Ondrej Bojar
parent b919b862b8
commit 42e10d010b
10 changed files with 94 additions and 57 deletions

View File

@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "TrellisPathList.h"
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "FeatureVector.h"
#include "InputFileStream.h"
using namespace std;
@ -310,9 +311,6 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
}
}
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
{
const StaticData &staticData = StaticData::Instance();
@ -336,38 +334,8 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
}
out << " |||";
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff =
system->GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ )
{
if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() )
{
lastName = sff[i]->GetScoreProducerWeightShortName();
out << " " << lastName << ":";
}
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
for (size_t j = 0; j<scores.size(); ++j)
{
out << " " << scores[j];
}
}
const vector<const StatelessFeatureFunction*>& slf =
system->GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ )
{
if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() )
{
lastName = slf[i]->GetScoreProducerWeightShortName();
out << " " << lastName << ":";
}
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
for (size_t j = 0; j<scores.size(); ++j)
{
out << " " << scores[j];
}
}
// print scores with feature names
OutputAllFeatureScores( out, system, path );
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput){
@ -482,8 +450,62 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
out << endl;
}
out << std::flush;
}
out <<std::flush;
void OutputAllFeatureScores( std::ostream& out, const TranslationSystem* system, const TrellisPath &path ) {
std::string lastName = "";
const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ )
OutputFeatureScores( out, path, sff[i], lastName );
const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ )
OutputFeatureScores( out, path, slf[i], lastName );
}
void OutputFeatureScores( std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName )
{
const StaticData &staticData = StaticData::Instance();
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
if (ff->GetNumScoreComponents() != ScoreProducer::unlimited) {
if( labeledOutput && lastName != ff->GetScoreProducerWeightShortName() )
{
lastName = ff->GetScoreProducerWeightShortName();
out << " " << lastName << ":";
}
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j)
{
out << " " << scores[j];
}
}
// sparse features
else {
const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
// report weighted aggregate
if (! ff->GetSparseFeatureReporting()) {
const FVector &weights = staticData.GetAllWeights().GetScoresVector();
if (labeledOutput)
out << " " << ff->GetScoreProducerWeightShortName() << ":";
out << " " << scores.inner_product(weights);
}
// report each feature
else {
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
if (i->second != 0) { // do not report zero-valued features
if (labeledOutput)
out << " " << i->first << ":";
out << " " << i->second;
}
}
}
}
}
void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) {

View File

@ -117,6 +117,8 @@ bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::Inpu
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
const TranslationSystem* system, long translationId);
// Writes the scores of all stateful, then all stateless, feature functions of the system for one n-best path.
void OutputAllFeatureScores(std::ostream& out, const TranslationSystem* system, const TrellisPath &path);
// Writes the scores of one feature function for one n-best path; lastName is read and updated to avoid repeating a producer label.
void OutputFeatureScores(std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);

View File

@ -65,7 +65,6 @@ void fix(std::ostream& stream, size_t size) {
stream.precision(size);
}
/**
* Makes sure output goes in the correct order.
**/
@ -74,7 +73,6 @@ class OutputCollector {
/**
 * Store the output and debug stream pointers (defaulting to cout and cerr)
 * and initialise m_nextOutput to 0.
 **/
OutputCollector(std::ostream* outStream= &cout, std::ostream* debugStream=&cerr) :
m_nextOutput(0),m_outStream(outStream),m_debugStream(debugStream) {}
/**
* Write or cache the output, as appropriate.
**/

View File

@ -92,12 +92,8 @@ namespace Moses {
}
};
class ProxyFVector;
/**
* A sparse feature (or weight) vector.
**/
@ -120,10 +116,8 @@ namespace Moses {
static FName DEFAULT_NAME;
static const FValue DEFAULT;
void clear();
/** True iff an entry for this name is stored in m_features (pure lookup, nothing is inserted) */
bool hasNonDefaultValue(FName name) const { return m_features.find(name) != m_features.end();}
/** Load from file - each line should be 'root[_name] value' */
@ -144,8 +138,6 @@ namespace Moses {
FValue inner_product(const FVector& rhs) const;
friend class ProxyFVector;
/**arithmetic */

View File

@ -249,7 +249,6 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
{
ret.Add(path);
}
if(onlyDistinct)
{

View File

@ -143,8 +143,8 @@ Parameter::Parameter()
AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
AddParam("phrase-length-feature", "Binary features for source length, target length, both of each phrase");
AddParam("show-weights", "print feature weights and exit");
AddParam("report-sparse-features", "Indicate which sparse feature functions should report detailed scores in n-best, instead of aggregate");
AddParam("show-weights", "print feature weights and exit");
}
Parameter::~Parameter()

View File

@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <numeric>
#include <cassert>
#include <sstream>
#ifdef MPI_ENABLE
#include <boost/serialization/access.hpp>
@ -79,7 +80,7 @@ public:
return m_scores.load(filename);
}
FVector GetScoresVector()
//! get the underlying score vector m_scores; returned by value, so the caller receives a copy
FVector GetScoresVector() const
{
return m_scores;
}
@ -216,6 +217,20 @@ public:
return res;
}
//! get subset of scores that belong to a certain sparse ScoreProducer
//! collect the entries of m_scores whose printed name starts with the
//! producer's weight short name followed by FName::SEP
FVector GetVectorForProducer(const ScoreProducer* sp) const
{
  const std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
  FVector selected;
  for (FVector::FNVmap::const_iterator entry = m_scores.cbegin(); entry != m_scores.cend(); ++entry) {
    // feature names are turned into text through their stream operator
    std::ostringstream rendered;
    rendered << entry->first;
    const std::string featureName = rendered.str();
    // skip everything that does not begin with the producer prefix
    if (featureName.compare( 0, prefix.length(), prefix ) != 0)
      continue;
    selected[entry->first] = entry->second;
  }
  return selected;
}
//! forward to applyLog() of the underlying score vector, passing baseOfLog through unchanged
void ApplyLog(size_t baseOfLog) {
m_scores.applyLog(baseOfLog);
}
@ -289,11 +304,9 @@ public:
}
BOOST_SERIALIZATION_SPLIT_MEMBER()
#endif
};

View File

@ -13,7 +13,8 @@ namespace Moses
multiset<string> ScoreProducer::description_counts;
const size_t ScoreProducer::unlimited = -1;
ScoreProducer::ScoreProducer(const std::string& description)
ScoreProducer::ScoreProducer(const std::string& description)
: m_reportSparseFeatures(false)
{
description_counts.insert(description);
size_t count = description_counts.count(description);

View File

@ -21,6 +21,7 @@ class ScoreProducer
private:
mutable std::vector<FName> m_names; //for features with fixed number of values
std::string m_description;
bool m_reportSparseFeatures;
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
ScoreProducer(const ScoreProducer&); // don't implement
@ -51,6 +52,8 @@ public:
virtual bool IsStateless() const = 0;
// Switch on sparse feature reporting for this producer (sets the flag to true; this setter cannot switch it off again)
void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
// Whether sparse feature reporting has been switched on for this producer
bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
};

View File

@ -231,7 +231,7 @@ bool StaticData::LoadData(Parameter *parameter)
// print all factors of output translations
SetBooleanParameter( &m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
//
// caching of translation options
if (m_inputType == SentenceInput)
{
SetBooleanParameter( &m_useTransOptCache, "use-persistent-cache", true );
@ -462,6 +462,15 @@ bool StaticData::LoadData(Parameter *parameter)
if (!LoadPhraseBoundaryFeature()) return false;
if (!LoadPhraseLengthFeature()) return false;
// report individual sparse features in n-best list
if (m_parameter->GetParam("report-sparse-features").size() > 0) {
for(size_t i=0; i<m_parameter->GetParam("report-sparse-features").size(); i++) {
const std::string &name = m_parameter->GetParam("report-sparse-features")[i];
if (m_phraseLengthFeature && name.compare(m_phraseLengthFeature->GetScoreProducerWeightShortName()) == 0)
m_phraseLengthFeature->SetSparseFeatureReporting();
}
}
//configure the translation systems with these tables
vector<string> tsConfig = m_parameter->GetParam("translation-systems");
if (!tsConfig.size()) {
@ -555,9 +564,8 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_phraseLengthFeature) {
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phraseLengthFeature);
}
}
//Load extra feature weights
//NB: These are common to all translation systems (at the moment!)
vector<string> extraWeightConfig = m_parameter->GetParam("weight-file");
@ -577,7 +585,6 @@ bool StaticData::LoadData(Parameter *parameter)
m_allWeights.PlusEquals(extraWeights);
}
return true;
}