mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-11 19:27:11 +03:00
added reporting of sparse features in n-best list
git-svn-id: http://svn.statmt.org/repository/mira@3926 cc96ff50-19ce-11e0-b349-13d7f0bd23df
This commit is contained in:
parent
b919b862b8
commit
42e10d010b
@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "TrellisPathList.h"
|
||||
#include "StaticData.h"
|
||||
#include "DummyScoreProducers.h"
|
||||
#include "FeatureVector.h"
|
||||
#include "InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
@ -310,9 +311,6 @@ void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, b
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
@ -336,38 +334,8 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
|
||||
}
|
||||
out << " |||";
|
||||
|
||||
std::string lastName = "";
|
||||
const vector<const StatefulFeatureFunction*>& sff =
|
||||
system->GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ )
|
||||
{
|
||||
if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() )
|
||||
{
|
||||
lastName = sff[i]->GetScoreProducerWeightShortName();
|
||||
out << " " << lastName << ":";
|
||||
}
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
{
|
||||
out << " " << scores[j];
|
||||
}
|
||||
}
|
||||
|
||||
const vector<const StatelessFeatureFunction*>& slf =
|
||||
system->GetStatelessFeatureFunctions();
|
||||
for( size_t i=0; i<slf.size(); i++ )
|
||||
{
|
||||
if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() )
|
||||
{
|
||||
lastName = slf[i]->GetScoreProducerWeightShortName();
|
||||
out << " " << lastName << ":";
|
||||
}
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
{
|
||||
out << " " << scores[j];
|
||||
}
|
||||
}
|
||||
// print scores with feature names
|
||||
OutputAllFeatureScores( out, system, path );
|
||||
|
||||
// translation components
|
||||
if (StaticData::Instance().GetInputType()==SentenceInput){
|
||||
@ -482,8 +450,62 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con
|
||||
out << endl;
|
||||
}
|
||||
|
||||
out << std::flush;
|
||||
}
|
||||
|
||||
out <<std::flush;
|
||||
void OutputAllFeatureScores( std::ostream& out, const TranslationSystem* system, const TrellisPath &path ) {
|
||||
std::string lastName = "";
|
||||
const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ )
|
||||
OutputFeatureScores( out, path, sff[i], lastName );
|
||||
|
||||
const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
|
||||
for( size_t i=0; i<slf.size(); i++ )
|
||||
OutputFeatureScores( out, path, slf[i], lastName );
|
||||
}
|
||||
|
||||
void OutputFeatureScores( std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName )
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
bool labeledOutput = staticData.IsLabeledNBestList();
|
||||
|
||||
// regular features (not sparse)
|
||||
if (ff->GetNumScoreComponents() != ScoreProducer::unlimited) {
|
||||
if( labeledOutput && lastName != ff->GetScoreProducerWeightShortName() )
|
||||
{
|
||||
lastName = ff->GetScoreProducerWeightShortName();
|
||||
out << " " << lastName << ":";
|
||||
}
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( ff );
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
{
|
||||
out << " " << scores[j];
|
||||
}
|
||||
}
|
||||
|
||||
// sparse features
|
||||
else {
|
||||
const FVector scores = path.GetScoreBreakdown().GetVectorForProducer( ff );
|
||||
|
||||
// report weighted aggregate
|
||||
if (! ff->GetSparseFeatureReporting()) {
|
||||
const FVector &weights = staticData.GetAllWeights().GetScoresVector();
|
||||
if (labeledOutput)
|
||||
out << " " << ff->GetScoreProducerWeightShortName() << ":";
|
||||
out << " " << scores.inner_product(weights);
|
||||
}
|
||||
|
||||
// report each feature
|
||||
else {
|
||||
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
|
||||
if (i->second != 0) { // do not report zero-valued features
|
||||
if (labeledOutput)
|
||||
out << " " << i->first << ":";
|
||||
out << " " << i->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) {
|
||||
|
@ -117,6 +117,8 @@ bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::Inpu
|
||||
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
|
||||
const TranslationSystem* system, long translationId);
|
||||
// Writes to `out` the scores of all stateful, then all stateless, feature functions of `system` for `path`.
void OutputAllFeatureScores(std::ostream& out, const TranslationSystem* system, const TrellisPath &path);
|
||||
// Writes to `out` the scores of the single feature function `ff` for `path`; `lastName` carries the
// short name of the most recently labeled producer between calls.
void OutputFeatureScores(std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName);
|
||||
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
|
||||
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
|
||||
bool reportSegmentation, bool reportAllFactors, std::ostream& out);
|
||||
|
@ -65,7 +65,6 @@ void fix(std::ostream& stream, size_t size) {
|
||||
stream.precision(size);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Makes sure output goes in the correct order.
|
||||
**/
|
||||
@ -74,7 +73,6 @@ class OutputCollector {
|
||||
OutputCollector(std::ostream* outStream= &cout, std::ostream* debugStream=&cerr) :
|
||||
m_nextOutput(0),m_outStream(outStream),m_debugStream(debugStream) {}
|
||||
|
||||
|
||||
/**
|
||||
* Write or cache the output, as appropriate.
|
||||
**/
|
||||
|
@ -92,12 +92,8 @@ namespace Moses {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class ProxyFVector;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* A sparse feature (or weight) vector.
|
||||
**/
|
||||
@ -120,10 +116,8 @@ namespace Moses {
|
||||
static FName DEFAULT_NAME;
|
||||
static const FValue DEFAULT;
|
||||
|
||||
|
||||
void clear();
|
||||
|
||||
|
||||
bool hasNonDefaultValue(FName name) const { return m_features.find(name) != m_features.end();}
|
||||
|
||||
/** Load from file - each line should be 'root[_name] value' */
|
||||
@ -144,8 +138,6 @@ namespace Moses {
|
||||
|
||||
FValue inner_product(const FVector& rhs) const;
|
||||
|
||||
|
||||
|
||||
friend class ProxyFVector;
|
||||
|
||||
/**arithmetic */
|
||||
|
@ -249,7 +249,6 @@ void Manager::CalcNBest(size_t count, TrellisPathList &ret,bool onlyDistinct) co
|
||||
{
|
||||
ret.Add(path);
|
||||
}
|
||||
|
||||
|
||||
if(onlyDistinct)
|
||||
{
|
||||
|
@ -143,8 +143,8 @@ Parameter::Parameter()
|
||||
AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
|
||||
AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
|
||||
AddParam("phrase-length-feature", "Binary features for source length, target length, both of each phrase");
|
||||
|
||||
AddParam("show-weights", "print feature weights and exit");
|
||||
AddParam("report-sparse-features", "Indicate which sparse feature functions should report detailed scores in n-best, instead of aggregate");
|
||||
AddParam("show-weights", "print feature weights and exit");
|
||||
}
|
||||
|
||||
Parameter::~Parameter()
|
||||
|
@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include <numeric>
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
|
||||
#ifdef MPI_ENABLE
|
||||
#include <boost/serialization/access.hpp>
|
||||
@ -79,7 +80,7 @@ public:
|
||||
return m_scores.load(filename);
|
||||
}
|
||||
|
||||
FVector GetScoresVector()
|
||||
FVector GetScoresVector() const
|
||||
{
|
||||
return m_scores;
|
||||
}
|
||||
@ -216,6 +217,20 @@ public:
|
||||
return res;
|
||||
}
|
||||
|
||||
//! get subset of scores that belong to a certain sparse ScoreProducer
|
||||
FVector GetVectorForProducer(const ScoreProducer* sp) const
|
||||
{
|
||||
FVector fv;
|
||||
std::string prefix = sp->GetScoreProducerWeightShortName() + FName::SEP;
|
||||
for(FVector::FNVmap::const_iterator i = m_scores.cbegin(); i != m_scores.cend(); i++) {
|
||||
std::stringstream name;
|
||||
name << i->first;
|
||||
if (name.str().substr( 0, prefix.length() ).compare( prefix ) == 0)
|
||||
fv[i->first] = i->second;
|
||||
}
|
||||
return fv;
|
||||
}
|
||||
|
||||
void ApplyLog(size_t baseOfLog) {
|
||||
m_scores.applyLog(baseOfLog);
|
||||
}
|
||||
@ -289,11 +304,9 @@ public:
|
||||
|
||||
}
|
||||
|
||||
|
||||
BOOST_SERIALIZATION_SPLIT_MEMBER()
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
@ -13,7 +13,8 @@ namespace Moses
|
||||
multiset<string> ScoreProducer::description_counts;
|
||||
const size_t ScoreProducer::unlimited = -1;
|
||||
|
||||
ScoreProducer::ScoreProducer(const std::string& description)
|
||||
ScoreProducer::ScoreProducer(const std::string& description)
|
||||
: m_reportSparseFeatures(false)
|
||||
{
|
||||
description_counts.insert(description);
|
||||
size_t count = description_counts.count(description);
|
||||
|
@ -21,6 +21,7 @@ class ScoreProducer
|
||||
private:
|
||||
mutable std::vector<FName> m_names; //for features with fixed number of values
|
||||
std::string m_description;
|
||||
bool m_reportSparseFeatures;
|
||||
//In case there's multiple producers with the same description
|
||||
static std::multiset<std::string> description_counts;
|
||||
ScoreProducer(const ScoreProducer&); // don't implement
|
||||
@ -51,6 +52,8 @@ public:
|
||||
|
||||
virtual bool IsStateless() const = 0;
|
||||
|
||||
//! switch on sparse feature reporting for this producer (sets m_reportSparseFeatures to true)
void SetSparseFeatureReporting() { m_reportSparseFeatures = true; }
|
||||
//! whether sparse feature reporting has been switched on for this producer
bool GetSparseFeatureReporting() const { return m_reportSparseFeatures; }
|
||||
};
|
||||
|
||||
|
||||
|
@ -231,7 +231,7 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// print all factors of output translations
|
||||
SetBooleanParameter( &m_reportAllFactorsNBest, "report-all-factors-in-n-best", false );
|
||||
|
||||
//
|
||||
// caching of translation options
|
||||
if (m_inputType == SentenceInput)
|
||||
{
|
||||
SetBooleanParameter( &m_useTransOptCache, "use-persistent-cache", true );
|
||||
@ -462,6 +462,15 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (!LoadPhraseBoundaryFeature()) return false;
|
||||
if (!LoadPhraseLengthFeature()) return false;
|
||||
|
||||
// report individual sparse features in n-best list
|
||||
if (m_parameter->GetParam("report-sparse-features").size() > 0) {
|
||||
for(size_t i=0; i<m_parameter->GetParam("report-sparse-features").size(); i++) {
|
||||
const std::string &name = m_parameter->GetParam("report-sparse-features")[i];
|
||||
if (m_phraseLengthFeature && name.compare(m_phraseLengthFeature->GetScoreProducerWeightShortName()) == 0)
|
||||
m_phraseLengthFeature->SetSparseFeatureReporting();
|
||||
}
|
||||
}
|
||||
|
||||
//configure the translation systems with these tables
|
||||
vector<string> tsConfig = m_parameter->GetParam("translation-systems");
|
||||
if (!tsConfig.size()) {
|
||||
@ -555,9 +564,8 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
if (m_phraseLengthFeature) {
|
||||
m_translationSystems.find(config[0])->second.AddFeatureFunction(m_phraseLengthFeature);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//Load extra feature weights
|
||||
//NB: These are common to all translation systems (at the moment!)
|
||||
vector<string> extraWeightConfig = m_parameter->GetParam("weight-file");
|
||||
@ -577,7 +585,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
m_allWeights.PlusEquals(extraWeights);
|
||||
}
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user