mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-09 16:04:41 +03:00
Merge branch 'master' into hieu
This commit is contained in:
commit
4bf5c04b1b
@ -1264,12 +1264,12 @@
|
||||
<link>
|
||||
<name>FF/SoftMatchingFeature.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SoftMatchingFeature.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SoftMatchingFeature.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SoftMatchingFeature.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SourceWordDeletionFeature.cpp</name>
|
||||
@ -1304,12 +1304,12 @@
|
||||
<link>
|
||||
<name>FF/SyntaxConstraintFeature.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SyntaxConstraintFeature.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxConstraintFeature.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SyntaxConstraintFeature.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SyntaxConstraintFeature.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxConstraintFeature.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/TargetBigramFeature.cpp</name>
|
||||
|
@ -223,12 +223,18 @@ static void ShowWeights()
|
||||
if (ff->IsTuneable()) {
|
||||
PrintFeatureWeight(ff);
|
||||
}
|
||||
else {
|
||||
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < slf.size(); ++i) {
|
||||
const StatelessFeatureFunction *ff = slf[i];
|
||||
if (ff->IsTuneable()) {
|
||||
PrintFeatureWeight(ff);
|
||||
}
|
||||
else {
|
||||
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -495,12 +495,18 @@ static void ShowWeights()
|
||||
if (ff->IsTuneable()) {
|
||||
PrintFeatureWeight(ff);
|
||||
}
|
||||
else {
|
||||
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < slf.size(); ++i) {
|
||||
const StatelessFeatureFunction *ff = slf[i];
|
||||
if (ff->IsTuneable()) {
|
||||
PrintFeatureWeight(ff);
|
||||
}
|
||||
else {
|
||||
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "moses/FF/CoveredReferenceFeature.h"
|
||||
#include "moses/FF/SyntaxConstraintFeature.h"
|
||||
#include "moses/FF/SoftMatchingFeature.h"
|
||||
#include "moses/FF/HyperParameterAsWeight.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
@ -108,7 +109,7 @@ template <class F> void FeatureFactory::DefaultSetup(F *feature)
|
||||
// if it's tuneable, ini file MUST have weights
|
||||
// even if it's not tuneable, people can still set the weights in the ini file
|
||||
static_data.SetWeights(feature, weights);
|
||||
} else {
|
||||
} else if (feature->GetNumScoreComponents() > 0) {
|
||||
std::vector<float> defaultWeights = feature->DefaultWeights();
|
||||
static_data.SetWeights(feature, defaultWeights);
|
||||
}
|
||||
@ -175,6 +176,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(ExternalFeature);
|
||||
MOSES_FNAME(SyntaxConstraintFeature);
|
||||
MOSES_FNAME(SoftMatchingFeature);
|
||||
MOSES_FNAME(HyperParameterAsWeight);
|
||||
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
|
@ -14,6 +14,22 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
|
||||
public:
|
||||
HyperParameterAsWeight(const std::string &line);
|
||||
|
||||
// Reports the feature as usable for every factor mask; `mask` is not inspected.
virtual bool IsUseable(const FactorMask &mask) const
|
||||
{ return true; }
|
||||
|
||||
// Phrase-level scoring hook with an empty body: nothing is written to
// scoreBreakdown or estimatedFutureScore.
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{}
|
||||
|
||||
// Input-aware scoring hook with an empty body: the input, input path and
// target phrase are ignored and no scores are modified.
// estimatedFutureScore defaults to NULL and is never dereferenced here.
virtual void Evaluate(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
// Hypothesis-level scoring hook with an empty body: `accumulator` is left untouched.
virtual void Evaluate(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
@ -11,17 +11,24 @@ namespace Moses
|
||||
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
|
||||
: StatelessFeatureFunction(0, line)
|
||||
{
|
||||
std::cerr << "Initializing SoftMatchingFeature.." << std::endl;
|
||||
|
||||
for (size_t i = 0; i < m_args.size(); ++i) {
|
||||
const std::vector<std::string> &args = m_args[i];
|
||||
if (args[0] == "path") {
|
||||
const std::string filePath = args[1];
|
||||
Load(filePath);
|
||||
}
|
||||
} // for
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void SoftMatchingFeature::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
std::cerr << "setting: " << this->GetScoreProducerDescription() << " - " << key << "\n";
|
||||
if (key == "tuneable") {
|
||||
m_tuneable = Scan<bool>(value);
|
||||
} else if (key == "filterable") { //ignore
|
||||
} else if (key == "path") {
|
||||
const std::string filePath = value;
|
||||
Load(filePath);
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool SoftMatchingFeature::Load(const std::string& filePath)
|
||||
{
|
||||
|
||||
@ -103,6 +110,5 @@ const std::string& SoftMatchingFeature::GetFeatureName(const Word& LHS, const Wo
|
||||
return m_soft_matching_cache.find(key)->second;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -48,6 +48,7 @@ public:
|
||||
}
|
||||
|
||||
const std::string& GetFeatureName(const Word& LHS, const Word& RHS) const;
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
private:
|
||||
std::map<Word, std::set<Word> > m_soft_matches; // map LHS of old rule to RHS of new rule
|
||||
|
@ -75,7 +75,9 @@ class SyntaxConstraintFeature : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
SyntaxConstraintFeature(const std::string &line)
|
||||
:StatefulFeatureFunction(0, line) {}
|
||||
:StatefulFeatureFunction(0, line) {
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
|
||||
return new TreeState(TreePointer());
|
||||
|
@ -115,6 +115,9 @@ public:
|
||||
}
|
||||
|
||||
std::vector<float> GetWeights(const std::string &name);
|
||||
// Returns the whole m_weights table by value, so the caller gets its own copy.
std::map<std::string, std::vector<float> > GetAllWeights() const {
|
||||
return m_weights;
|
||||
}
|
||||
std::set<std::string> GetWeightNames() const;
|
||||
|
||||
const PARAM_MAP &GetParams() const {
|
||||
|
@ -534,6 +534,7 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
if (!LoadDecodeGraphs()) return false;
|
||||
|
||||
|
||||
if (!CheckWeights()) {
|
||||
return false;
|
||||
}
|
||||
@ -555,6 +556,9 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
m_allWeights.PlusEquals(extraWeights);
|
||||
}
|
||||
|
||||
//Load sparse features from config (overrules weight file)
|
||||
LoadSparseWeightsFromConfig();
|
||||
|
||||
// alternate weight settings
|
||||
if (m_parameter->GetParam("alternate-weight-setting").size() > 0) {
|
||||
if (!LoadAlternateWeightSettings()) {
|
||||
@ -933,11 +937,13 @@ void StaticData::LoadFeatureFunctions()
|
||||
bool StaticData::CheckWeights() const
|
||||
{
|
||||
set<string> weightNames = m_parameter->GetWeightNames();
|
||||
set<string> featureNames;
|
||||
|
||||
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
||||
for (size_t i = 0; i < ffs.size(); ++i) {
|
||||
const FeatureFunction &ff = *ffs[i];
|
||||
const string &descr = ff.GetScoreProducerDescription();
|
||||
featureNames.insert(descr);
|
||||
|
||||
set<string>::iterator iter = weightNames.find(descr);
|
||||
if (iter == weightNames.end()) {
|
||||
@ -947,6 +953,21 @@ bool StaticData::CheckWeights() const
|
||||
}
|
||||
}
|
||||
|
||||
//sparse features
|
||||
if (!weightNames.empty()) {
|
||||
set<string>::iterator iter;
|
||||
for (iter = weightNames.begin(); iter != weightNames.end(); ) {
|
||||
string fname = (*iter).substr(0, (*iter).find("_"));
|
||||
cerr << fname << "\n";
|
||||
if (featureNames.find(fname) != featureNames.end()) {
|
||||
weightNames.erase(iter++);
|
||||
}
|
||||
else {
|
||||
++iter;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!weightNames.empty()) {
|
||||
cerr << "The following weights have no feature function. Maybe incorrectly spelt weights: ";
|
||||
set<string>::iterator iter;
|
||||
@ -959,6 +980,29 @@ bool StaticData::CheckWeights() const
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void StaticData::LoadSparseWeightsFromConfig() {
|
||||
set<string> featureNames;
|
||||
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
|
||||
for (size_t i = 0; i < ffs.size(); ++i) {
|
||||
const FeatureFunction &ff = *ffs[i];
|
||||
const string &descr = ff.GetScoreProducerDescription();
|
||||
featureNames.insert(descr);
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<float> > weights = m_parameter->GetAllWeights();
|
||||
std::map<std::string, std::vector<float> >::iterator iter;
|
||||
for (iter = weights.begin(); iter != weights.end(); ++iter) {
|
||||
// this indicates that it is sparse feature
|
||||
if (featureNames.find(iter->first) == featureNames.end()) {
|
||||
UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
|
||||
m_allWeights.Assign(iter->first, iter->second[0]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**! Read in settings for alternative weights */
|
||||
bool StaticData::LoadAlternateWeightSettings()
|
||||
{
|
||||
|
@ -715,6 +715,7 @@ public:
|
||||
|
||||
void LoadFeatureFunctions();
|
||||
bool CheckWeights() const;
|
||||
void LoadSparseWeightsFromConfig();
|
||||
bool LoadWeightSettings();
|
||||
bool LoadAlternateWeightSettings();
|
||||
|
||||
|
@ -138,7 +138,7 @@ string unescape(const string& str)
|
||||
s += string("[");
|
||||
} else if (name == "ket") {
|
||||
s += string("]");
|
||||
} else if (name == "bar") {
|
||||
} else if (name == "bar" || name == "#124") {
|
||||
s += string("|");
|
||||
} else if (name == "amp") {
|
||||
s += string("&");
|
||||
|
@ -1278,6 +1278,7 @@ sub get_featlist_from_file {
|
||||
# read feature list
|
||||
my @names = ();
|
||||
my @startvalues = ();
|
||||
my @untuneables = ();
|
||||
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
|
||||
my $nr = 0;
|
||||
my @errs = ();
|
||||
@ -1296,6 +1297,10 @@ sub get_featlist_from_file {
|
||||
push @startvalues, $value;
|
||||
}
|
||||
}
|
||||
elsif (/^(\S+) UNTUNEABLE$/) {
|
||||
my ($longname) = ($1);
|
||||
push @untuneables, $longname;
|
||||
}
|
||||
}
|
||||
close $fh;
|
||||
|
||||
@ -1303,7 +1308,7 @@ sub get_featlist_from_file {
|
||||
warn join("", @errs);
|
||||
exit 1;
|
||||
}
|
||||
return {"names"=>\@names, "values"=>\@startvalues};
|
||||
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables};
|
||||
}
|
||||
|
||||
|
||||
@ -1353,6 +1358,8 @@ sub create_config {
|
||||
my $bleu_achieved = shift; # just for verbosity
|
||||
my $sparse_weights_file = shift; # only defined when optimizing sparse features
|
||||
|
||||
my @keep_weights = ();
|
||||
|
||||
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
|
||||
my $name = $featlist->{"names"}->[$i];
|
||||
my $val = $featlist->{"values"}->[$i];
|
||||
@ -1415,6 +1422,13 @@ sub create_config {
|
||||
# leave weights 'til last. We're changing it
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
if ($line =~ /^([^_=\s]+)/) {
|
||||
for( @{$featlist->{"untuneables"}} ){
|
||||
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
|
||||
push @keep_weights, $line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif (defined($P{$parameter})) {
|
||||
@ -1461,6 +1475,10 @@ sub create_config {
|
||||
}
|
||||
print $out "$outStr\n";
|
||||
|
||||
for (@keep_weights) {
|
||||
print $out $_;
|
||||
}
|
||||
|
||||
close $ini_fh;
|
||||
close $out;
|
||||
print STDERR "Saved: $outfn\n";
|
||||
|
Loading…
Reference in New Issue
Block a user