Merge branch 'master' into hieu

This commit is contained in:
Hieu Hoang 2014-01-21 14:10:57 +00:00
commit 4bf5c04b1b
13 changed files with 123 additions and 18 deletions

View File

@ -1264,12 +1264,12 @@
<link>
<name>FF/SoftMatchingFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SoftMatchingFeature.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.cpp</locationURI>
</link>
<link>
<name>FF/SoftMatchingFeature.h</name>
<type>1</type>
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SoftMatchingFeature.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
</link>
<link>
<name>FF/SourceWordDeletionFeature.cpp</name>
@ -1304,12 +1304,12 @@
<link>
<name>FF/SyntaxConstraintFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SyntaxConstraintFeature.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxConstraintFeature.cpp</locationURI>
</link>
<link>
<name>FF/SyntaxConstraintFeature.h</name>
<type>1</type>
<locationURI>PARENT-9-PROJECT_LOC/home/s0565741/workspace/github/mosesdecoder/moses/FF/SyntaxConstraintFeature.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxConstraintFeature.h</locationURI>
</link>
<link>
<name>FF/TargetBigramFeature.cpp</name>

View File

@ -223,12 +223,18 @@ static void ShowWeights()
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
for (size_t i = 0; i < slf.size(); ++i) {
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
}

View File

@ -495,12 +495,18 @@ static void ShowWeights()
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
for (size_t i = 0; i < slf.size(); ++i) {
const StatelessFeatureFunction *ff = slf[i];
if (ff->IsTuneable()) {
PrintFeatureWeight(ff);
}
else {
cout << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl;
}
}
}

View File

@ -36,6 +36,7 @@
#include "moses/FF/CoveredReferenceFeature.h"
#include "moses/FF/SyntaxConstraintFeature.h"
#include "moses/FF/SoftMatchingFeature.h"
#include "moses/FF/HyperParameterAsWeight.h"
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
@ -108,7 +109,7 @@ template <class F> void FeatureFactory::DefaultSetup(F *feature)
// if it's tuneable, ini file MUST have weights
// even if it's not tuneable, people can still set the weights in the ini file
static_data.SetWeights(feature, weights);
} else {
} else if (feature->GetNumScoreComponents() > 0) {
std::vector<float> defaultWeights = feature->DefaultWeights();
static_data.SetWeights(feature, defaultWeights);
}
@ -175,6 +176,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(ExternalFeature);
MOSES_FNAME(SyntaxConstraintFeature);
MOSES_FNAME(SoftMatchingFeature);
MOSES_FNAME(HyperParameterAsWeight);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);

View File

@ -14,6 +14,22 @@ class HyperParameterAsWeight : public StatelessFeatureFunction
public:
HyperParameterAsWeight(const std::string &line);
// Always returns true; the factor mask argument is not inspected.
virtual bool IsUseable(const FactorMask &mask) const
{ return true; }
// Evaluate overload taking a source phrase and a target phrase.
// No-op: the body is empty, so neither scoreBreakdown nor
// estimatedFutureScore is modified.
virtual void Evaluate(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
// Evaluate overload taking the input, an input path and a target phrase.
// No-op: the body is empty, so scoreBreakdown is left untouched and
// estimatedFutureScore (which defaults to NULL) is never dereferenced.
virtual void Evaluate(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const
{}
// Evaluate overload taking a hypothesis.
// No-op: the body is empty, so nothing is added to the accumulator.
virtual void Evaluate(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}

View File

@ -11,17 +11,24 @@ namespace Moses
// Constructs the feature from its configuration line; the base class is
// initialised with 0 and the unmodified line.
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
: StatelessFeatureFunction(0, line)
{
std::cerr << "Initializing SoftMatchingFeature.." << std::endl;
// Walk the entries of m_args and call Load() for every entry whose first
// element is "path", using the second element as the file path.
// NOTE(review): args[0] / args[1] are read without checking args.size() --
// confirm every m_args entry holds at least a key and a value.
for (size_t i = 0; i < m_args.size(); ++i) {
const std::vector<std::string> &args = m_args[i];
if (args[0] == "path") {
const std::string filePath = args[1];
Load(filePath);
}
} // for
// NOTE(review): SetParameter() also calls Load() for the "path" key. If
// ReadParameters() forwards the arguments to SetParameter(), the file would
// be loaded twice -- confirm whether the loop above is still required.
ReadParameters();
}
/** Apply one key/value argument to this feature.
 *
 *  Recognised keys:
 *    - "tuneable"   : parsed as bool and stored in m_tuneable
 *    - "filterable" : accepted, but nothing is done with it
 *    - "path"       : the value is passed to Load()
 *  Any other key raises util::Exception. Every call first writes a
 *  "setting: ..." trace line to std::cerr.
 */
void SoftMatchingFeature::SetParameter(const std::string& key, const std::string& value)
{
  std::cerr << "setting: " << this->GetScoreProducerDescription() << " - " << key << "\n";

  if (key == "filterable") {
    // ignore
    return;
  }

  if (key == "tuneable") {
    m_tuneable = Scan<bool>(value);
    return;
  }

  if (key == "path") {
    Load(value);
    return;
  }

  UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
bool SoftMatchingFeature::Load(const std::string& filePath)
{
@ -103,6 +110,5 @@ const std::string& SoftMatchingFeature::GetFeatureName(const Word& LHS, const Wo
return m_soft_matching_cache.find(key)->second;
}
}

View File

@ -48,6 +48,7 @@ public:
}
const std::string& GetFeatureName(const Word& LHS, const Word& RHS) const;
void SetParameter(const std::string& key, const std::string& value);
private:
std::map<Word, std::set<Word> > m_soft_matches; // map LHS of old rule to RHS of new rule

View File

@ -75,7 +75,9 @@ class SyntaxConstraintFeature : public StatefulFeatureFunction
{
public:
SyntaxConstraintFeature(const std::string &line)
:StatefulFeatureFunction(0, line) {}
:StatefulFeatureFunction(0, line) {
ReadParameters();
}
virtual const FFState* EmptyHypothesisState(const InputType &input) const {
return new TreeState(TreePointer());

View File

@ -115,6 +115,9 @@ public:
}
std::vector<float> GetWeights(const std::string &name);
// Returns the complete m_weights map (string key -> vector of floats).
// The map is returned by value, so the caller receives its own copy.
std::map<std::string, std::vector<float> > GetAllWeights() const {
return m_weights;
}
std::set<std::string> GetWeightNames() const;
const PARAM_MAP &GetParams() const {

View File

@ -534,6 +534,7 @@ bool StaticData::LoadData(Parameter *parameter)
if (!LoadDecodeGraphs()) return false;
if (!CheckWeights()) {
return false;
}
@ -555,6 +556,9 @@ bool StaticData::LoadData(Parameter *parameter)
m_allWeights.PlusEquals(extraWeights);
}
//Load sparse features from config (overrules weight file)
LoadSparseWeightsFromConfig();
// alternate weight settings
if (m_parameter->GetParam("alternate-weight-setting").size() > 0) {
if (!LoadAlternateWeightSettings()) {
@ -933,11 +937,13 @@ void StaticData::LoadFeatureFunctions()
bool StaticData::CheckWeights() const
{
set<string> weightNames = m_parameter->GetWeightNames();
set<string> featureNames;
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
const string &descr = ff.GetScoreProducerDescription();
featureNames.insert(descr);
set<string>::iterator iter = weightNames.find(descr);
if (iter == weightNames.end()) {
@ -947,6 +953,21 @@ bool StaticData::CheckWeights() const
}
}
//sparse features
if (!weightNames.empty()) {
set<string>::iterator iter;
for (iter = weightNames.begin(); iter != weightNames.end(); ) {
string fname = (*iter).substr(0, (*iter).find("_"));
cerr << fname << "\n";
if (featureNames.find(fname) != featureNames.end()) {
weightNames.erase(iter++);
}
else {
++iter;
}
}
}
if (!weightNames.empty()) {
cerr << "The following weights have no feature function. Maybe incorrectly spelt weights: ";
set<string>::iterator iter;
@ -959,6 +980,29 @@ bool StaticData::CheckWeights() const
return true;
}
void StaticData::LoadSparseWeightsFromConfig() {
set<string> featureNames;
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
for (size_t i = 0; i < ffs.size(); ++i) {
const FeatureFunction &ff = *ffs[i];
const string &descr = ff.GetScoreProducerDescription();
featureNames.insert(descr);
}
std::map<std::string, std::vector<float> > weights = m_parameter->GetAllWeights();
std::map<std::string, std::vector<float> >::iterator iter;
for (iter = weights.begin(); iter != weights.end(); ++iter) {
// this indicates that it is sparse feature
if (featureNames.find(iter->first) == featureNames.end()) {
UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
m_allWeights.Assign(iter->first, iter->second[0]);
}
}
}
/**! Read in settings for alternative weights */
bool StaticData::LoadAlternateWeightSettings()
{

View File

@ -715,6 +715,7 @@ public:
void LoadFeatureFunctions();
bool CheckWeights() const;
void LoadSparseWeightsFromConfig();
bool LoadWeightSettings();
bool LoadAlternateWeightSettings();

View File

@ -138,7 +138,7 @@ string unescape(const string& str)
s += string("[");
} else if (name == "ket") {
s += string("]");
} else if (name == "bar") {
} else if (name == "bar" || name == "#124") {
s += string("|");
} else if (name == "amp") {
s += string("&");

View File

@ -1278,6 +1278,7 @@ sub get_featlist_from_file {
# read feature list
my @names = ();
my @startvalues = ();
my @untuneables = ();
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
my $nr = 0;
my @errs = ();
@ -1296,6 +1297,10 @@ sub get_featlist_from_file {
push @startvalues, $value;
}
}
elsif (/^(\S+) UNTUNEABLE$/) {
my ($longname) = ($1);
push @untuneables, $longname;
}
}
close $fh;
@ -1303,7 +1308,7 @@ sub get_featlist_from_file {
warn join("", @errs);
exit 1;
}
return {"names"=>\@names, "values"=>\@startvalues};
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables};
}
@ -1353,6 +1358,8 @@ sub create_config {
my $bleu_achieved = shift; # just for verbosity
my $sparse_weights_file = shift; # only defined when optimizing sparse features
my @keep_weights = ();
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
my $val = $featlist->{"values"}->[$i];
@ -1415,6 +1422,13 @@ sub create_config {
# leave weights 'til last. We're changing it
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
if ($line =~ /^([^_=\s]+)/) {
for( @{$featlist->{"untuneables"}} ){
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
push @keep_weights, $line;
}
}
}
}
}
elsif (defined($P{$parameter})) {
@ -1461,6 +1475,10 @@ sub create_config {
}
print $out "$outStr\n";
for (@keep_weights) {
print $out $_;
}
close $ini_fh;
close $out;
print STDERR "Saved: $outfn\n";