tuneable-components config parameter for feature functions

This commit is contained in:
Matthias Huck 2015-02-09 13:52:05 +00:00
parent ce80e53b30
commit 53ce063214
6 changed files with 179 additions and 51 deletions

View File

@ -50,6 +50,7 @@ FeatureFunction(const std::string& line)
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(1)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
}
@ -61,6 +62,7 @@ FeatureFunction(size_t numScoreComponents,
, m_verbosity(std::numeric_limits<std::size_t>::max())
, m_numScoreComponents(numScoreComponents)
{
m_numTuneableComponents = m_numScoreComponents;
Initialize(line);
}
@ -95,6 +97,7 @@ void FeatureFunction::ParseLine(const std::string &line)
if (args[0] == "num-features") {
m_numScoreComponents = Scan<size_t>(args[1]);
m_numTuneableComponents = m_numScoreComponents;
} else if (args[0] == "name") {
m_description = args[1];
} else {
@ -120,13 +123,17 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
} else if (key == "tuneable-components") {
UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
<< ": tuneable-components cannot be set if tuneable=false");
SetTuneableComponents(value);
} else if (key == "require-sorting-after-source-context") {
m_requireSortingAfterSourceContext = Scan<bool>(value);
} else if (key == "verbosity") {
m_verbosity = Scan<size_t>(value);
} else if (key == "filterable") { //ignore
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value);
}
}
@ -142,7 +149,27 @@ void FeatureFunction::ReadParameters()
std::vector<float> FeatureFunction::DefaultWeights() const
{
UTIL_THROW(util::Exception, "No default weights");
UTIL_THROW2(GetScoreProducerDescription() << ": No default weights");
}
/**
 * Parse the value of the "tuneable-components" parameter.
 *
 * The value is split at commas and must yield exactly m_numScoreComponents
 * tokens. Each token is converted with Scan<bool> and stored as the flag of
 * the score component at the same position. Afterwards
 * m_numTuneableComponents holds the number of components whose flag is true.
 *
 * \param value comma-separated list of boolean values, one per score component
 */
void FeatureFunction::SetTuneableComponents(const std::string& value)
{
  const std::vector<std::string> flags = Tokenize(value,",");

  UTIL_THROW_IF2(flags.empty(), GetScoreProducerDescription()
                 << ": Empty tuneable-components");
  UTIL_THROW_IF2(flags.size()!=m_numScoreComponents, GetScoreProducerDescription()
                 << ": tuneable-components value has to be a comma-separated list of "
                 << m_numScoreComponents << " boolean values");

  // Start from "everything is tuneable" and count down for every component
  // that is switched off.
  m_tuneableComponents.resize(m_numScoreComponents);
  m_numTuneableComponents = m_numScoreComponents;

  size_t pos = 0;
  for (std::vector<std::string>::const_iterator tok = flags.begin();
       tok != flags.end(); ++tok, ++pos) {
    const bool isTuneable = Scan<bool>(*tok);
    m_tuneableComponents[pos] = isTuneable;
    if (!isTuneable) {
      --m_numTuneableComponents;
    }
  }
}
}

View File

@ -39,6 +39,8 @@ protected:
bool m_requireSortingAfterSourceContext;
size_t m_verbosity;
size_t m_numScoreComponents;
std::vector<bool> m_tuneableComponents;
size_t m_numTuneableComponents;
//In case there's multiple producers with the same description
static std::multiset<std::string> description_counts;
@ -90,6 +92,17 @@ public:
return m_tuneable;
}
//! true if at least one score component of this feature function is tuneable
virtual bool HasTuneableComponents() const {
  return m_numTuneableComponents != 0;
}
//! true if score component i is tuneable; the per-component flags are only
//! consulted when not every component is tuneable
virtual bool IsTuneableComponent(size_t i) const {
  const bool allTuneable = (m_numTuneableComponents == m_numScoreComponents);
  return allTuneable || m_tuneableComponents[i];
}
//! value of the flag set through the "require-sorting-after-source-context" parameter
virtual bool RequireSortingAfterSourceContext() const {
return m_requireSortingAfterSourceContext;
}
@ -151,6 +164,7 @@ public:
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
virtual void SetTuneableComponents(const std::string& value);
};
}

View File

@ -970,11 +970,13 @@ void Parameter::WeightOverwrite()
// should only be on 1 line
UTIL_THROW_IF2(vec.size() != 1,
"Weight override should only be on 1 line");
"weight-overwrite should only be on 1 line");
string name("");
vector<float> weights;
vector<string> toks = Tokenize(vec[0]);
size_t cnt = 0;
const std::vector<float>* oldWeights = NULL;
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
@ -988,10 +990,24 @@ void Parameter::WeightOverwrite()
}
name = tok.substr(0, tok.size() - 1);
std::map<std::string, std::vector<float> >::const_iterator found = m_weights.find(name);
if (found!=m_weights.end()) {
oldWeights = &(found->second);
} else {
oldWeights = NULL;
}
cnt = 0;
} else {
// a weight for curr ff
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
if (toks[i] == "x") {
UTIL_THROW_IF2(!oldWeights || cnt>=oldWeights->size(),
"Keeping previous weight failed in weight-overwrite");
weights.push_back(oldWeights->at(cnt));
} else {
float weight = Scan<float>(toks[i]);
weights.push_back(weight);
}
++cnt;
}
}

View File

@ -332,14 +332,16 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
bool labeledOutput = staticData.IsLabeledNBestList();
// regular features (not sparse)
if (ff->GetNumScoreComponents() != 0) {
if (ff->HasTuneableComponents()) {
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
lastName = ff->GetScoreProducerDescription();
out << " " << lastName << "=";
}
vector<float> scores = GetScoresForProducer( ff );
for (size_t j = 0; j<scores.size(); ++j) {
out << " " << scores[j];
if (ff->IsTuneableComponent(j)) {
out << " " << scores[j];
}
}
}

View File

@ -348,7 +348,11 @@ void PrintFeatureWeight(const FeatureFunction* ff)
size_t numScoreComps = ff->GetNumScoreComponents();
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
for (size_t i = 0; i < numScoreComps; ++i) {
cout << " " << values[i];
if (ff->IsTuneableComponent(i)) {
cout << " " << values[i];
} else {
cout << " UNTUNEABLECOMPONENT";
}
}
cout << endl;

View File

@ -576,8 +576,9 @@ if ($___FILTER_PHRASE_TABLE) {
my $featlist = get_featlist_from_moses($___CONFIG);
$featlist = insert_ranges_to_featlist($featlist, $___RANGES);
# Mark which features are disabled:
# Mark which features are disabled
if (defined $___ACTIVATE_FEATURES) {
$featlist->{"enabled"} = undef;
my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES;
my %cnt;
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
@ -1145,6 +1146,8 @@ if($___RETURN_BEST_DEV) {
}
my $best_featlist = get_featlist_from_file("run$bestit.dense");
$best_featlist->{"untuneables"} = $featlist->{"untuneables"};
$best_featlist->{"allcomponentsuntuneable"} = $featlist->{"allcomponentsuntuneable"};
$best_featlist->{"skippeduntuneablecomponents"} = $featlist->{"skippeduntuneablecomponents"};
create_config($___CONFIG_ORIG, "./moses.ini", $best_featlist,
$bestit, $bestbleu, $best_sparse_file);
}
@ -1235,10 +1238,26 @@ sub run_decoder {
}
# moses now does not seem accept "-tm X -tm Y" but needs "-tm X Y"
my %model_weights;
my $valcnt = 0;
my $offset = 0;
for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
$model_weights{$name} = "$name=" if !defined $model_weights{$name};
if (!defined $model_weights{$name}) {
  # First value of this feature: start its "name=" entry.
  $model_weights{$name} = "$name=";
  # $valcnt counts the tuned values written so far and $offset the skipped
  # (untuneable) components, so $valcnt+$offset is the component position
  # within the current feature. Both must restart with every new feature;
  # otherwise skips of an earlier feature shift the lookups below.
  $valcnt = 0;
  $offset = 0;
  # Emit an "x" placeholder for every untuneable component that precedes the
  # first tuneable one ("x" makes weight-overwrite keep the previous weight).
  while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
    #$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
    $model_weights{$name} .= sprintf " x";
    $offset++;
  }
}
$model_weights{$name} .= sprintf " %.6f", $vals[$i];
$valcnt++;
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
#$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
$model_weights{$name} .= sprintf " x";
$offset++;
}
}
my $decoder_config = "";
$decoder_config = "-weight-overwrite '" . join(" ", values %model_weights) ."'" unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
@ -1362,8 +1381,11 @@ sub get_featlist_from_file {
my @names = ();
my @startvalues = ();
my @untuneables = ();
my @allcomponentsuntuneable = ();
my %skippeduntuneablecomponents = ();
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
my $nr = 0;
my $i = 0;
my @errs = ();
while (<$fh>) {
$nr++;
@ -1373,11 +1395,25 @@ sub get_featlist_from_file {
next if (!defined($valuesStr));
my @values = split(/ /, $valuesStr);
foreach my $value (@values) {
push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
if $value !~ /^[+-]?[0-9.\-e]+$/;
push @names, $longname;
push @startvalues, $value;
my $valcnt = 0;
my $hastuneablecomponent = 0;
foreach my $value (@values) {
if ($value =~ /^UNTUNEABLECOMPONENT$/) {
$skippeduntuneablecomponents{$longname}{$valcnt} = 1;
$i++;
$valcnt++;
} elsif ($value =~ /^[+-]?[0-9.\-e]+$/) {
push @names, $longname;
push @startvalues, $value;
$i++;
$valcnt++;
$hastuneablecomponent = 1;
} else {
push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
}
}
if (!$hastuneablecomponent) {
push @allcomponentsuntuneable, $longname;
}
}
elsif (/^(\S+) UNTUNEABLE$/) {
@ -1391,7 +1427,7 @@ sub get_featlist_from_file {
warn join("", @errs);
exit 1;
}
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables};
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables, "allcomponentsuntuneable"=>\@allcomponentsuntuneable, "skippeduntuneablecomponents"=>\%skippeduntuneablecomponents};
}
@ -1487,6 +1523,8 @@ sub create_config {
print $out "# We were before running iteration $iteration\n";
print $out "# finished ".`date`;
my %oldvalues = ();
my $line = <$ini_fh>;
while(1) {
last unless $line;
@ -1501,34 +1539,51 @@ sub create_config {
# parameter name
my $parameter = $1;
if ($parameter eq "weight") {
# leave weights 'til last. We're changing it
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
if ($line =~ /^([^_=\s]+)/) {
for( @{$featlist->{"untuneables"}} ){
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
push @keep_weights, $line;
}
}
}
}
}
elsif (defined($P{$parameter})) {
# found a param (thread, verbose etc) that we're overriding. Leave to the end
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
}
}
else {
# unchanged parameter, write old
print $out "[$parameter]\n";
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
print $out $line;
}
}
}
if ($parameter eq "weight") {
# leave weights 'til last. We're changing it
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
if ($line =~ /^(\S+)= (.+)$/) {
for( @{$featlist->{"untuneables"}} ){
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
push @keep_weights, $line;
}
}
for( @{$featlist->{"allcomponentsuntuneable"}} ){
if ($1 eq $_ ) {# if all dense weights are untuneable, copy it into new config
push @keep_weights, $line;
}
}
my ($longname, $valuesStr) = ($1, $2);
next if (!defined($valuesStr));
print $valuesStr;
my @values = split(/ /, $valuesStr);
my $valcnt = 0;
foreach my $value (@values) {
if ($value =~ /^[+-]?[0-9.\-e]+$/) {
$oldvalues{$longname}{$valcnt} = $value;
}
$valcnt++;
}
}
}
}
elsif (defined($P{$parameter})) {
# found a param (thread, verbose etc) that we're overriding. Leave to the end
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
}
}
else {
# unchanged parameter, write old
print $out "[$parameter]\n";
while ($line = <$ini_fh>) {
last if $line =~ /^\[/;
print $out $line;
}
}
}
# write all additional parameters
foreach my $parameter (keys %P) {
@ -1543,20 +1598,30 @@ sub create_config {
my $prevName = "";
my $outStr = "";
my $valcnt = 0;
my $offset = 0;
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
my $name = $featlist->{"names"}->[$i];
my $val = $featlist->{"values"}->[$i];
if ($prevName eq $name) {
$outStr .= " $val";
if ($prevName ne $name) {
  # A new feature starts: flush the line built for the previous one.
  print $out "$outStr\n";
  # $valcnt+$offset is the component position within the current feature, so
  # both counters must restart here; a stale $offset from an earlier feature
  # would make the lookups below hit the wrong positions.
  $valcnt = 0;
  $offset = 0;
  $outStr = "$name=";
  $prevName = $name;
  # Untuneable components preceding the first tuneable one keep the value
  # they had in the original config.
  while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
    $outStr .= " $oldvalues{$name}{$valcnt+$offset}";
    $offset++;
  }
}
else {
print $out "$outStr\n";
$outStr = "$name= $val";
$prevName = $name;
$outStr .= " $val";
$valcnt++;
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
$outStr .= " $oldvalues{$name}{$valcnt+$offset}";
$offset++;
}
}
print $out "$outStr\n";
print $out "$outStr\n";
for (@keep_weights) {
print $out $_;