mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
tuneable-components config parameter for feature functions
This commit is contained in:
parent
ce80e53b30
commit
53ce063214
@ -50,6 +50,7 @@ FeatureFunction(const std::string& line)
|
||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||
, m_numScoreComponents(1)
|
||||
{
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
Initialize(line);
|
||||
}
|
||||
|
||||
@ -61,6 +62,7 @@ FeatureFunction(size_t numScoreComponents,
|
||||
, m_verbosity(std::numeric_limits<std::size_t>::max())
|
||||
, m_numScoreComponents(numScoreComponents)
|
||||
{
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
Initialize(line);
|
||||
}
|
||||
|
||||
@ -95,6 +97,7 @@ void FeatureFunction::ParseLine(const std::string &line)
|
||||
|
||||
if (args[0] == "num-features") {
|
||||
m_numScoreComponents = Scan<size_t>(args[1]);
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
} else if (args[0] == "name") {
|
||||
m_description = args[1];
|
||||
} else {
|
||||
@ -120,13 +123,17 @@ void FeatureFunction::SetParameter(const std::string& key, const std::string& va
|
||||
{
|
||||
if (key == "tuneable") {
|
||||
m_tuneable = Scan<bool>(value);
|
||||
} else if (key == "tuneable-components") {
|
||||
UTIL_THROW_IF2(!m_tuneable, GetScoreProducerDescription()
|
||||
<< ": tuneable-components cannot be set if tuneable=false");
|
||||
SetTuneableComponents(value);
|
||||
} else if (key == "require-sorting-after-source-context") {
|
||||
m_requireSortingAfterSourceContext = Scan<bool>(value);
|
||||
} else if (key == "verbosity") {
|
||||
m_verbosity = Scan<size_t>(value);
|
||||
} else if (key == "filterable") { //ignore
|
||||
} else {
|
||||
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
|
||||
UTIL_THROW2(GetScoreProducerDescription() << ": Unknown argument " << key << "=" << value);
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,7 +149,27 @@ void FeatureFunction::ReadParameters()
|
||||
|
||||
std::vector<float> FeatureFunction::DefaultWeights() const
|
||||
{
|
||||
UTIL_THROW(util::Exception, "No default weights");
|
||||
UTIL_THROW2(GetScoreProducerDescription() << ": No default weights");
|
||||
}
|
||||
|
||||
void FeatureFunction::SetTuneableComponents(const std::string& value)
|
||||
{
|
||||
std::vector<std::string> toks = Tokenize(value,",");
|
||||
UTIL_THROW_IF2(toks.empty(), GetScoreProducerDescription()
|
||||
<< ": Empty tuneable-components");
|
||||
UTIL_THROW_IF2(toks.size()!=m_numScoreComponents, GetScoreProducerDescription()
|
||||
<< ": tuneable-components value has to be a comma-separated list of "
|
||||
<< m_numScoreComponents << " boolean values");
|
||||
|
||||
m_tuneableComponents.resize(m_numScoreComponents);
|
||||
m_numTuneableComponents = m_numScoreComponents;
|
||||
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
m_tuneableComponents[i] = Scan<bool>(toks[i]);
|
||||
if (!m_tuneableComponents[i]) {
|
||||
--m_numTuneableComponents;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -39,6 +39,8 @@ protected:
|
||||
bool m_requireSortingAfterSourceContext;
|
||||
size_t m_verbosity;
|
||||
size_t m_numScoreComponents;
|
||||
std::vector<bool> m_tuneableComponents;
|
||||
size_t m_numTuneableComponents;
|
||||
//In case there's multiple producers with the same description
|
||||
static std::multiset<std::string> description_counts;
|
||||
|
||||
@ -90,6 +92,17 @@ public:
|
||||
return m_tuneable;
|
||||
}
|
||||
|
||||
virtual bool HasTuneableComponents() const {
|
||||
return m_numTuneableComponents;
|
||||
}
|
||||
|
||||
virtual bool IsTuneableComponent(size_t i) const {
|
||||
if (m_numTuneableComponents == m_numScoreComponents) {
|
||||
return true;
|
||||
}
|
||||
return m_tuneableComponents[i];
|
||||
}
|
||||
|
||||
virtual bool RequireSortingAfterSourceContext() const {
|
||||
return m_requireSortingAfterSourceContext;
|
||||
}
|
||||
@ -151,6 +164,7 @@ public:
|
||||
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
virtual void ReadParameters();
|
||||
virtual void SetTuneableComponents(const std::string& value);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -970,11 +970,13 @@ void Parameter::WeightOverwrite()
|
||||
|
||||
// should only be on 1 line
|
||||
UTIL_THROW_IF2(vec.size() != 1,
|
||||
"Weight override should only be on 1 line");
|
||||
"weight-overwrite should only be on 1 line");
|
||||
|
||||
string name("");
|
||||
vector<float> weights;
|
||||
vector<string> toks = Tokenize(vec[0]);
|
||||
size_t cnt = 0;
|
||||
const std::vector<float>* oldWeights = NULL;
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
const string &tok = toks[i];
|
||||
|
||||
@ -988,10 +990,24 @@ void Parameter::WeightOverwrite()
|
||||
}
|
||||
|
||||
name = tok.substr(0, tok.size() - 1);
|
||||
std::map<std::string, std::vector<float> >::const_iterator found = m_weights.find(name);
|
||||
if (found!=m_weights.end()) {
|
||||
oldWeights = &(found->second);
|
||||
} else {
|
||||
oldWeights = NULL;
|
||||
}
|
||||
cnt = 0;
|
||||
} else {
|
||||
// a weight for curr ff
|
||||
float weight = Scan<float>(toks[i]);
|
||||
weights.push_back(weight);
|
||||
if (toks[i] == "x") {
|
||||
UTIL_THROW_IF2(!oldWeights || cnt>=oldWeights->size(),
|
||||
"Keeping previous weight failed in weight-overwrite");
|
||||
weights.push_back(oldWeights->at(cnt));
|
||||
} else {
|
||||
float weight = Scan<float>(toks[i]);
|
||||
weights.push_back(weight);
|
||||
}
|
||||
++cnt;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,14 +332,16 @@ void ScoreComponentCollection::OutputFeatureScores( std::ostream& out
|
||||
bool labeledOutput = staticData.IsLabeledNBestList();
|
||||
|
||||
// regular features (not sparse)
|
||||
if (ff->GetNumScoreComponents() != 0) {
|
||||
if (ff->HasTuneableComponents()) {
|
||||
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
|
||||
lastName = ff->GetScoreProducerDescription();
|
||||
out << " " << lastName << "=";
|
||||
}
|
||||
vector<float> scores = GetScoresForProducer( ff );
|
||||
for (size_t j = 0; j<scores.size(); ++j) {
|
||||
out << " " << scores[j];
|
||||
if (ff->IsTuneableComponent(j)) {
|
||||
out << " " << scores[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -348,7 +348,11 @@ void PrintFeatureWeight(const FeatureFunction* ff)
|
||||
size_t numScoreComps = ff->GetNumScoreComponents();
|
||||
vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
|
||||
for (size_t i = 0; i < numScoreComps; ++i) {
|
||||
cout << " " << values[i];
|
||||
if (ff->IsTuneableComponent(i)) {
|
||||
cout << " " << values[i];
|
||||
} else {
|
||||
cout << " UNTUNEABLECOMPONENT";
|
||||
}
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
|
@ -576,8 +576,9 @@ if ($___FILTER_PHRASE_TABLE) {
|
||||
my $featlist = get_featlist_from_moses($___CONFIG);
|
||||
$featlist = insert_ranges_to_featlist($featlist, $___RANGES);
|
||||
|
||||
# Mark which features are disabled:
|
||||
# Mark which features are disabled
|
||||
if (defined $___ACTIVATE_FEATURES) {
|
||||
$featlist->{"enabled"} = undef;
|
||||
my %enabled = map { ($_, 1) } split /[, ]+/, $___ACTIVATE_FEATURES;
|
||||
my %cnt;
|
||||
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
|
||||
@ -1145,6 +1146,8 @@ if($___RETURN_BEST_DEV) {
|
||||
}
|
||||
my $best_featlist = get_featlist_from_file("run$bestit.dense");
|
||||
$best_featlist->{"untuneables"} = $featlist->{"untuneables"};
|
||||
$best_featlist->{"allcomponentsuntuneable"} = $featlist->{"allcomponentsuntuneable"};
|
||||
$best_featlist->{"skippeduntuneablecomponents"} = $featlist->{"skippeduntuneablecomponents"};
|
||||
create_config($___CONFIG_ORIG, "./moses.ini", $best_featlist,
|
||||
$bestit, $bestbleu, $best_sparse_file);
|
||||
}
|
||||
@ -1235,10 +1238,26 @@ sub run_decoder {
|
||||
}
|
||||
# moses now does not seem accept "-tm X -tm Y" but needs "-tm X Y"
|
||||
my %model_weights;
|
||||
my $valcnt = 0;
|
||||
my $offset = 0;
|
||||
for(my $i=0; $i<scalar(@{$featlist->{"names"}}); $i++) {
|
||||
my $name = $featlist->{"names"}->[$i];
|
||||
$model_weights{$name} = "$name=" if !defined $model_weights{$name};
|
||||
if (!defined $model_weights{$name}) {
|
||||
$model_weights{$name} = "$name=";
|
||||
$valcnt = 0;
|
||||
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
|
||||
#$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
|
||||
$model_weights{$name} .= sprintf " x";
|
||||
$offset++;
|
||||
}
|
||||
}
|
||||
$model_weights{$name} .= sprintf " %.6f", $vals[$i];
|
||||
$valcnt++;
|
||||
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
|
||||
#$model_weights{$name} .= sprintf " %.6f", $oldvalues{$name}{$valcnt+$offset};
|
||||
$model_weights{$name} .= sprintf " x";
|
||||
$offset++;
|
||||
}
|
||||
}
|
||||
my $decoder_config = "";
|
||||
$decoder_config = "-weight-overwrite '" . join(" ", values %model_weights) ."'" unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
|
||||
@ -1362,8 +1381,11 @@ sub get_featlist_from_file {
|
||||
my @names = ();
|
||||
my @startvalues = ();
|
||||
my @untuneables = ();
|
||||
my @allcomponentsuntuneable = ();
|
||||
my %skippeduntuneablecomponents = ();
|
||||
open my $fh, '<', $featlistfn or die "Can't read $featlistfn : $!";
|
||||
my $nr = 0;
|
||||
my $i = 0;
|
||||
my @errs = ();
|
||||
while (<$fh>) {
|
||||
$nr++;
|
||||
@ -1373,11 +1395,25 @@ sub get_featlist_from_file {
|
||||
next if (!defined($valuesStr));
|
||||
|
||||
my @values = split(/ /, $valuesStr);
|
||||
foreach my $value (@values) {
|
||||
push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
|
||||
if $value !~ /^[+-]?[0-9.\-e]+$/;
|
||||
push @names, $longname;
|
||||
push @startvalues, $value;
|
||||
my $valcnt = 0;
|
||||
my $hastuneablecomponent = 0;
|
||||
foreach my $value (@values) {
|
||||
if ($value =~ /^UNTUNEABLECOMPONENT$/) {
|
||||
$skippeduntuneablecomponents{$longname}{$valcnt} = 1;
|
||||
$i++;
|
||||
$valcnt++;
|
||||
} elsif ($value =~ /^[+-]?[0-9.\-e]+$/) {
|
||||
push @names, $longname;
|
||||
push @startvalues, $value;
|
||||
$i++;
|
||||
$valcnt++;
|
||||
$hastuneablecomponent = 1;
|
||||
} else {
|
||||
push @errs, "$featlistfn:$nr:Bad initial value of $longname: $value\n"
|
||||
}
|
||||
}
|
||||
if (!$hastuneablecomponent) {
|
||||
push @allcomponentsuntuneable, $longname;
|
||||
}
|
||||
}
|
||||
elsif (/^(\S+) UNTUNEABLE$/) {
|
||||
@ -1391,7 +1427,7 @@ sub get_featlist_from_file {
|
||||
warn join("", @errs);
|
||||
exit 1;
|
||||
}
|
||||
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables};
|
||||
return {"names"=>\@names, "values"=>\@startvalues, "untuneables"=>\@untuneables, "allcomponentsuntuneable"=>\@allcomponentsuntuneable, "skippeduntuneablecomponents"=>\%skippeduntuneablecomponents};
|
||||
}
|
||||
|
||||
|
||||
@ -1487,6 +1523,8 @@ sub create_config {
|
||||
print $out "# We were before running iteration $iteration\n";
|
||||
print $out "# finished ".`date`;
|
||||
|
||||
my %oldvalues = ();
|
||||
|
||||
my $line = <$ini_fh>;
|
||||
while(1) {
|
||||
last unless $line;
|
||||
@ -1501,34 +1539,51 @@ sub create_config {
|
||||
# parameter name
|
||||
my $parameter = $1;
|
||||
|
||||
if ($parameter eq "weight") {
|
||||
# leave weights 'til last. We're changing it
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
if ($line =~ /^([^_=\s]+)/) {
|
||||
for( @{$featlist->{"untuneables"}} ){
|
||||
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
|
||||
push @keep_weights, $line;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif (defined($P{$parameter})) {
|
||||
# found a param (thread, verbose etc) that we're overriding. Leave to the end
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
}
|
||||
}
|
||||
else {
|
||||
# unchanged parameter, write old
|
||||
print $out "[$parameter]\n";
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
print $out $line;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($parameter eq "weight") {
|
||||
# leave weights 'til last. We're changing it
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
if ($line =~ /^(\S+)= (.+)$/) {
|
||||
for( @{$featlist->{"untuneables"}} ){
|
||||
if ($1 eq $_ ) {# if weight is untuneable, copy it into new config
|
||||
push @keep_weights, $line;
|
||||
}
|
||||
}
|
||||
for( @{$featlist->{"allcomponentsuntuneable"}} ){
|
||||
if ($1 eq $_ ) {# if all dense weights are untuneable, copy it into new config
|
||||
push @keep_weights, $line;
|
||||
}
|
||||
}
|
||||
|
||||
my ($longname, $valuesStr) = ($1, $2);
|
||||
next if (!defined($valuesStr));
|
||||
print $valuesStr;
|
||||
my @values = split(/ /, $valuesStr);
|
||||
my $valcnt = 0;
|
||||
foreach my $value (@values) {
|
||||
if ($value =~ /^[+-]?[0-9.\-e]+$/) {
|
||||
$oldvalues{$longname}{$valcnt} = $value;
|
||||
}
|
||||
$valcnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
elsif (defined($P{$parameter})) {
|
||||
# found a param (thread, verbose etc) that we're overriding. Leave to the end
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
}
|
||||
}
|
||||
else {
|
||||
# unchanged parameter, write old
|
||||
print $out "[$parameter]\n";
|
||||
while ($line = <$ini_fh>) {
|
||||
last if $line =~ /^\[/;
|
||||
print $out $line;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# write all additional parameters
|
||||
foreach my $parameter (keys %P) {
|
||||
@ -1543,20 +1598,30 @@ sub create_config {
|
||||
|
||||
my $prevName = "";
|
||||
my $outStr = "";
|
||||
my $valcnt = 0;
|
||||
my $offset = 0;
|
||||
for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
|
||||
my $name = $featlist->{"names"}->[$i];
|
||||
my $val = $featlist->{"values"}->[$i];
|
||||
|
||||
if ($prevName eq $name) {
|
||||
$outStr .= " $val";
|
||||
if ($prevName ne $name) {
|
||||
print $out "$outStr\n";
|
||||
$valcnt = 0;
|
||||
$outStr = "$name=";
|
||||
$prevName = $name;
|
||||
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
|
||||
$outStr .= " $oldvalues{$name}{$valcnt+$offset}";
|
||||
$offset++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
print $out "$outStr\n";
|
||||
$outStr = "$name= $val";
|
||||
$prevName = $name;
|
||||
$outStr .= " $val";
|
||||
$valcnt++;
|
||||
while (defined $featlist->{"skippeduntuneablecomponents"}->{$name}{$valcnt+$offset}) {
|
||||
$outStr .= " $oldvalues{$name}{$valcnt+$offset}";
|
||||
$offset++;
|
||||
}
|
||||
}
|
||||
print $out "$outStr\n";
|
||||
print $out "$outStr\n";
|
||||
|
||||
for (@keep_weights) {
|
||||
print $out $_;
|
||||
|
Loading…
Reference in New Issue
Block a user