same spec for dedicated script as for train-model.perl and filter-model-given-input.pl

This commit is contained in:
Philipp Koehn 2014-12-21 01:37:05 +00:00
parent 831f947874
commit 59fdb3d99c

View File

@@ -2,17 +2,40 @@
use strict;
# Legacy single-threshold handling: the raw first command-line argument.
my $THRESHOLD = $ARGV[0];
# Because the two tests are joined with `||`, any defined argument passes
# without its value being compared; the script only dies here when no
# argument was given at all.
# NOTE(review): $ARGV[0] may also be a "field:threshold" spec, so tightening
# this to a numeric "> 0" test would reject specs such as "0:0.001" -- confirm
# whether these two lines are still meant to be part of the script.
die("please specify threshold (e.g., 0.00001)") unless defined($THRESHOLD) || $THRESHOLD > 0;
# Minimum required value per score field: score field index => threshold.
# Filled from the spec passed as the first command-line argument.
my %MIN_SCORE;
# legacy: same threshold for direct and indirect phrase translation probabilities
# (a bare number is applied to score fields 0 and 2 alike)
if ($ARGV[0] =~ /^[\d\.]+$/) {
  $MIN_SCORE{0} = $ARGV[0];
  # Must be $ARGV[0] as well: only the first argument holds the threshold.
  # Reading $ARGV[2] left field 2 with an undefined minimum whenever the
  # script was called with a single argument, so that field was never filtered.
  $MIN_SCORE{2} = $ARGV[0];
}
# advanced: field:threshold,field:threshold
# recommended use is "2:0.0001"
else {
  foreach my $entry (split(/,/,$ARGV[0])) {
    my ($id,$score) = split(/:/,$entry);
    # A missing, zero or non-numeric threshold compares equal to 0 and is
    # rejected as a malformed spec entry.
    if ($score == 0) {
      die("error in spec $entry (full spec $ARGV[0])");
    }
    $MIN_SCORE{$id} = $score;
    print STDERR "score $id must be at least $score\n";
  }
}
# Nothing was configured (e.g. an empty spec): refuse to run without a filter.
die("please specify threshold (e.g., 0.0001)") unless scalar keys %MIN_SCORE;
# Counters: lines dropped by the filter, and lines read in total.
my ($filtered,$total) = (0,0);
# Read the input line by line from STDIN; lines that survive are printed as-is.
while(my $line = <STDIN>) {
# Fields are separated by " ||| "; the third field is split on single spaces
# into the individual scores.
my @ITEM = split(/ \|\|\| /,$line);
my @SCORE = split(/ /,$ITEM[2]);
$total++;
# NOTE(review): the next three lines apply the single $THRESHOLD to scores 0
# and 2 and open an `if` that has no matching close, leaving the braces in
# this span unbalanced. They look like the pre-change filter shown next to
# its replacement below -- confirm which version is intended before running.
if ($SCORE[0] < $THRESHOLD || $SCORE[2] < $THRESHOLD) {
$filtered++;
next;
# Per-field filter: count how many configured score fields fall below the
# minimum recorded for them in %MIN_SCORE.
my $filter_this = 0;
foreach my $key (keys %MIN_SCORE) {
if ($SCORE[$key] < $MIN_SCORE{$key}) {
$filter_this++;
}
}
# Drop the line if at least one configured score was too low.
if ($filter_this) {
$filtered++;
next;
}
# Otherwise pass the line through unchanged.
print $line;
}