added thread options for filter-model-given-input.pl

This commit is contained in:
alvations 2015-04-24 18:57:28 +02:00
parent e5feb1a73e
commit 585784f62a

8
scripts/training/filter-model-given-input.pl Executable file → Normal file
View File

@ -36,6 +36,7 @@ my $ZCAT = "gzip -cd";
# get optional parameters
# Defaults for the command-line options; each may be overridden by the
# GetOptions() call that follows.
my $opt_hierarchical = 0;
# Binarizer command; later matched against /processPhraseTableMin/ and
# /CreateOnDiskPt/ to decide how the filtered tables are binarized.
my $binarizer = undef;
# Forwarded as "-threads $threads" to the processPhraseTableMin and
# processLexicalTableMin command lines.
my $threads = 1; # Default is single-thread
# Overridable through the SyntaxFilterCmd option.
my $syntax_filter_cmd = "$SCRIPTS_ROOTDIR/../bin/filter-rule-table hierarchical";
# Set through the MinScore option (parsed as a string).
my $min_score = undef;
# Set through MinNonInitialRuleCount, which is marked DEPRECATED in the
# option table.
my $opt_min_non_initial_rule_count = undef;
@ -53,6 +54,7 @@ GetOptions(
"SyntaxFilterCmd=s" => \$syntax_filter_cmd,
"tempdir=s" => \$tempdir,
"MinScore=s" => \$min_score,
# "=i" makes the option take a mandatory integer argument (--threads N).
# A bare "threads" spec is a boolean flag: it would set $threads to 1 and
# leave N behind in @ARGV as a stray positional argument.
"threads=i" => \$threads,
"MinNonInitialRuleCount=i" => \$opt_min_non_initial_rule_count, # DEPRECATED
) or exit(1);
@ -404,8 +406,8 @@ for(my $i=0;$i<=$#TABLE;$i++) {
# ... phrase translation model
elsif ($binarizer =~ /processPhraseTableMin/) {
#compact phrase table
##my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] && rm $mid_file.sorted";
my $cmd = "$binarizer -in <($catcmd $mid_file | LC_ALL=C sort -T $tempdir) -out $new_file -nscores $TABLE_WEIGHTS[$i] -encoding None";
##my $cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $binarizer -in $mid_file.sorted -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads && rm $mid_file.sorted";
my $cmd = "$binarizer -in <($catcmd $mid_file | LC_ALL=C sort -T $tempdir) -out $new_file -nscores $TABLE_WEIGHTS[$i] -threads $threads -encoding None";
safesystem($cmd) or die "Can't binarize";
} elsif ($binarizer =~ /CreateOnDiskPt/) {
my $cmd = "$binarizer $mid_file $new_file.bin";
@ -426,7 +428,7 @@ for(my $i=0;$i<=$#TABLE;$i++) {
$lexbin =~ s/PhraseTable/LexicalTable/;
my $cmd;
if ($lexbin =~ /processLexicalTableMin/) {
$cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file && rm $mid_file.sorted";
$cmd = "$catcmd $mid_file | LC_ALL=C sort -T $tempdir > $mid_file.sorted && $lexbin -in $mid_file.sorted -out $new_file -threads $threads && rm $mid_file.sorted";
} else {
$lexbin =~ s/^\s*(\S+)\s.+/$1/; # no options
$cmd = "$lexbin -in $mid_file -out $new_file";