minor fixes

This commit is contained in:
phikoehn 2012-04-12 00:25:57 +01:00
parent 70800c2012
commit 2d47a5637b
7 changed files with 16 additions and 20 deletions

View File

@ -1,2 +1,3 @@
cluster: townhill seville hermes lion seville sannox lutzow frontend
multicore-8: tyr thor odin crom saxnot vali vili freyja bragi hoenir
multicore-24: syn hel skaol saga

View File

@ -52,8 +52,9 @@ truecase
rerun-on-change: input-truecaser output-truecaser
default-name: corpus/truecased
pass-unless: input-truecaser output-truecaser
template-if: input-truecaser IN.$input-extension OUT.$input-extension -model IN1.$input-extension
template-if: output-truecaser IN.$output-extension OUT.$output-extension -model IN1.$output-extension
template-if: input-truecaser IN.$input-extension OUT.$input-extension -model IN1.$input-extension
template-if: output-truecaser IN.$output-extension OUT.$output-extension -model IN1.$output-extension
parallelizable: yes
lowercase
in: truecased-stem
out: lowercased-stem

View File

@ -5,6 +5,7 @@
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($Bin);
$SIG{CHLD} = "IGNORE"; # no zombies
my $host = `hostname`; chop($host);
print STDERR "STARTING UP AS PROCESS $$ ON $host AT ".`date`;
@ -123,7 +124,10 @@ sub init_agenda_graph() {
."(its all gone blank...) show\n"
."showpage\n";
close(PS);
`convert $graph_file.ps $graph_file.png`;
$SIG{CHLD} = undef;
`convert -alpha off $graph_file.ps $graph_file.png`;
$SIG{CHLD} = "IGNORE"; # no zombies
if (!$NO_GRAPH && !fork) {
# use ghostview by default, if it is installed
@ -1290,7 +1294,8 @@ sub check_if_crashed {
'error','killed','core dumped','can\'t read',
'no such file or directory','unknown option',
'died at','exit code','permission denied',
"Can't locate") {
'segmentation fault','abort',
'can\'t locate') {
if (/$pattern/i) {
my $not_error = 0;
if (defined($NOT_ERROR{&defined_step_id($i)})) {
@ -2480,12 +2485,7 @@ sub define_template {
my $extra = join(" ",@EXTRA);
if (&backoff_and_get(&extend_local_name($module,$set,$command))) {
if ($command eq "input-tokenizer") {
$cmd .= "\$$command -r $VERSION -o $out < $in > $out $extra\n";
}
else {
$cmd .= "\$$command < $in > $out $extra\n";
}
}
else {
$cmd .= "ln -s $in $out\n";
@ -2555,7 +2555,7 @@ sub define_template {
$cmd =~ s/OUT/$output/g;
$cmd =~ s/VERSION/$VERSION/g;
print "\tcmd is $cmd\n" if $VERBOSE;
while ($cmd =~ /^([\S\s]*)\$([^\s\/]+)([\S\s]*)$/) {
while ($cmd =~ /^([\S\s]*)\$([^\s\/\"\']+)([\S\s]*)$/) {
my ($pre,$variable,$post) = ($1,$2,$3);
$cmd = $pre
. &check_backoff_and_get(&extend_local_name($module,$set,$variable))

View File

@ -2,9 +2,6 @@
use strict;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
while(<STDIN>) {
s/\&bar;/\|/g;
s/\&lt;/\</g;

View File

@ -2,9 +2,6 @@
use strict;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
while(<STDIN>) {
chop;

View File

@ -35,7 +35,7 @@ while (defined $_) {
print STDERR "." if $nr % 10000 == 0;
print STDERR "($nr)" if $nr % 100000 == 0;
chomp;
s/ +/ /g; s/^ //; s/ $//;
s/\s+/ /g; s/^ //; s/ $//;
my @intokens = split / /;
# load lines of corresponding streams and ensure equal number of words
my @lines_of_extratoks;
@ -44,7 +44,7 @@ while (defined $_) {
die "Additional factor file $addfactors[$factor] contains too few sentences!"
if !defined $line;
chomp($line);
$line =~ s/ +/ /g; $line =~ s/^ //; $line =~ s/ $//;
$line =~ s/\s+/ /g; $line =~ s/^ //; $line =~ s/ $//;
my @toks = split / /, $line;
die "Incompatible number of words in factor $factor on line $nr. ($#toks != $#intokens)"
if $#toks != $#intokens;

View File

@ -6,8 +6,8 @@ my ($size,$in,$out) = @ARGV;
open(IN,$in);
open(OUT,">$out");
binmode(IN, ":UTF8");
binmode(OUT, ":UTF8");
binmode(IN, ":utf8");
binmode(OUT, ":utf8");
while(<IN>) {
my $first = 1;