diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl.missing_bin_dir index d5026a112..27559d220 100755 --- a/scripts/training/train-model.perl.missing_bin_dir +++ b/scripts/training/train-model.perl.missing_bin_dir @@ -1437,29 +1437,36 @@ sub score_phrase_phrase_extract { $CORE_SCORE_OPTIONS .= " --NoLex" if $NO_LEX; my $substep = 1; + my $isParent = 1; + my @children; + for my $direction ("f2e","e2f") { - next if $___CONTINUE && -e "$ttable_file.half.$direction"; - next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.sorted"; - my $inverse = ""; - my $extract_filename = $extract_file; - if ($direction eq "e2f") { - $inverse = " --Inverse"; - $extract_filename = $extract_file.".inv"; - } - my $extract = "$extract_filename.sorted"; + my $pid = fork(); + + if ($pid == 0) + { + next if $___CONTINUE && -e "$ttable_file.half.$direction"; + next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.sorted"; + my $inverse = ""; + my $extract_filename = $extract_file; + if ($direction eq "e2f") { + $inverse = " --Inverse"; + $extract_filename = $extract_file.".inv"; + } + my $extract = "$extract_filename.sorted"; - if (!($___CONTINUE && -e "$extract_filename.sorted")) { - # sorting - print STDERR "(6.".($substep++).") sorting $direction @ ".`date`; - if (-e "$extract_filename.gz") { - safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR"); - } - else { - safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR"); - } - } + if (!($___CONTINUE && -e "$extract_filename.sorted")) { + # sorting + print STDERR "(6.".($substep++).") sorting $direction @ ".`date`; + if (-e "$extract_filename.gz") { + safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR"); + } + else { + safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR"); + } + } - print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`; + print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`; my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction $inverse"; $cmd .= " --Hierarchical" if $_HIERARCHICAL; @@ -1473,14 +1480,35 @@ sub score_phrase_phrase_extract { print $cmd."\n"; safesystem($cmd) or die "ERROR: Scoring of phrases failed"; if (! $debug) { safesystem("rm -f $extract") or die("ERROR"); } + + # sorting inverse phrase-table-half to sync up with regular one + print STDERR "(6.5) sorting inverse e2f table@ ".`date`; + if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) { + safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR"); + if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); } + } + + exit(); + } + else + { # parent + push(@children, $pid); + } + } - # sorting inverse phrase-table-half to sync up with regular one - print STDERR "(6.5) sorting inverse e2f table@ ".`date`; - if (! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) { - safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR"); - if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); } + # wait for everything is finished + if ($isParent) + { + foreach (@children) { + waitpid($_, 0); + } } + else + { + die "shouldn't be here"; + } + # merging the two halves print STDERR "(6.6) consolidating the two halves @ ".`date`;