parallel scoring

This commit is contained in:
Hieu Hoang 2012-02-08 17:25:42 +07:00
parent 3ed71773f2
commit 250b31be44

View File

@ -1437,29 +1437,36 @@ sub score_phrase_phrase_extract {
$CORE_SCORE_OPTIONS .= " --NoLex" if $NO_LEX;
my $substep = 1;
my $isParent = 1;
my @children;
for my $direction ("f2e","e2f") {
next if $___CONTINUE && -e "$ttable_file.half.$direction";
next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.sorted";
my $inverse = "";
my $extract_filename = $extract_file;
if ($direction eq "e2f") {
$inverse = " --Inverse";
$extract_filename = $extract_file.".inv";
}
my $extract = "$extract_filename.sorted";
my $pid = fork();
if ($pid == 0)
{
next if $___CONTINUE && -e "$ttable_file.half.$direction";
next if $___CONTINUE && $direction eq "e2f" && -e "$ttable_file.half.e2f.sorted";
my $inverse = "";
my $extract_filename = $extract_file;
if ($direction eq "e2f") {
$inverse = " --Inverse";
$extract_filename = $extract_file.".inv";
}
my $extract = "$extract_filename.sorted";
if (!($___CONTINUE && -e "$extract_filename.sorted")) {
# sorting
print STDERR "(6.".($substep++).") sorting $direction @ ".`date`;
if (-e "$extract_filename.gz") {
safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR");
}
else {
safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR");
}
}
if (!($___CONTINUE && -e "$extract_filename.sorted")) {
# sorting
print STDERR "(6.".($substep++).") sorting $direction @ ".`date`;
if (-e "$extract_filename.gz") {
safesystem("gunzip < $extract_filename.gz | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR > $extract_filename.sorted") or die("ERROR");
}
else {
safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $extract_filename > $extract_filename.sorted") or die("ERROR");
}
}
print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`;
print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`;
my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction $inverse";
$cmd .= " --Hierarchical" if $_HIERARCHICAL;
@ -1473,14 +1480,35 @@ sub score_phrase_phrase_extract {
print $cmd."\n";
safesystem($cmd) or die "ERROR: Scoring of phrases failed";
if (! $debug) { safesystem("rm -f $extract") or die("ERROR"); }
# sorting inverse phrase-table-half to sync up with regular one
print STDERR "(6.5) sorting inverse e2f table@ ".`date`;
if ($direction eq "e2f" && ! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR");
if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
}
exit();
}
else
{ # parent
push(@children, $pid);
}
}
# sorting inverse phrase-table-half to sync up with regular one
print STDERR "(6.5) sorting inverse e2f table@ ".`date`;
if (! ($___CONTINUE && -e "$ttable_file.half.e2f.sorted")) {
safesystem("LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR $ttable_file.half.e2f > $ttable_file.half.e2f.sorted") or die("ERROR");
if (! $debug) { safesystem("rm -f $ttable_file.half.e2f") or die("ERROR"); }
# wait for everything is finished
if ($isParent)
{
foreach (@children) {
waitpid($_, 0);
}
}
else
{
die "shouldn't be here";
}
# merging the two halves
print STDERR "(6.6) consolidating the two halves @ ".`date`;