mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-05 02:22:21 +03:00
If GIZA returns sentences that have different lengths in different directions (due to truncation or other errors), don't silently fail. Print a blank line instead.
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1562 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
fa31d83421
commit
25750c6555
@ -15,6 +15,8 @@ while ($w=shift @ARGV){
|
|||||||
$cnt=shift(@ARGV),next if $w eq "-c";
|
$cnt=shift(@ARGV),next if $w eq "-c";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
my $lc = 0;
|
||||||
|
|
||||||
if (!$dir || !inv){
|
if (!$dir || !inv){
|
||||||
print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
|
print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
|
||||||
print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
|
print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
|
||||||
@ -49,26 +51,35 @@ sub ReadBiAlign{
|
|||||||
chop($t2=<$fd2>);
|
chop($t2=<$fd2>);
|
||||||
|
|
||||||
@a=@b=();
|
@a=@b=();
|
||||||
|
$lc++;
|
||||||
|
|
||||||
#get target statistics
|
#get target statistics
|
||||||
$n=1;
|
$n=1;
|
||||||
$t1=~s/NULL \(\{(( \d+)*) \}\)//;
|
$t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
|
||||||
while ($t1=~s/(\S+) \(\{(( \d+)*) \}\)//){
|
while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
|
||||||
grep($a[$_]=$n,split(/ /,$2));
|
grep($a[$_]=$n,split(/\s+/,$2));
|
||||||
$n++;
|
$n++;
|
||||||
}
|
}
|
||||||
|
|
||||||
$m=1;
|
$m=1;
|
||||||
$t2=~s/NULL \(\{(( \d+)*) \}\)//;
|
$t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
|
||||||
while ($t2=~s/(\S+) \(\{(( \d+)*) \}\)//){
|
while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
|
||||||
grep($b[$_]=$m,split(/ /,$2));
|
grep($b[$_]=$m,split(/\s+/,$2));
|
||||||
$m++;
|
$m++;
|
||||||
}
|
}
|
||||||
|
|
||||||
$M=split(/ /,$s1);
|
$M=split(/\s+/,$s1);
|
||||||
$N=split(/ /,$s2);
|
$N=split(/\s+/,$s2);
|
||||||
|
|
||||||
return 0 if $m != ($M+1) || $n != ($N+1);
|
if ($m != ($M+1) || $n != ($N+1)) {
|
||||||
|
print STDERR "Sentence mismatch error! Line #$lc\n";
|
||||||
|
$s1 = "ALIGN_ERR";
|
||||||
|
$s2 = "ALIGN_ERR";
|
||||||
|
@a=(); @b=();
|
||||||
|
for ($j=1;$j<2;$j++){ $a[$j]=1; }
|
||||||
|
for ($i=1;$i<2;$i++){ $b[$i]=1; }
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
for ($j=1;$j<$m;$j++){
|
for ($j=1;$j<$m;$j++){
|
||||||
$a[$j]=0 if !$a[$j];
|
$a[$j]=0 if !$a[$j];
|
||||||
@ -94,6 +105,7 @@ while(!eof(DIR)){
|
|||||||
print $#b," $tgt \# @b[1..$#b]\n";
|
print $#b," $tgt \# @b[1..$#b]\n";
|
||||||
}
|
}
|
||||||
else{
|
else{
|
||||||
|
print "\n";
|
||||||
print STDERR "." if !(++$skip % 1000);
|
print STDERR "." if !(++$skip % 1000);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user