mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 10:59:21 +03:00
47 lines
1.0 KiB
Perl
Executable File
47 lines
1.0 KiB
Perl
Executable File
#!/usr/bin/perl

# extract-singletons.perl
#
# Usage: extract-singletons.perl [-q] target source align
#
# Reads three parallel files line by line: target sentences, source
# sentences and word alignments.  Each alignment line is a whitespace
# separated list of index pairs "t s t s ...", where t is used as a
# 0-based subscript into the target words and s as a 0-based subscript
# into the source words of the same line.
#
# Every aligned source word is tallied over the whole input:
#   +1  when the link is one-to-one (neither end takes part in another
#       link on that line) and the target word is identical to it;
#   +2  for any other link.
# At the end the source words whose tally is exactly 1 are printed to
# STDOUT, sorted, one per line.
#
# Options:
#   -q  do not print the progress counter (every 1000 lines) to STDERR.

use strict;
use warnings;
use Getopt::Std;

# Entry point: parse the command line, process the three files and print
# the resulting word list.  Dies when arguments are missing or a file
# cannot be opened.
sub main {
    my %opts;
    getopts('q', \%opts);

    # Check the argument count instead of the truth of the last argument,
    # so that a file that happens to be named "0" is accepted.
    die "\nUsage: extract-singletons.perl target source align\n\n"
        if @ARGV < 3;
    my ($target, $source, $align) = @ARGV;

    # Three-argument open: the names are always treated as plain files
    # to read, never as a mode or a command.
    open(my $target_fh, '<', $target)
        or die "Error: unable to open target file \"$target\"!\n";
    open(my $source_fh, '<', $source)
        or die "Error: unable to open source file \"$source\"!\n";
    open(my $align_fh, '<', $align)
        or die "Error: unable to open alignment file \"$align\"!\n";

    my %count;            # source word => tally (1 means "print it")
    my $line_no   = 0;
    my $malformed = 0;    # alignment pairs that could not be used

    LINE:
    while (my $target_line = <$target_fh>) {
        $line_no++;
        print STDERR "\r$line_no" if !$opts{q} && $line_no % 1000 == 0;

        my $source_line = <$source_fh>;
        my $align_line  = <$align_fh>;

        # Without a source line there is no word to tally and without an
        # alignment line there are no links, so nothing further can
        # change the result once either file is exhausted.
        unless (defined $source_line && defined $align_line) {
            warn "Warning: source or alignment file ends before the "
               . "target file (line $line_no); stopping.\n";
            last LINE;
        }

        my @target_words = split ' ', $target_line;
        my @source_words = split ' ', $source_line;
        my ($links, $bad) = _parse_links($align_line);
        $malformed += $bad;

        _tally_links(\%count, \@target_words, \@source_words, $links);
    }

    close $target_fh;
    close $source_fh;
    close $align_fh;

    warn "Warning: ignored $malformed malformed alignment pair(s).\n"
        if $malformed;

    for my $word (sort keys %count) {
        print "$word\n" if $count{$word} == 1;
    }
    return;
}

# Split one alignment line into [target_index, source_index] pairs.
# Returns (\@links, $number_of_rejected_pairs).  A pair is rejected when
# either token is not a non-negative integer or when the last token has
# no partner; previously such input silently numified to index 0 (or to
# an index counted from the end) and corrupted the link counts.
sub _parse_links {
    my ($line) = @_;

    my @tokens   = split ' ', $line;
    my @links;
    my $rejected = 0;

    for (my $i = 0; $i < @tokens; $i += 2) {
        my ($t, $s) = @tokens[ $i, $i + 1 ];
        if (defined $s && $t =~ /\A\d+\z/ && $s =~ /\A\d+\z/) {
            push @links, [ $t + 0, $s + 0 ];    # "07" and "7" are the same index
        }
        else {
            $rejected++;
        }
    }
    return (\@links, $rejected);
}

# Update the corpus-wide tally in %$count with the links of one line.
#   $count        - hash ref, source word => tally (modified in place)
#   $target_words - array ref of the target words of the line
#   $source_words - array ref of the source words of the line
#   $links        - array ref of [target_index, source_index] pairs
sub _tally_links {
    my ($count, $target_words, $source_words, $links) = @_;

    # Number of links each position takes part in.  Hashes rather than
    # arrays, so a stray huge index cannot allocate a huge array.
    my (%target_fanout, %source_fanout);
    for my $link (@$links) {
        $target_fanout{ $link->[0] }++;
        $source_fanout{ $link->[1] }++;
    }

    for my $link (@$links) {
        my ($t, $s) = @$link;

        # A link past the end of the source sentence has no word to
        # tally; it used to be counted under the empty string and could
        # surface as an empty output line.
        my $source_word = $source_words->[$s];
        next if !defined $source_word;

        my $target_word = $target_words->[$t];
        my $is_identity_link =
               $target_fanout{$t} == 1
            && $source_fanout{$s} == 1
            && defined $target_word
            && $target_word eq $source_word;

        # 1: print this word if it only occurs here; 2: never print it.
        $count->{$source_word} += $is_identity_link ? 1 : 2;
    }
    return;
}

# Run only when executed as a script, so the subs can also be loaded
# from another file without side effects.
main() unless caller;