mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 11:28:48 +03:00
The factor for OOV words is now 0 instead of <unk>, because <unk> interferes with source input. Add an extra argument that controls whether input words are lowercased.
This commit is contained in:
parent
a402523ef5
commit
a3bd695cd4
@ -2,7 +2,7 @@
|
||||
|
||||
use strict;
|
||||
|
||||
my ($cluster_file,$in,$out,$tmp) = @ARGV;
|
||||
my ($lowercase, $cluster_file,$in,$out,$tmp) = @ARGV;
|
||||
|
||||
my $CLUSTER = &read_cluster_from_mkcls($cluster_file);
|
||||
|
||||
@ -17,7 +17,10 @@ while(<IN>) {
|
||||
s/ $//;
|
||||
my $first = 1;
|
||||
foreach my $word (split) {
|
||||
my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "<unk>";
|
||||
if ($lowercase) {
|
||||
$word = lc($word);
|
||||
}
|
||||
my $cluster = defined($$CLUSTER{$word}) ? $$CLUSTER{$word} : "0";
|
||||
print OUT " " unless $first;
|
||||
print OUT $cluster;
|
||||
$first = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user