mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Allow the truecaser to work on uncased ASR input when the -a flag is passed
This commit is contained in:
parent
9266d65304
commit
501c51947b
@ -8,11 +8,14 @@ binmode(STDIN, ":utf8");
|
|||||||
binmode(STDOUT, ":utf8");
|
binmode(STDOUT, ":utf8");
|
||||||
|
|
||||||
# apply switches
|
# apply switches
|
||||||
my ($MODEL, $UNBUFFERED);
|
# ASR input has no case: make sure it is lowercase, and make sure known words are cased, e.g. 'i' is uppercased even if 'i' is known
|
||||||
die("truecase.perl --model MODEL [-b] < in > out")
|
my ($MODEL, $UNBUFFERED, $ASR);
|
||||||
unless &GetOptions('model=s' => \$MODEL,'b|unbuffered' => \$UNBUFFERED)
|
die("truecase.perl --model MODEL [-b] [-a] < in > out")
|
||||||
|
unless &GetOptions('model=s' => \$MODEL,'b|unbuffered' => \$UNBUFFERED, 'a|asr' => \$ASR)
|
||||||
&& defined($MODEL);
|
&& defined($MODEL);
|
||||||
if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; }
|
if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; }
|
||||||
|
my $asr = 0;
|
||||||
|
if (defined($ASR) && $ASR) { $asr = 1; }
|
||||||
|
|
||||||
my (%BEST,%KNOWN);
|
my (%BEST,%KNOWN);
|
||||||
open(MODEL,$MODEL) || die("ERROR: could not open '$MODEL'");
|
open(MODEL,$MODEL) || die("ERROR: could not open '$MODEL'");
|
||||||
@ -20,10 +23,12 @@ binmode(MODEL, ":utf8");
|
|||||||
while(<MODEL>) {
|
while(<MODEL>) {
|
||||||
my ($word,@OPTIONS) = split;
|
my ($word,@OPTIONS) = split;
|
||||||
$BEST{ lc($word) } = $word;
|
$BEST{ lc($word) } = $word;
|
||||||
|
if ($asr == 0) {
|
||||||
$KNOWN{ $word } = 1;
|
$KNOWN{ $word } = 1;
|
||||||
for(my $i=1;$i<$#OPTIONS;$i+=2) {
|
for(my $i=1;$i<$#OPTIONS;$i+=2) {
|
||||||
$KNOWN{ $OPTIONS[$i] } = 1;
|
$KNOWN{ $OPTIONS[$i] } = 1;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
close(MODEL);
|
close(MODEL);
|
||||||
|
|
||||||
@ -49,6 +54,9 @@ while(<STDIN>) {
|
|||||||
$word = $$WORD[$i];
|
$word = $$WORD[$i];
|
||||||
$otherfactors = "";
|
$otherfactors = "";
|
||||||
}
|
}
|
||||||
|
if ($asr){
|
||||||
|
$word = lc($word); #make sure ASR output is not uc
|
||||||
|
}
|
||||||
|
|
||||||
if ($sentence_start && defined($BEST{lc($word)})) {
|
if ($sentence_start && defined($BEST{lc($word)})) {
|
||||||
print $BEST{lc($word)}; # truecase sentence start
|
print $BEST{lc($word)}; # truecase sentence start
|
||||||
|
Loading…
Reference in New Issue
Block a user