mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-17 23:40:50 +03:00
duplicated functionality with ems/support/lmplz-wrapper.perl
This commit is contained in:
parent
ab01d30687
commit
c15f3ef068
@ -1,41 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# Compatible with sri LM-creating script, eg.
|
||||
# ngram-count -order 5 -interpolate -wbdiscount -unk -text corpus.txt -lm lm.txt
|
||||
# To use it in the EMS, add this to the [LM] section
|
||||
# lm-training = "$moses-script-dir/generic/trainlm-lmplz.perl -lmplz $lmplz"
|
||||
# settings = "-T $working-dir/tmp -S 10G"
|
||||
# Also, make sure that $lmplz is defined (in the [LM] or [GENERAL] section.
|
||||
# It should point to the binary file
|
||||
# lmplz = /home/waziz/workspace/github/moses/bin/lmplz
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use FindBin qw($RealBin);
|
||||
use Getopt::Long qw/GetOptionsFromArray/;
|
||||
#use Getopt::Long;
|
||||
Getopt::Long::Configure("pass_through", "no_ignore_case");
|
||||
|
||||
my $order = 3; # order of language model (default trigram)
|
||||
my $corpus; # input text data
|
||||
my $lm; # generated language model
|
||||
my $lmplz; # bin directory of IRSTLM
|
||||
my $help = 0;
|
||||
|
||||
my @optconfig = (
|
||||
"-order=s" => \$order,
|
||||
"-text=s" => \$corpus,
|
||||
"-lm=s" => \$lm,
|
||||
"-lmplz=s" => \$lmplz,
|
||||
);
|
||||
|
||||
GetOptionsFromArray(\@ARGV, @optconfig);
|
||||
die("ERROR: please set text") unless defined($corpus);
|
||||
die("ERROR: please set lm") unless defined($lm);
|
||||
die("ERROR: please set lmplz") unless defined($lmplz);
|
||||
|
||||
my $settings = join(' ', @ARGV);
|
||||
my $cmd = "$lmplz --order $order $settings < $corpus > $lm";
|
||||
|
||||
print STDERR "EXECUTING $cmd\n";
|
||||
`$cmd`;
|
Loading…
Reference in New Issue
Block a user