mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-30 23:42:30 +03:00
Merge ../mosesdecoder into perf_moses2
This commit is contained in:
commit
83f2618514
@ -1,8 +1,8 @@
|
||||
#!/bin/bash
|
||||
# this script assumes that all 3rd-party dependencies are installed under ./opt
|
||||
# if not supplied otherwise, this script assumes that all 3rd-party dependencies are installed under ./opt
|
||||
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
|
||||
|
||||
set -e -o pipefail
|
||||
opt=$(pwd)/opt
|
||||
./bjam --with-irstlm=$opt/irstlm-5.80.08 --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
|
||||
OPT=${OPT:-$(pwd)/OPT}
|
||||
./bjam --with-irstlm=$OPT/irstlm-5.80.08 --with-boost=$OPT --with-cmph=$OPT --with-xmlrpc-c=$OPT --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
|
||||
|
||||
|
@ -56,12 +56,12 @@ sourceforge = http://downloads.sourceforge.net/project
|
||||
nproc := $(shell getconf _NPROCESSORS_ONLN)
|
||||
sfget = mkdir -p '${TMP}' && cd '${TMP}' && wget -qO- ${URL} | tar xz
|
||||
configure-make-install = cd '$1' && ./configure --prefix='${PREFIX}'
|
||||
configure-make-install += && make -j$(getconf _NPROCESSORS_ONLN) && make install
|
||||
configure-make-install += && make -j${nproc} && make install
|
||||
|
||||
# XMLRPC-C for moses server
|
||||
xmlrpc: URL=$(sourceforge)/xmlrpc-c/Xmlrpc-c%20Super%20Stable/1.33.17/xmlrpc-c-1.33.17.tgz
|
||||
xmlrpc: TMP=$(CWD)/build/xmlrpc
|
||||
xmlrpc: PREFIX=${XMLRPC_PREFIX}
|
||||
xmlrpc: override PREFIX=${XMLRPC_PREFIX}
|
||||
xmlrpc: | $(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config)
|
||||
$(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config):
|
||||
$(sfget)
|
||||
@ -71,7 +71,7 @@ $(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config):
|
||||
# CMPH for CompactPT
|
||||
cmph: URL=$(sourceforge)/cmph/cmph/cmph-2.0.tar.gz
|
||||
cmph: TMP=$(CWD)/build/cmph
|
||||
cmph: PREFIX=${CMPH_PREFIX}
|
||||
cmph: override PREFIX=${CMPH_PREFIX}
|
||||
cmph: | $(call safepath,${CMPH_PREFIX}/bin/cmph)
|
||||
$(call safepath,${CMPH_PREFIX}/bin/cmph):
|
||||
$(sfget)
|
||||
@ -82,20 +82,20 @@ $(call safepath,${CMPH_PREFIX}/bin/cmph):
|
||||
irstlm: URL=$(sourceforge)/irstlm/irstlm/irstlm-5.80/irstlm-5.80.08.tgz
|
||||
irstlm: TMP=$(CWD)/build/irstlm
|
||||
irstlm: VERSION=$(basename $(notdir $(irstlm_url)))
|
||||
irstlm: PREFIX=${IRSTLM_PREFIX}
|
||||
irstlm: override PREFIX=${IRSTLM_PREFIX}
|
||||
irstlm: | $(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh)
|
||||
$(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh):
|
||||
$(sfget)
|
||||
cd $$(find '${TMP}' -name trunk) && ./regenerate-makefiles.sh \
|
||||
&& ./configure --prefix='${PREFIX}' && make -j${shell getconf _NPROCESSORS_ONLN} && make install -j$(shell getconf _NPROCESSORS_ONLN)
|
||||
&& ./configure --prefix='${PREFIX}' && make -j${nproc} && make install -j${nproc}
|
||||
rm -rf ${TMP}
|
||||
|
||||
# boost
|
||||
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz/download
|
||||
boost: TMP=$(CWD)/build/boost
|
||||
boost: PREFIX=${BOOST_PREFIX}
|
||||
boost: override PREFIX=${BOOST_PREFIX}
|
||||
boost: | $(call safepath,${BOOST_PREFIX}/include/boost)
|
||||
$(call safepath,${BOOST_PREFIX}/include/boost):
|
||||
$(sfget)
|
||||
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j$(shell getconf _NPROCESSORS_ONLN) install
|
||||
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
|
||||
rm -rf ${TMP}
|
||||
|
@ -51,6 +51,12 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
@ -389,7 +395,7 @@ alignment-symmetrization-method = grow-diag-final-and
|
||||
#
|
||||
#operation-sequence-model = "yes"
|
||||
#operation-sequence-model-order = 5
|
||||
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
|
||||
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% '"
|
||||
#
|
||||
# OR if you want to use with SRILM
|
||||
#
|
||||
|
@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
|
@ -54,6 +54,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
|
@ -54,6 +54,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
|
@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
|
@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
|
||||
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
|
||||
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
|
||||
|
||||
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
|
||||
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
|
||||
# "Farasa: A Fast and Furious Segmenter for Arabic"
|
||||
input-tokenizer = "$farasa-dir/farasa_moses.sh"
|
||||
|
||||
# truecasers - comment out if you do not use the truecaser
|
||||
input-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
output-truecaser = $moses-script-dir/recaser/truecase.perl
|
||||
|
@ -59,7 +59,7 @@ my $help=0;
|
||||
my $dbg=0;
|
||||
my $jobs=4;
|
||||
my $cache_model=undef;
|
||||
my $mosescmd="$ENV{MOSESBIN}/moses"; #decoder in use
|
||||
my $mosescmd="$ENV{MOSESBIN}/moses" if defined $ENV{"MOSESBIN"}; #decoder in use
|
||||
my $inputlist=undef;
|
||||
my $inputfile=undef;
|
||||
my $inputtype=0;
|
||||
@ -276,7 +276,7 @@ sub getNbestParameters(){
|
||||
#get parameters for search graph computation (possibly from configuration file)
|
||||
sub getSearchGraphParameters(){
|
||||
if (!$searchgraphlist){
|
||||
open (CFG, "$cfgfile");
|
||||
open (CFG, $cfgfile) or die "Can't read '$cfgfile'";
|
||||
while (chomp($_=<CFG>)){
|
||||
if (/^\[output-search-graph\]/ || /^\[osg\]/){
|
||||
my $tmp;
|
||||
@ -299,7 +299,7 @@ sub getSearchGraphParameters(){
|
||||
#get parameters for word graph computation (possibly from configuration file)
|
||||
sub getWordGraphParameters(){
|
||||
if (!$wordgraphlist){
|
||||
open (CFG, "$cfgfile");
|
||||
open (CFG, $cfgfile) or die "Can't read '$cfgfile'";
|
||||
while (chomp($_=<CFG>)){
|
||||
if (/^\[output-word-graph\]/ || /^\[owg\]/){
|
||||
my $tmp;
|
||||
@ -843,12 +843,14 @@ sub concatenate_nbest(){
|
||||
|
||||
#computing the length of each input file
|
||||
my @in=();
|
||||
open (IN, "${inputfile}.${splitpfx}${idx}.trans");
|
||||
open (IN, "${inputfile}.${splitpfx}${idx}.trans")
|
||||
or die "Failed to open '${inputfile}.${splitpfx}${idx}.trans'";
|
||||
@in=<IN>;
|
||||
close(IN);
|
||||
$inplength{$idx} = scalar(@in);
|
||||
|
||||
open (IN, "${nbestfile}.${splitpfx}${idx}");
|
||||
open (IN, "${nbestfile}.${splitpfx}${idx}")
|
||||
or die "Failed to open '${nbestfile}.${splitpfx}${idx}'";
|
||||
while (<IN>){
|
||||
my ($code,@extra)=split(/\|\|\|/,$_);
|
||||
$code += $offset;
|
||||
@ -1078,7 +1080,7 @@ sub safesystem {
|
||||
sub getPwdCmd(){
|
||||
my $pwdcmd="pwd";
|
||||
my $a;
|
||||
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
|
||||
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print \$1}'`);
|
||||
if ($a && -e $a){ $pwdcmd=$a; }
|
||||
return $pwdcmd;
|
||||
}
|
||||
|
@ -252,7 +252,7 @@ sub safesystem {
|
||||
sub getPwdCmd(){
|
||||
my $pwdcmd="pwd";
|
||||
my $a;
|
||||
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
|
||||
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print \$1}'`);
|
||||
if ($a && -e $a){ $pwdcmd=$a; }
|
||||
return $pwdcmd;
|
||||
}
|
||||
|
@ -123,7 +123,7 @@ sub clone_file_or_die {
|
||||
my $src = shift;
|
||||
my $tgt = shift;
|
||||
|
||||
my $src = resolve($src); # resolve symlinks
|
||||
$src = resolve($src); # resolve symlinks
|
||||
|
||||
my $ok = 0;
|
||||
if ($symlink) {
|
||||
|
Loading…
Reference in New Issue
Block a user