Merge ../mosesdecoder into perf_moses2

This commit is contained in:
Hieu Hoang 2016-05-05 10:56:15 +01:00
commit 83f2618514
11 changed files with 52 additions and 19 deletions

View File

@ -1,8 +1,8 @@
#!/bin/bash
# this script assumes that all 3rd-party dependencies are installed under ./opt
# if not supplied otherwise, this script assumes that all 3rd-party dependencies are installed under ./opt
# you can install all 3rd-party dependencies by running make -f contrib/Makefiles/install-dependencies.gmake
set -e -o pipefail
opt=$(pwd)/opt
./bjam --with-irstlm=$opt/irstlm-5.80.08 --with-boost=$opt --with-cmph=$opt --with-xmlrpc-c=$opt --with-mm --with-probing-pt -j$(getconf _NPROCESSORS_ONLN) $@
# Root of the 3rd-party dependencies. A caller-supplied OPT wins; otherwise
# fall back to ./opt, the directory this script's leading comment documents
# (the previous default, ./OPT, does not exist on case-sensitive filesystems).
OPT=${OPT:-$(pwd)/opt}
# Expansions are quoted so paths containing spaces survive, and "$@" forwards
# each extra command-line argument to bjam as a separate word.
./bjam --with-irstlm="$OPT/irstlm-5.80.08" --with-boost="$OPT" --with-cmph="$OPT" --with-xmlrpc-c="$OPT" --with-mm --with-probing-pt -j"$(getconf _NPROCESSORS_ONLN)" "$@"

View File

@ -56,12 +56,12 @@ sourceforge = http://downloads.sourceforge.net/project
# Number of online processors, evaluated once when the makefile is read
# (simple `:=` assignment of a $(shell ...) call).
nproc := $(shell getconf _NPROCESSORS_ONLN)
# Shell fragment: create ${TMP}, change into it, and stream ${URL} through
# wget into `tar xz`. TMP and URL are expected to be set by the caller.
sfget = mkdir -p '${TMP}' && cd '${TMP}' && wget -qO- ${URL} | tar xz
# Shell fragment: change into directory $1 and configure with ${PREFIX} as
# the install prefix. Presumably expanded via $(call configure-make-install,<dir>)
# -- confirm against the recipes that use it.
configure-make-install = cd '$1' && ./configure --prefix='${PREFIX}'
# Pre-change line of this hunk: in a makefile `$(getconf ...)` is a make
# variable reference, not shell command substitution, so -j gets no job count.
configure-make-install += && make -j$(getconf _NPROCESSORS_ONLN) && make install
# Post-change line of this hunk: reuses the job count computed in nproc.
configure-make-install += && make -j${nproc} && make install
# XMLRPC-C for moses server
xmlrpc: URL=$(sourceforge)/xmlrpc-c/Xmlrpc-c%20Super%20Stable/1.33.17/xmlrpc-c-1.33.17.tgz
xmlrpc: TMP=$(CWD)/build/xmlrpc
xmlrpc: PREFIX=${XMLRPC_PREFIX}
xmlrpc: override PREFIX=${XMLRPC_PREFIX}
xmlrpc: | $(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config)
$(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config):
$(sfget)
@ -71,7 +71,7 @@ $(call safepath,${XMLRPC_PREFIX}/bin/xmlrpc-c-config):
# CMPH for CompactPT
cmph: URL=$(sourceforge)/cmph/cmph/cmph-2.0.tar.gz
cmph: TMP=$(CWD)/build/cmph
cmph: PREFIX=${CMPH_PREFIX}
cmph: override PREFIX=${CMPH_PREFIX}
cmph: | $(call safepath,${CMPH_PREFIX}/bin/cmph)
$(call safepath,${CMPH_PREFIX}/bin/cmph):
$(sfget)
@ -82,20 +82,20 @@ $(call safepath,${CMPH_PREFIX}/bin/cmph):
irstlm: URL=$(sourceforge)/irstlm/irstlm/irstlm-5.80/irstlm-5.80.08.tgz
irstlm: TMP=$(CWD)/build/irstlm
irstlm: VERSION=$(basename $(notdir $(irstlm_url)))
irstlm: PREFIX=${IRSTLM_PREFIX}
irstlm: override PREFIX=${IRSTLM_PREFIX}
irstlm: | $(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh)
$(call safepath,$(IRSTLM_PREFIX)/bin/build-lm.sh):
$(sfget)
cd $$(find '${TMP}' -name trunk) && ./regenerate-makefiles.sh \
&& ./configure --prefix='${PREFIX}' && make -j${shell getconf _NPROCESSORS_ONLN} && make install -j$(shell getconf _NPROCESSORS_ONLN)
&& ./configure --prefix='${PREFIX}' && make -j${nproc} && make install -j${nproc}
rm -rf ${TMP}
# boost
boost: URL=http://sourceforge.net/projects/boost/files/boost/1.59.0/boost_1_59_0.tar.gz/download
boost: TMP=$(CWD)/build/boost
boost: PREFIX=${BOOST_PREFIX}
boost: override PREFIX=${BOOST_PREFIX}
boost: | $(call safepath,${BOOST_PREFIX}/include/boost)
$(call safepath,${BOOST_PREFIX}/include/boost):
$(sfget)
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j$(shell getconf _NPROCESSORS_ONLN) install
cd '${TMP}/boost_1_59_0' && ./bootstrap.sh && ./b2 --prefix=${PREFIX} -j${nproc} install
rm -rf ${TMP}

View File

@ -51,6 +51,12 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl
@ -389,7 +395,7 @@ alignment-symmetrization-method = grow-diag-final-and
#
#operation-sequence-model = "yes"
#operation-sequence-model-order = 5
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% -T $working-dir/model/tmp'"
#operation-sequence-model-settings = "-lmplz '$moses-src-dir/bin/lmplz -S 40% '"
#
# OR if you want to use with SRILM
#

View File

@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl

View File

@ -54,6 +54,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl

View File

@ -54,6 +54,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl

View File

@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl

View File

@ -51,6 +51,11 @@ ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 4 100 2"
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
output-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $output-extension"
# For Arabic tokenizer try Farasa (download: http://qatsdemo.cloudapp.net/farasa/)
# Abdelali, Darwish, Durrani, Mubarak (NAACL demo 2016)
# "Farasa: A Fast and Furious Segmenter for Arabic"
# Optional: define farasa-dir and uncomment the next line to replace the
# default input-tokenizer set above.
#input-tokenizer = "$farasa-dir/farasa_moses.sh"
# truecasers - comment out if you do not use the truecaser
input-truecaser = $moses-script-dir/recaser/truecase.perl
output-truecaser = $moses-script-dir/recaser/truecase.perl

View File

@ -59,7 +59,7 @@ my $help=0;
my $dbg=0;
my $jobs=4;
my $cache_model=undef;
my $mosescmd="$ENV{MOSESBIN}/moses"; #decoder in use
# Decoder in use: defaults to $MOSESBIN/moses and stays undef when MOSESBIN is
# not set. Written as a ternary because combining `my` with a statement
# modifier (`my $x = ... if COND;`) has undefined behaviour according to perlsyn.
my $mosescmd = defined $ENV{MOSESBIN} ? "$ENV{MOSESBIN}/moses" : undef; #decoder in use
my $inputlist=undef;
my $inputfile=undef;
my $inputtype=0;
@ -276,7 +276,7 @@ sub getNbestParameters(){
#get parameters for search graph computation (possibly from configuration file)
sub getSearchGraphParameters(){
if (!$searchgraphlist){
open (CFG, "$cfgfile");
open (CFG, $cfgfile) or die "Can't read '$cfgfile'";
while (chomp($_=<CFG>)){
if (/^\[output-search-graph\]/ || /^\[osg\]/){
my $tmp;
@ -299,7 +299,7 @@ sub getSearchGraphParameters(){
#get parameters for word graph computation (possibly from configuration file)
sub getWordGraphParameters(){
if (!$wordgraphlist){
open (CFG, "$cfgfile");
open (CFG, $cfgfile) or die "Can't read '$cfgfile'";
while (chomp($_=<CFG>)){
if (/^\[output-word-graph\]/ || /^\[owg\]/){
my $tmp;
@ -843,12 +843,14 @@ sub concatenate_nbest(){
#computing the length of each input file
my @in=();
open (IN, "${inputfile}.${splitpfx}${idx}.trans");
open (IN, "${inputfile}.${splitpfx}${idx}.trans")
or die "Failed to open '${inputfile}.${splitpfx}${idx}.trans'";
@in=<IN>;
close(IN);
$inplength{$idx} = scalar(@in);
open (IN, "${nbestfile}.${splitpfx}${idx}");
open (IN, "${nbestfile}.${splitpfx}${idx}")
or die "Failed to open '${nbestfile}.${splitpfx}${idx}'";
while (<IN>){
my ($code,@extra)=split(/\|\|\|/,$_);
$code += $offset;
@ -1078,7 +1080,7 @@ sub safesystem {
# Returns the command to use for printing the working directory: the path
# reported by `which pawd` when that path exists, otherwise plain "pwd".
sub getPwdCmd(){
my $pwdcmd="pwd";
my $a;
# Pre-change line of this hunk: backticks interpolate like a double-quoted
# string, so Perl substitutes its own $1 here and awk never sees a literal `$1`.
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
# Post-change line of this hunk: `\$1` keeps the dollar sign literal, so awk
# prints the first field of the `which` output.
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print \$1}'`);
# Only switch to pawd when `which` returned a non-empty path that exists.
if ($a && -e $a){ $pwdcmd=$a; }
return $pwdcmd;
}

View File

@ -252,7 +252,7 @@ sub safesystem {
# Returns the command to use for printing the working directory: the path
# reported by `which pawd` when that path exists, otherwise plain "pwd".
sub getPwdCmd(){
my $pwdcmd="pwd";
my $a;
# Pre-change line of this hunk: backticks interpolate like a double-quoted
# string, so Perl substitutes its own $1 here and awk never sees a literal `$1`.
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
# Post-change line of this hunk: `\$1` keeps the dollar sign literal, so awk
# prints the first field of the `which` output.
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print \$1}'`);
# Only switch to pawd when `which` returned a non-empty path that exists.
if ($a && -e $a){ $pwdcmd=$a; }
return $pwdcmd;
}

View File

@ -123,7 +123,7 @@ sub clone_file_or_die {
my $src = shift;
my $tgt = shift;
my $src = resolve($src); # resolve symlinks
$src = resolve($src); # resolve symlinks
my $ok = 0;
if ($symlink) {