From d66d4c2d5f91de8318ee921cdc7d24bb24c00e10 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 17:41:30 +0000 Subject: [PATCH 1/6] Install scripts iff --install-scripts is passed --- scripts/Jamfile | 230 ++++++++++++++++++++++++------------------------ 1 file changed, 117 insertions(+), 113 deletions(-) diff --git a/scripts/Jamfile b/scripts/Jamfile index 8ca5df8ae..fb97fffe5 100644 --- a/scripts/Jamfile +++ b/scripts/Jamfile @@ -3,17 +3,6 @@ import option ; build-project training ; -location = [ option.get "install-scripts" : $(TOP)/scripts/dist ] ; -location = $(location)$(GITTAG) ; - -#These two used to live in a tools directory. -install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : $(location)/training/phrase-extract/extract-ghkm/tools ; -install compactify : training/compact-rule-table//compactify : $(location)/training/compact-rule-table/tools ; - -install phrase-extract : training/phrase-extract//released-programs : $(location)/training/phrase-extract ; -install lexical-reordering : training/lexical-reordering//score : $(location)/training/lexical-reordering ; -install symal : training/symal//symal : $(location)/symal ; - with-giza = [ option.get "with-giza" ] ; if $(with-giza) { rule check-for-bin ( name ) { @@ -30,111 +19,126 @@ if $(with-giza) { check-for-bin GIZA++ ; check-for-bin snt2cooc.out ; check-for-bin mkcls ; - - install train-model : training//train-model.perl : $(location)/training ; } else { echo "If you want scripts/training/train-model.perl, pass --with-giza=/path/to/giza" ; constant WITH-GIZA : "no" ; - alias train-model ; } -install scripts : - analysis/README - analysis/sentence-by-sentence.pl - ems/experiment.machines - ems/experiment.meta - ems/experiment.perl - ems/example/config.basic - ems/example/config.factored - ems/example/config.hierarchical - ems/example/config.syntax - ems/example/config.toy - ems/example/data/nc-5k.en - ems/example/data/nc-5k.fr - ems/example/data/test-ref.en.sgm - ems/example/data/test-src.fr.sgm - ems/support/analysis.perl - ems/support/berkeley-process.sh - ems/support/berkeley-train.sh - ems/support/consolidate-training-data.perl - ems/support/generic-multicore-parallelizer.perl - ems/support/generic-parallelizer.perl - ems/support/input-from-sgm.perl - ems/support/interpolate-lm.perl - ems/support/reference-from-sgm.perl - ems/support/remove-segmenation-markup.perl - ems/support/report-experiment-scores.perl - ems/support/reuse-weights.perl - ems/support/run-command-on-multiple-refsets.perl - ems/support/wrap-xml.perl - ems/web/analysis.php - ems/web/analysis_diff.php - ems/web/comment.php - ems/web/diff.php - ems/web/index.php - ems/web/lib.php - ems/web/overview.php - ems/web/setup - ems/web/javascripts/builder.js - ems/web/javascripts/controls.js - ems/web/javascripts/dragdrop.js - ems/web/javascripts/effects.js - ems/web/javascripts/prototype.js - ems/web/javascripts/scriptaculous.js - ems/web/javascripts/slider.js - ems/web/javascripts/sound.js - ems/web/javascripts/unittest.js - generic/compound-splitter.perl - generic/extract-factors.pl - generic/lopar2pos.pl - generic/moses-parallel.pl - generic/mteval-v12.pl - generic/multi-bleu.perl - generic/qsub-wrapper.pl - README - tokenizer/detokenizer.perl - tokenizer/tokenizer.perl - tokenizer/lowercase.perl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca - training/absolutize_moses_model.pl - training/build-generation-table.perl - training/clean-corpus-n.perl - training/clone_moses_model.pl - training/filter-model-given-input.pl - training/filter-rule-table.py - training/zmert-moses.pl - training/mert-moses.pl - training/mert-moses-multi.pl - training/postprocess-lopar.perl - training/reduce_combine.pl - training/combine_factors.pl - training/symal/giza2bal.pl - training/wrappers/parse-de-bitpar.perl - training/wrappers/parse-en-collins.perl - training/wrappers/make-factor-en-pos.mxpost.perl - training/wrappers/make-factor-pos.tree-tagger.perl - training/wrappers/make-factor-stem.perl - recaser/train-recaser.perl - recaser/recase.perl - recaser/truecase.perl - recaser/detruecase.perl - recaser/train-truecaser.perl - : . $(location) ; +location = [ option.get "install-scripts" ] ; +if $(location) { + location = $(location)$(GITTAG) ; -alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ; -explicit install ghkm compactify phrase-extract lexical-reordering symal scripts train-model ; + #These two used to live in a tools directory. + install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : $(location)/training/phrase-extract/extract-ghkm/tools ; + install compactify : training/compact-rule-table//compactify : $(location)/training/compact-rule-table/tools ; + + install phrase-extract : training/phrase-extract//released-programs : $(location)/training/phrase-extract ; + install lexical-reordering : training/lexical-reordering//score : $(location)/training/lexical-reordering ; + install symal : training/symal//symal : $(location)/symal ; + + if $(WITH-GIZA) != no { + install train-model : training//train-model.perl : $(location)/training ; + } else { + alias train-model ; + } + + install scripts : + analysis/README + analysis/sentence-by-sentence.pl + ems/experiment.machines + ems/experiment.meta + ems/experiment.perl + ems/example/config.basic + ems/example/config.factored + ems/example/config.hierarchical + ems/example/config.syntax + ems/example/config.toy + ems/example/data/nc-5k.en + ems/example/data/nc-5k.fr + ems/example/data/test-ref.en.sgm + ems/example/data/test-src.fr.sgm + ems/support/analysis.perl + ems/support/berkeley-process.sh + ems/support/berkeley-train.sh + ems/support/consolidate-training-data.perl + ems/support/generic-multicore-parallelizer.perl + ems/support/generic-parallelizer.perl + ems/support/input-from-sgm.perl + ems/support/interpolate-lm.perl + ems/support/reference-from-sgm.perl + ems/support/remove-segmenation-markup.perl + ems/support/report-experiment-scores.perl + ems/support/reuse-weights.perl + ems/support/run-command-on-multiple-refsets.perl + ems/support/wrap-xml.perl + ems/web/analysis.php + ems/web/analysis_diff.php + ems/web/comment.php + ems/web/diff.php + ems/web/index.php + ems/web/lib.php + ems/web/overview.php + ems/web/setup + ems/web/javascripts/builder.js + ems/web/javascripts/controls.js + ems/web/javascripts/dragdrop.js + ems/web/javascripts/effects.js + ems/web/javascripts/prototype.js + ems/web/javascripts/scriptaculous.js + ems/web/javascripts/slider.js + ems/web/javascripts/sound.js + ems/web/javascripts/unittest.js + generic/compound-splitter.perl + generic/extract-factors.pl + generic/lopar2pos.pl + generic/moses-parallel.pl + generic/mteval-v12.pl + generic/multi-bleu.perl + generic/qsub-wrapper.pl + README + tokenizer/detokenizer.perl + tokenizer/tokenizer.perl + tokenizer/lowercase.perl + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en + tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca + training/absolutize_moses_model.pl + training/build-generation-table.perl + training/clean-corpus-n.perl + training/clone_moses_model.pl + training/filter-model-given-input.pl + training/filter-rule-table.py + training/zmert-moses.pl + training/mert-moses.pl + training/mert-moses-multi.pl + training/postprocess-lopar.perl + training/reduce_combine.pl + training/combine_factors.pl + training/symal/giza2bal.pl + training/wrappers/parse-de-bitpar.perl + training/wrappers/parse-en-collins.perl + training/wrappers/make-factor-en-pos.mxpost.perl + training/wrappers/make-factor-pos.tree-tagger.perl + training/wrappers/make-factor-stem.perl + recaser/train-recaser.perl + recaser/recase.perl + recaser/truecase.perl + recaser/detruecase.perl + recaser/train-truecaser.perl + : . $(location) ; + + alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ; +} From 1b9fcb2eccec3019f872f97ce6ea6d327b00369c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 17:46:48 +0000 Subject: [PATCH 2/6] Globs are good --- scripts/Jamfile | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/scripts/Jamfile b/scripts/Jamfile index fb97fffe5..5b474b225 100644 --- a/scripts/Jamfile +++ b/scripts/Jamfile @@ -96,25 +96,7 @@ if $(location) { generic/multi-bleu.perl generic/qsub-wrapper.pl README - tokenizer/detokenizer.perl - tokenizer/tokenizer.perl - tokenizer/lowercase.perl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en - tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca + [ glob tokenizer/*.perl tokenizer/nonbreaking_prefixes/* ] training/absolutize_moses_model.pl training/build-generation-table.perl training/clean-corpus-n.perl @@ -133,11 +115,7 @@ if $(location) { training/wrappers/make-factor-en-pos.mxpost.perl training/wrappers/make-factor-pos.tree-tagger.perl training/wrappers/make-factor-stem.perl - recaser/train-recaser.perl - recaser/recase.perl - recaser/truecase.perl - recaser/detruecase.perl - recaser/train-truecaser.perl + [ glob recaser/*.perl ] : . $(location) ; alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ; From ab7469ee049e21d8981c5965d85f05e93fa137b9 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 18:09:07 +0000 Subject: [PATCH 3/6] Minor commenting --- Jamroot | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jamroot b/Jamroot index ff9de6e59..8b66e2f18 100644 --- a/Jamroot +++ b/Jamroot @@ -1,6 +1,5 @@ #BUILDING MOSES # -# #PACKAGES #Language models (optional): #--with-irstlm=/path/to/irstlm @@ -41,7 +40,7 @@ # debug-symbols=on|off include (default) or exclude debugging # information also known as -g # -#--notrace compiles without TRACE macros +# --notrace compiles without TRACE macros # # #CONTROLLING THE BUILD From 968bcc2b9c408c5c1d677bb46602b8bab66c603d Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 18:14:14 +0000 Subject: [PATCH 4/6] Remove build instructions about scripts --- BUILD-INSTRUCTIONS.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index 0cbdfa4cc..b8c91ea2a 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -18,11 +18,6 @@ http://hlt.fbk.eu/en/irstlm Ken's LM is included with the Moses distribution. -IMPORTANT: These instructions are for building the moses decoder ONLY, -the training and tuning SCRIPTS contained in scripts/ must be built -and installed separately. Also, they may require modification to -work in certain environments. - -------------------------------------------------------------------------- 1) Instructions for building with SRILM From 0ad9269b0f3b8a4bc6bd460a9f113e4db65d95f7 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 18:18:16 +0000 Subject: [PATCH 5/6] Delete scripts from 2006 that nobody updated to use git --- validate_more_revisions.sh | 40 ------------------------------ validate_revision.sh | 50 -------------------------------------- 2 files changed, 90 deletions(-) delete mode 100755 validate_more_revisions.sh delete mode 100755 validate_revision.sh diff --git a/validate_more_revisions.sh b/validate_more_revisions.sh deleted file mode 100755 index 86a914dbe..000000000 --- a/validate_more_revisions.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -from=$1 -to=$2 - -logdir=./revision_status_log - -if [ "$from" == "" ] || [ "$to" == "" ]; then - cat < - will run ./validate_revision.sh for a sequence of revisions and - collect logfiles to "$logdir" -KONEC - exit 1 -fi - - -mkdir -p $logdir - -tmpdir=/tmp/validate-more-revisions-tmp -if [ -e $tmpdir ]; then - echo "$0 seems to be already running!" - echo "If this is a false alarm, remove our temp directory:" - echo " rm -rf $tmpdir" - exit 1; -fi - -mkdir -p $tmpdir - -# need to save a copy of the helper script validate_revision.sh, -# because previous releases might have missed it -cp ./validate_revision.sh $tmpdir/ - -for i in `seq $from $to`; do - echo "Validating $i..."; - $tmpdir/validate_revision.sh $i > $logdir/$i.log 2>&1 - tail -1 $logdir/$i.log | sed 's/^/ /' -done -rm -rf $tmpdir -echo "Finished validating, now at revision $to" diff --git a/validate_revision.sh b/validate_revision.sh deleted file mode 100755 index a29eeed7c..000000000 --- a/validate_revision.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -rev="$1" - -if [ "$rev" == "" ]; then - cat << KONEC -./validate_revision.sh - This will check, if the given revision was compilable (using irstlm). - These tasks will be performed: - svn update -r - compile and install irstlm to a temp directory - compile moses with irstlm - delete the temp directory -KONEC - exit 1; -fi - -tempdir=/tmp/validatemoses - -function die() { - rm -rf $tempdir - echo "$@" - exit 1 -} - -if svn status | grep '^[^\?]'; then - die "Will not go to a different revision, please synchronize with a revision in repository first" -fi - -svn up -r $rev || die "Failed to update to rev. $rev" -# dump the information -svn info - -./regenerate-makefiles.sh || die "Failed to regenerate makefiles in mosesdecoder" - - -cd irstlm || die "Failed to chdir to irstlm" -./regenerate-makefiles.sh || die "Failed to regenerate makefiles in irstlm" -./configure --prefix=$tempdir/irstlm || die "Failed to configure irstlm" -make clean || die "Failed to clean irstlm" -make || die "Failed to compile irstlm" -make install || die "Failed to install irstlm" -cd .. - -./configure --with-irstlm=$tempdir/irstlm || die "Failed to configure moses" -make clean || die "Failed to clean moses" -make || die "Failed to compile moses" - -rm -rf $tempdir || die "Failed to remove tempdir $tempdir" -echo "Moses successfully compiled" From a7222a322d32f21bb851002140b74848df4f9f16 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 25 Nov 2011 22:14:04 +0000 Subject: [PATCH 6/6] Regression tests expect mert in mert/ --- Jamroot | 1 + mert/Jamfile | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Jamroot b/Jamroot index 8b66e2f18..ff6651156 100644 --- a/Jamroot +++ b/Jamroot @@ -144,6 +144,7 @@ project : requirements build-project lm ; build-project util ; #Trigger instllation into legacy paths. +build-project mert ; build-project moses-cmd/src ; build-project moses-chart-cmd/src ; #Scripts have their own binaries diff --git a/mert/Jamfile b/mert/Jamfile index 2a5aff1ce..75cd02200 100644 --- a/mert/Jamfile +++ b/mert/Jamfile @@ -40,3 +40,5 @@ exe evaluator : evaluator.cpp mert_lib ; exe pro : pro.cpp mert_lib ..//boost_program_options ; alias programs : mert extractor evaluator pro ; + +install legacy : programs : . ;