Fix errors from multiline `` commands in transliteration Perl scripts

Replace the backslash-newline sequence with backslash-backslash-newline in
multiline backquote command strings, i.e. replace expressions like this:

  `some-command \
    -option1 \
    -option2`;

with ones like this:

  `some-command \\
    -option1 \\
    -option2`;

If I understand this right, the shell treats a backslash-newline sequence as
a line continuation and discards it, but Perl does not pass the sequence
through unchanged.  Unless the backslash itself is escaped, Perl drops the
backslash and hands the shell a bare newline, so each continuation line is
run as a separate command, which results in errors in most instances.  Once
the backslash is escaped, the backslash-newline sequence is passed through
to the shell, where it is interpreted as intended.
This commit is contained in:
Phil Williams 2015-07-16 14:54:00 +01:00
parent 43300459b3
commit c83628a92b
3 changed files with 121 additions and 121 deletions

View File

@ -137,38 +137,38 @@ sub run_transliteration
print "Filter Table\n"; print "Filter Table\n";
`$MOSES_SRC/scripts/training/train-model.perl \ `$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' \ -score-options '--KneserNey' \\
-phrase-translation-table $TRANSLIT_MODEL/model/phrase-table \ -phrase-translation-table $TRANSLIT_MODEL/model/phrase-table \\
-config $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \ -config $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \\
-lm 0:3:$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini:8`; -lm 0:3:$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini:8`;
`$MOSES_SRC/scripts/training/filter-model-given-input.pl \ `$MOSES_SRC/scripts/training/filter-model-given-input.pl \\
$TRANSLIT_MODEL/evaluation/$eval_file.filtered \ $TRANSLIT_MODEL/evaluation/$eval_file.filtered \\
$TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \ $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini \\
$TRANSLIT_MODEL/evaluation/$eval_file \ $TRANSLIT_MODEL/evaluation/$eval_file \\
-Binarizer "$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2"`; -Binarizer "$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2"`;
`rm $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`; `rm $TRANSLIT_MODEL/evaluation/$eval_file.moses.table.ini`;
print "Apply Filter\n"; print "Apply Filter\n";
`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl \ `$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl \\
$TRANSLIT_MODEL/evaluation/$eval_file.filtered/moses.ini \ $TRANSLIT_MODEL/evaluation/$eval_file.filtered/moses.ini \\
$TRANSLIT_MODEL/model/moses.ini \ $TRANSLIT_MODEL/model/moses.ini \\
$TRANSLIT_MODEL/tuning/moses.tuned.ini \ $TRANSLIT_MODEL/tuning/moses.tuned.ini \\
$TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini`; $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini`;
my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null"; my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null";
`$DECODER \ `$DECODER \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \ -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -drop-unknown -distortion-limit 0 \ -threads 16 -drop-unknown -distortion-limit 0 \\
-n-best-list $TRANSLIT_MODEL/evaluation/$eval_file.op.nBest 1000 \ -n-best-list $TRANSLIT_MODEL/evaluation/$eval_file.op.nBest 1000 \\
distinct -f $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini \ distinct -f $TRANSLIT_MODEL/evaluation/$eval_file.filtered.ini \\
< $TRANSLIT_MODEL/evaluation/$eval_file \ < $TRANSLIT_MODEL/evaluation/$eval_file \\
> $TRANSLIT_MODEL/evaluation/$eval_file.op $drop_stderr`; > $TRANSLIT_MODEL/evaluation/$eval_file.op $drop_stderr`;
} }
@ -315,52 +315,52 @@ sub run_decoder
`mkdir $corpus_dir/evaluation`; `mkdir $corpus_dir/evaluation`;
`$MOSES_SRC/scripts/training/train-model.perl \ `$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-lmodel-oov-feature "yes" -post-decoding-translit "yes" \ -lmodel-oov-feature "yes" -post-decoding-translit "yes" \\
-phrase-translation-table $corpus_dir/model/phrase-table \ -phrase-translation-table $corpus_dir/model/phrase-table \\
-config $corpus_dir/model/moses.ini -lm 0:5:$LM_FILE:8`; -config $corpus_dir/model/moses.ini -lm 0:5:$LM_FILE:8`;
`touch $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`; `touch $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`;
`$MOSES_SRC/scripts/training/train-model.perl \ `$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-lmodel-oov-feature "yes" -post-decoding-translit "yes" \ -lmodel-oov-feature "yes" -post-decoding-translit "yes" \\
-phrase-translation-table $corpus_dir/model/phrase-table \ -phrase-translation-table $corpus_dir/model/phrase-table \\
-config $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \ -config $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \\
-lm 0:3:$corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini:8`; -lm 0:3:$corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini:8`;
`$MOSES_SRC/scripts/training/filter-model-given-input.pl \ `$MOSES_SRC/scripts/training/filter-model-given-input.pl \\
$corpus_dir/evaluation/filtered \ $corpus_dir/evaluation/filtered \\
$corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \ $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini \\
$INPUT_FILE -Binarizer "$MOSES_SRC/bin/CreateOnDiskPt \ $INPUT_FILE -Binarizer "$MOSES_SRC/bin/CreateOnDiskPt \\
1 1 4 100 2"`; 1 1 4 100 2"`;
`rm $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`; `rm $corpus_dir/evaluation/$OUTPUT_FILE_NAME.moses.table.ini`;
`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables.perl \ `$MOSES_SRC/scripts/ems/support/substitute-filtered-tables.perl \\
$corpus_dir/evaluation/filtered/moses.ini \ $corpus_dir/evaluation/filtered/moses.ini \\
< $corpus_dir/model/moses.ini \ < $corpus_dir/model/moses.ini \\
> $corpus_dir/evaluation/moses.filtered.ini`; > $corpus_dir/evaluation/moses.filtered.ini`;
my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null"; my $drop_stderr = $VERBOSE ? "" : " 2>/dev/null";
`$DECODER \ `$DECODER \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \ -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \ -threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \\
-max-trans-opt-per-coverage 100 \ -max-trans-opt-per-coverage 100 \\
-f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \ -f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \\
< $INPUT_FILE \ < $INPUT_FILE \\
> $OUTPUT_FILE $drop_stderr`; > $OUTPUT_FILE $drop_stderr`;
print "$DECODER \ print "$DECODER \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \ -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \ -threads 16 -feature-overwrite 'TranslationModel0 table-limit=100' \\
-max-trans-opt-per-coverage 100 \ -max-trans-opt-per-coverage 100 \\
-f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \ -f $corpus_dir/evaluation/moses.filtered.ini -distortion-limit 0 \\
< $INPUT_FILE \ < $INPUT_FILE \\
> $OUTPUT_FILE $drop_stderr\n"; > $OUTPUT_FILE $drop_stderr\n";
} }

View File

@ -103,34 +103,34 @@ sub run_transliteration
print STDERR "Filter Table\n"; print STDERR "Filter Table\n";
`$MOSES_SRC/scripts/training/train-model.perl \ `$MOSES_SRC/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-reordering msd-bidirectional-fe -score-options '--KneserNey' \ -reordering msd-bidirectional-fe -score-options '--KneserNey' \\
-phrase-translation-table $TRANSLIT_MODEL/model/phrase-table \ -phrase-translation-table $TRANSLIT_MODEL/model/phrase-table \\
-reordering-table $TRANSLIT_MODEL/model/reordering-table \ -reordering-table $TRANSLIT_MODEL/model/reordering-table \\
-config $eval_file.moses.table.ini \ -config $eval_file.moses.table.ini \\
-lm 0:3:$eval_file.moses.table.ini:8`; -lm 0:3:$eval_file.moses.table.ini:8`;
`$MOSES_SRC/scripts/training/filter-model-given-input.pl \ `$MOSES_SRC/scripts/training/filter-model-given-input.pl \\
$eval_file.filtered $eval_file.moses.table.ini $eval_file \ $eval_file.filtered $eval_file.moses.table.ini $eval_file \\
-Binarizer "$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2"`; -Binarizer "$MOSES_SRC/bin/CreateOnDiskPt 1 1 4 100 2"`;
`rm $eval_file.moses.table.ini`; `rm $eval_file.moses.table.ini`;
print STDERR "Apply Filter\n"; print STDERR "Apply Filter\n";
`$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl \ `$MOSES_SRC/scripts/ems/support/substitute-filtered-tables-and-weights.perl \\
$eval_file.filtered/moses.ini $TRANSLIT_MODEL/model/moses.ini \ $eval_file.filtered/moses.ini $TRANSLIT_MODEL/model/moses.ini \\
$TRANSLIT_MODEL/tuning/moses.tuned.ini $eval_file.filtered.ini`; $TRANSLIT_MODEL/tuning/moses.tuned.ini $eval_file.filtered.ini`;
`$MOSES_SRC/bin/moses \ `$MOSES_SRC/bin/moses \\
-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \ -search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000 \\
-threads 16 -drop-unknown -distortion-limit 0 \ -threads 16 -drop-unknown -distortion-limit 0 \\
-n-best-list $eval_file.op.nBest 50 \ -n-best-list $eval_file.op.nBest 50 \\
-f $eval_file.filtered.ini \ -f $eval_file.filtered.ini \\
< $eval_file \ < $eval_file \\
> $eval_file.op`; > $eval_file.op`;
} }

View File

@ -118,80 +118,80 @@ sub learn_transliteration_model{
print "Align Corpus\n"; print "Align Corpus\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -last-step 1 \ -mgiza -mgiza-cpus 10 -dont-zip -last-step 1 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -corpus $OUT_DIR/training/corpus$t \ -score-options '--KneserNey' -corpus $OUT_DIR/training/corpus$t \\
-corpus-dir $OUT_DIR/training/prepared`; -corpus-dir $OUT_DIR/training/prepared`;
`$MOSES_SRC_DIR/scripts/training/train-model.perl -mgiza -mgiza-cpus 10 \ `$MOSES_SRC_DIR/scripts/training/train-model.perl -mgiza -mgiza-cpus 10 \\
-dont-zip -first-step 2 -last-step 2 \ -dont-zip -first-step 2 -last-step 2 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -corpus-dir $OUT_DIR/training/prepared \ -score-options '--KneserNey' -corpus-dir $OUT_DIR/training/prepared \\
-giza-e2f $OUT_DIR/training/giza -direction 2`; -giza-e2f $OUT_DIR/training/giza -direction 2`;
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 2 -last-step 2 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 2 -last-step 2 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -corpus-dir $OUT_DIR/training/prepared \ -score-options '--KneserNey' -corpus-dir $OUT_DIR/training/prepared \\
-giza-f2e $OUT_DIR/training/giza-inverse -direction 1`; -giza-f2e $OUT_DIR/training/giza-inverse -direction 1`;
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 3 -last-step 3 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 3 -last-step 3 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -giza-e2f $OUT_DIR/training/giza \ -score-options '--KneserNey' -giza-e2f $OUT_DIR/training/giza \\
-giza-f2e $OUT_DIR/training/giza-inverse \ -giza-f2e $OUT_DIR/training/giza-inverse \\
-alignment-file $OUT_DIR/model/aligned \ -alignment-file $OUT_DIR/model/aligned \\
-alignment-stem $OUT_DIR/model/aligned -alignment grow-diag-final-and`; -alignment-stem $OUT_DIR/model/aligned -alignment grow-diag-final-and`;
print "Train Translation Models\n"; print "Train Translation Models\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 4 -last-step 4 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 4 -last-step 4 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -lexical-file $OUT_DIR/model/lex \ -score-options '--KneserNey' -lexical-file $OUT_DIR/model/lex \\
-alignment-file $OUT_DIR/model/aligned \ -alignment-file $OUT_DIR/model/aligned \\
-alignment-stem $OUT_DIR/model/aligned \ -alignment-stem $OUT_DIR/model/aligned \\
-corpus $OUT_DIR/training/corpus$t`; -corpus $OUT_DIR/training/corpus$t`;
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 5 -last-step 5 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 5 -last-step 5 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -alignment-file $OUT_DIR/model/aligned \ -score-options '--KneserNey' -alignment-file $OUT_DIR/model/aligned \\
-alignment-stem $OUT_DIR/model/aligned -extract-file \ -alignment-stem $OUT_DIR/model/aligned -extract-file \\
$OUT_DIR/model/extract -corpus $OUT_DIR/training/corpus$t`; $OUT_DIR/model/extract -corpus $OUT_DIR/training/corpus$t`;
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 6 -last-step 6 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 6 -last-step 6 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' -extract-file $OUT_DIR/model/extract \ -score-options '--KneserNey' -extract-file $OUT_DIR/model/extract \\
-lexical-file $OUT_DIR/model/lex -phrase-translation-table \ -lexical-file $OUT_DIR/model/lex -phrase-translation-table \\
$OUT_DIR/model/phrase-table`; $OUT_DIR/model/phrase-table`;
print "Train Language Models\n"; print "Train Language Models\n";
`$SRILM_DIR/ngram-count \ `$SRILM_DIR/ngram-count \\
-order 5 -interpolate -kndiscount -addsmooth1 0.0 -unk \ -order 5 -interpolate -kndiscount -addsmooth1 0.0 -unk \\
-text $OUT_DIR/lm/target -lm $OUT_DIR/lm/targetLM`; -text $OUT_DIR/lm/target -lm $OUT_DIR/lm/targetLM`;
`$MOSES_SRC_DIR/bin/build_binary \ `$MOSES_SRC_DIR/bin/build_binary \\
$OUT_DIR/lm/targetLM $OUT_DIR/lm/targetLM.bin`; $OUT_DIR/lm/targetLM $OUT_DIR/lm/targetLM.bin`;
print "Create Config File\n"; print "Create Config File\n";
`$MOSES_SRC_DIR/scripts/training/train-model.perl \ `$MOSES_SRC_DIR/scripts/training/train-model.perl \\
-mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \ -mgiza -mgiza-cpus 10 -dont-zip -first-step 9 \\
-external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \ -external-bin-dir $EXTERNAL_BIN_DIR -f $INPUT_EXTENSION \\
-e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \ -e $OUTPUT_EXTENSION -alignment grow-diag-final-and -parts 5 \\
-score-options '--KneserNey' \ -score-options '--KneserNey' \\
-phrase-translation-table $OUT_DIR/model/phrase-table \ -phrase-translation-table $OUT_DIR/model/phrase-table \\
-config $OUT_DIR/model/moses.ini -lm 0:5:$OUT_DIR/lm/targetLM.bin:8`; -config $OUT_DIR/model/moses.ini -lm 0:5:$OUT_DIR/lm/targetLM.bin:8`;
} }