This commit is contained in:
Nadir Durrani 2013-07-04 19:58:19 +01:00
parent 389b7762e8
commit d2bc6a2584
5 changed files with 39 additions and 28 deletions

Binary file not shown.

View File

@ -1,35 +1,34 @@
#!/bin/sh
# Train an operation sequence model (OSM) and binarize the resulting LM.
#
# Positional arguments, as used below:
#   $1  corpus file, linked into the working directory as "e"
#   $2  corpus file, linked into the working directory as "f"
#   $3  word-alignment file, passed through flipAlignment
#   $4  n-gram order handed to ngram-count
#   $5  working/output directory (created here)
#   $6  root directory that contains scripts/OSM/ and bin/build_binary
#
# ngram-count is invoked by bare name, so it must be reachable through PATH;
# the directory appended here is a site-specific location.
PATH=$PATH:/fs/hel1/nadir/SRILM/bin/i686-m64/
echo 'Training OSM - Start'
date
mkdir $5
# Give the inputs fixed names inside the working directory.
ln -s $1 $5/e
ln -s $2 $5/f
$6/scripts/OSM/flipAlignment $3 > $5/align
echo 'Extracting Singletons'
$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
echo 'Converting Bilingual Sentence Pair into Operation Corpus'
$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
echo 'Learning Operation Sequence Translation Model'
ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM
echo 'Binarizing'
# The input must be the file ngram-count just wrote ($5/operationLM); reading
# $5/operationLM$4 here would point at a file this script never creates.
$6/bin/build_binary $5/operationLM $5/operationLM.bin
# Remove the temporary links and the flipped alignment; the leading backslash
# bypasses any alias defined for rm.
\rm $5/e
\rm $5/f
\rm $5/align
# Link the two corpus files ($1, $2) into the working directory $5 under the
# fixed names "e" and "f".
ln -s $1 $5/e
ln -s $2 $5/f
# The helper tools below are addressed relative to the current directory, so
# this sequence only works when run from the directory that contains them.
./flipAlignment $3 > $5/align
echo 'Extracting Singletons'
./extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
echo 'Converting Bilingual Sentence Pair into Operation Corpus'
./generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
echo 'Learning Operation Sequence Translation Model'
# The model file name carries the n-gram order ($4) as a suffix.
ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM$4
echo 'Binarizing'
# Reads the order-suffixed model written above and writes <same name>.bin.
../../bin/build_binary $5/operationLM$4 $5/operationLM$4.bin
echo 'Training OSM - End'
date

View File

@ -516,8 +516,8 @@ build-osm
out: osm-model
ignore-unless: operation-sequence-model
rerun-on-change: operation-sequence-model training-options script giza-settings
template: $moses-script-dir/OSM/OSM-Train IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
default-name: model/OSM/
template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
default-name: model/OSM
extract-phrases
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
out: extracted-phrases
@ -586,7 +586,7 @@ build-sparse
default-name: model/sparse-features
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
create-config
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm osm-model
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini

View File

@ -2164,11 +2164,13 @@ sub get_config_tables {
sub define_training_create_config {
my ($step_id) = @_;
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
= &get_output_and_input($step_id);
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
$cmd .= "-osm-model $osm/operationLM.bin " if $osm;
# sparse lexical features provide additional content for config file
$cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;

View File

@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_DECODING_GRAPH_BACKOFF,
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM,
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
@ -119,6 +119,7 @@ $_HELP = 1
'xml' => \$_XML,
'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
'config=s' => \$_CONFIG,
'osm-model=s' => \$_OSM,
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
@ -1992,6 +1993,15 @@ sub create_ini {
}
}
# operation sequence model
if($_OSM)
{
$feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n";
$weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
}
# distance-based reordering
if (!$_HIERARCHICAL) {
$feature_spec .= "Distortion\n";