mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 21:42:19 +03:00
In EMS
This commit is contained in:
parent
389b7762e8
commit
d2bc6a2584
Binary file not shown.
@ -1,35 +1,34 @@
|
||||
#!/bin/sh
|
||||
|
||||
PATH=$PATH:/fs/hel1/nadir/SRILM/bin/i686-m64/
|
||||
|
||||
echo 'Training OSM - Start'
|
||||
date
|
||||
|
||||
mkdir $5
|
||||
ln -s $1 $5/e
|
||||
ln -s $2 $5/f
|
||||
|
||||
$6/scripts/OSM/flipAlignment $3 > $5/align
|
||||
|
||||
echo 'Extracting Singletons'
|
||||
|
||||
$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
|
||||
|
||||
echo 'Converting Bilingual Sentence Pair into Operation Corpus'
|
||||
|
||||
$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
|
||||
|
||||
echo 'Learning Operation Sequence Translation Model'
|
||||
|
||||
ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM
|
||||
|
||||
echo 'Binarizing'
|
||||
|
||||
$6/bin/build_binary $5/operationLM$4 $5/operationLM.bin
|
||||
|
||||
\rm $5/e
|
||||
\rm $5/f
|
||||
\rm $5/align
|
||||
|
||||
ln -s $1 $5/e
|
||||
ln -s $2 $5/f
|
||||
|
||||
./flipAlignment $3 > $5/align
|
||||
|
||||
echo 'Extracting Singletons'
|
||||
|
||||
./extract-singletons.perl $5/e $5/f $5/align > $5/Singletons
|
||||
|
||||
echo 'Converting Bilingual Sentence Pair into Operation Corpus'
|
||||
|
||||
./generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus
|
||||
|
||||
echo 'Learning Operation Sequence Translation Model'
|
||||
|
||||
ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM$4
|
||||
|
||||
echo 'Binarizing'
|
||||
|
||||
../../bin/build_binary $5/operationLM$4 $5/operationLM$4.bin
|
||||
|
||||
echo 'Training OSM - End'
|
||||
date
|
||||
|
||||
|
@ -516,8 +516,8 @@ build-osm
|
||||
out: osm-model
|
||||
ignore-unless: operation-sequence-model
|
||||
rerun-on-change: operation-sequence-model training-options script giza-settings
|
||||
template: $moses-script-dir/OSM/OSM-Train IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
|
||||
default-name: model/OSM/
|
||||
template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir
|
||||
default-name: model/OSM
|
||||
extract-phrases
|
||||
in: corpus-mml-postfilter=OR=word-alignment scored-corpus
|
||||
out: extracted-phrases
|
||||
@ -586,7 +586,7 @@ build-sparse
|
||||
default-name: model/sparse-features
|
||||
template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features"
|
||||
create-config
|
||||
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm osm-model
|
||||
in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm
|
||||
out: config
|
||||
ignore-if: use-hiero
|
||||
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini
|
||||
|
@ -2164,11 +2164,13 @@ sub get_config_tables {
|
||||
sub define_training_create_config {
|
||||
my ($step_id) = @_;
|
||||
|
||||
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM)
|
||||
my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM)
|
||||
= &get_output_and_input($step_id);
|
||||
|
||||
my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains);
|
||||
|
||||
$cmd .= "-osm-model $osm/operationLM.bin " if $osm;
|
||||
|
||||
# sparse lexical features provide additional content for config file
|
||||
$cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
|
||||
|
||||
|
@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
|
||||
$_DECODING_GRAPH_BACKOFF,
|
||||
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
|
||||
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
|
||||
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
|
||||
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM,
|
||||
$_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
|
||||
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
|
||||
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
|
||||
@ -119,6 +119,7 @@ $_HELP = 1
|
||||
'xml' => \$_XML,
|
||||
'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT,
|
||||
'config=s' => \$_CONFIG,
|
||||
'osm-model=s' => \$_OSM,
|
||||
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
|
||||
'do-steps=s' => \$_DO_STEPS,
|
||||
'memscore:s' => \$_MEMSCORE,
|
||||
@ -1992,6 +1993,15 @@ sub create_ini {
|
||||
}
|
||||
}
|
||||
|
||||
# operation sequence model
|
||||
|
||||
if($_OSM)
|
||||
{
|
||||
|
||||
$feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n";
|
||||
$weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n";
|
||||
}
|
||||
|
||||
# distance-based reordering
|
||||
if (!$_HIERARCHICAL) {
|
||||
$feature_spec .= "Distortion\n";
|
||||
|
Loading…
Reference in New Issue
Block a user