diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index bf2858e43..5e9324048 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -39,7 +39,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_ $_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS, @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE, $_SPARSE_TRANSLATION_TABLE, @_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_CORPUS, $_BASELINE_ALIGNMENT, - $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $IGNORE); + $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $IGNORE); my $_CORES = 1; my $debug = 0; # debug this script, do not delete any files in debug mode @@ -132,7 +132,8 @@ $_HELP = 1 'baseline-extract=s' => \$_BASELINE_EXTRACT, 'baseline-corpus=s' => \$_BASELINE_CORPUS, 'baseline-alignment=s' => \$_BASELINE_ALIGNMENT, - 'cores=i' => \$_CORES + 'cores=i' => \$_CORES, + 'instance-weights-file=s' => \$_INSTANCE_WEIGHTS_FILE ); if ($_HELP) { @@ -1253,7 +1254,8 @@ sub get_lexical_factored { $___LEXICAL_COUNTS, $_BASELINE_CORPUS.".".$___F, $_BASELINE_CORPUS.".".$___E, - $_BASELINE_ALIGNMENT); + $_BASELINE_ALIGNMENT, + $_INSTANCE_WEIGHTS_FILE); } else { foreach my $factor (split(/\+/,$___TRANSLATION_FACTORS)) { @@ -1274,7 +1276,8 @@ sub get_lexical_factored { $___LEXICAL_COUNTS, $_BASELINE_CORPUS.".".$factor_f.".".$___F, $_BASELINE_CORPUS.".".$factor_e.".".$___E, - $_BASELINE_ALIGNMENT); + $_BASELINE_ALIGNMENT, + $_INSTANCE_WEIGHTS_FILE); } } } @@ -1422,6 +1425,7 @@ sub extract_phrase { } $cmd .= " --GZOutput "; + $cmd .= " --InstanceWeights $_INSTANCE_WEIGHTS_FILE " if defined $_INSTANCE_WEIGHTS_FILE; $cmd .= " --BaselineExtract $_BASELINE_EXTRACT" if defined($_BASELINE_EXTRACT) && $PHRASE_EXTRACT =~ /extract-parallel.perl/; map { die "File not found: $_" if ! -e $_ } ($alignment_file_e, $alignment_file_f, $alignment_file_a);