Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2014-05-25 10:06:51 +01:00
commit cfe22c29bc
3 changed files with 27 additions and 19 deletions

View File

@@ -20,7 +20,7 @@ clean
out: clean-stem
default-name: corpus/clean
rerun-on-change: max-sentence-length $moses-script-dir/training/clean-corpus-n.perl
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length $working-dir/corpus/clean.lines-retained.VERSION
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained
error: there is a blank factor
parse
in: clean-stem
@@ -35,7 +35,7 @@ post-parse-clean
out: clean-parsed-stem
default-name: corpus/parsed-clean
pass-unless: input-parser output-parser
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 $working-dir/corpus/parsed-clean.lines-retained.VERSION --ignore-xml
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 OUT.lines-retained --ignore-xml
error: there is a blank factor
factorize
in: clean-parsed-stem
@@ -83,7 +83,7 @@ post-split-clean
default-name: corpus/split-clean
ignore-if: input-parser output-parser
pass-unless: input-splitter output-splitter
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length $working-dir/corpus/split-clean.lines-retained.VERSION
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained
error: there is a blank factor
post-split-clean-syntax
in: split-stem
@@ -91,7 +91,7 @@ post-split-clean-syntax
default-name: corpus/split-clean
ignore-unless: input-parser output-parser
pass-unless: input-splitter output-splitter
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 $working-dir/corpus/parsed-clean.lines-retained.VERSION --ignore-xml
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 10000 OUT.lines-retained --ignore-xml
error: there is a blank factor
[RECASING] single
@@ -104,8 +104,9 @@ tokenize
train
in: tokenized
out: recase-config
template: $moses-script-dir/recaser/train-recaser.perl -train-script $TRAINING:script -dir $working-dir/recasing/model.VERSION -corpus IN -scripts-root-dir $moses-script-dir -config OUT -ngram-count $lm-training
template: $moses-script-dir/recaser/train-recaser.perl -train-script $TRAINING:script -dir OUT.model -corpus IN -scripts-root-dir $moses-script-dir -config OUT -ngram-count $lm-training
default-name: recasing/moses.ini
tmp-name: recasing/model
ignore-unless: EVALUATION:recaser
error: cannot execute binary file

View File

@@ -97,7 +97,7 @@ $VERSION = $DELETE_CRASHED if $DELETE_CRASHED;
$VERSION = $DELETE_VERSION if $DELETE_VERSION;
&compute_version_number() if $EXECUTE && !$CONTINUE && !$DELETE_CRASHED && !$DELETE_VERSION;
`mkdir -p steps/$VERSION`;
`mkdir -p steps/$VERSION` unless -d "steps/$VERSION";
&log_config() unless $DELETE_CRASHED || $DELETE_VERSION;
print "running experimenal run number $VERSION\n";
@@ -714,7 +714,7 @@ sub delete_crashed {
for(my $i=0;$i<=$#DO_STEP;$i++) {
my $step_file = &versionize(&step_file($i),$DELETE_CRASHED);
next unless -e $step_file;
next unless &check_if_crashed($i,$DELETE_CRASHED);
next unless &check_if_crashed($i,$DELETE_CRASHED,"no wait");
&delete_step($DO_STEP[$i],$DELETE_CRASHED);
$crashed++;
}
@@ -774,6 +774,8 @@ sub delete_version {
&delete_step($step,$version);
}
}
my $deleted_flag_file = &steps_file("deleted.$DELETE_VERSION",$DELETE_VERSION);
`touch $deleted_flag_file` if $EXECUTE;
}
sub get_step_from_step_file {
@@ -802,23 +804,26 @@ sub delete_step {
}
}
# delete output files that match a given prefix
sub delete_output {
my ($file) = @_;
# delete directory that matches exactly
if (-d $file) {
print "\tdelete directory $file\n";
`rm -r $file` if $EXECUTE;
return;
}
elsif (-e $file) {
# delete regular file that matches exactly
if (-e $file) {
print "\tdelete file $file\n";
`rm $file` if $EXECUTE;
}
else {
my @FILES = `ls $file.* 2>/dev/null`;
foreach (@FILES) {
chop;
print "\tdelete file $_\n";
`rm $_` if $EXECUTE;
}
# delete files that have additional extension
my @FILES = `ls $file.* 2>/dev/null`;
foreach (@FILES) {
chop;
print "\tdelete file $_\n";
`rm $_` if $EXECUTE;
}
}
@@ -1470,12 +1475,12 @@ sub get_parameters_relevant_for_re_use {
}
sub check_if_crashed {
my ($i,$version) = @_;
my ($i,$version,$no_wait) = @_;
$version = $VERSION unless $version; # default: current version
my $file = &versionize(&step_file($i),$version).".STDERR";
# while running, sometimes the STDERR file is slow in appearing - wait a bit just in case
if ($version == $VERSION) {
if ($version == $VERSION && !$no_wait) {
my $j = 0;
while (! -e $file && $j < 100) {
sleep(5);
@@ -1483,7 +1488,7 @@ sub check_if_crashed {
}
}
#print "checking if $DO_STEP[$i]($version) crashed...\n";
#print "checking if $DO_STEP[$i]($version) crashed -> $file...\n";
return 1 if ! -e $file;
# check digest file (if it exists)

View File

@@ -21,7 +21,9 @@ function load_experiment_info() {
file_exists($dir."/steps/1")) {
$topd = dir($dir."/steps");
while (false !== ($run = $topd->read())) {
if (preg_match('/^([0-9]+)$/',$run,$match) && $run>0) {
if (preg_match('/^([0-9]+)$/',$run,$match)
&& $run>0
&& !file_exists("$dir/steps/$run/deleted.$run")) {
$d = dir($dir."/steps/$run");
while (false !== ($entry = $d->read())) {
process_file_entry("$dir/steps/$run/",$entry);