mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
script to copy model files to local disk before running the decoder - useful for grid
This commit is contained in:
parent
836ca8212a
commit
a5ee3c1b6d
115
scripts/ems/support/cache-model.perl
Executable file
115
scripts/ems/support/cache-model.perl
Executable file
@ -0,0 +1,115 @@
|
||||
#!/usr/bin/perl -w
|
||||
#
|
||||
# This file is part of moses. Its use is licensed under the GNU Lesser General
|
||||
# Public License version 2.1 or, at your option, any later version.
|
||||
|
||||
# utility script for deploying decode (may be within tune) jobs over a cluster
|
||||
# with NFS-mounted drives. copy all the model files to local disk.
|
||||
|
||||
use strict;
use warnings;

use Cwd qw(abs_path);
use Fcntl qw(O_CREAT O_EXCL O_WRONLY);
|
||||
|
||||
# Main script: rewrite moses.ini so that every model file it refers to is
# read from a copy in the cache dir, and print the name of the rewritten
# config on STDOUT.
#
#   usage: cache-model.perl moses.ini cache-dir
#
# Several jobs may run this concurrently on the same machine; a
# "<cached config>.lock" file serialises the creation of the cached config.
# If the cached config already exists it is not rewritten, but cache_file()
# is still called for each model file.

die("ERROR: syntax is cache-model.perl moses.ini cache-dir")
  unless scalar @ARGV == 2;
my ($CONFIG,$CACHE_DIR) = @ARGV;

# create dir (if not already there)
# list-form system() bypasses the shell, so a cache dir containing spaces
# or shell metacharacters is passed through unchanged
system("mkdir","-p","--",$CACHE_DIR) == 0
  or die("ERROR: could not create cache dir '$CACHE_DIR'");

# name for new config file: the config path flattened into one file name
my $cached_config = $CONFIG;
$cached_config =~ s/\//_/g;
$cached_config = "$CACHE_DIR/$cached_config";

# open the original config BEFORE taking the lock, so that a failure here
# cannot leave a stale lock behind that blocks all other jobs
open(my $old_fh,"<",$CONFIG)
  or die("ERROR: could not open config '$CONFIG'");

# lock / already cached
my $just_update_timestamps;
while (1) {
  # another process is writing the cached config right now
  sleep(10) while -e "$cached_config.lock";

  $just_update_timestamps = -e $cached_config;
  last if $just_update_timestamps;

  # O_EXCL makes "test and create" a single atomic step, so only one of
  # several competing processes can get the lock
  if (sysopen(my $lock_fh,"$cached_config.lock",O_WRONLY|O_CREAT|O_EXCL)) {
    close($lock_fh);
    last;
  }
  # EEXIST: somebody else was faster - go back to waiting
  die("ERROR: could not create lock '$cached_config.lock': $!")
    unless $!{EEXIST};
}

my $new_fh;
unless ($just_update_timestamps) {
  unless (open($new_fh,">",$cached_config)) {
    my $error = $!;
    unlink("$cached_config.lock");
    die("ERROR: could not write cached config '$cached_config': $error");
  }
}

# feature functions whose "path=" argument points to a model file;
# tried in this order, first match wins
my @CACHED_FEATURES = ("PhraseDictionary","LexicalReordering","Generation",
                       "OpSequenceModel","KENLM");

# find files to cache (and produce new config)
while (my $line = <$old_fh>) {
  my ($pre,$path,$post);
  foreach my $feature (@CACHED_FEATURES) {
    last if ($pre,$path,$post) = $line =~ /($feature.+ path=)(\S+)(.*)$/;
  }

  if (defined $path) {
    my $new_path;
    if ($line =~ /^PhraseDictionaryCompact/) {
      $new_path = cache_file($path,".minphr");
    }
    elsif ($line =~ /^PhraseDictionaryBinary/) {
      # all files of a binary table share one prefix, so every call
      # returns the same kind of cached prefix; the last one is used
      foreach my $suffix (".binphr.idx",".binphr.srctree.wa",".binphr.srcvoc",".binphr.tgtdata.wa",".binphr.tgtvoc") {
        $new_path = cache_file($path,$suffix);
      }
    }
    elsif ($line =~ /^LexicalReordering/ && -e "$path.minlexr") {
      $new_path = cache_file($path,".minlexr");
    }
    elsif ($line =~ /^LexicalReordering/ && -e "$path.binlexr.idx") {
      foreach my $suffix (".binlexr.idx",".binlexr.srctree",".binlexr.tgtdata",".binlexr.voc0",".binlexr.voc1") {
        $new_path = cache_file($path,$suffix);
      }
    }
    # some other files may need some more special handling
    # but this works for me right now. feel free to add
    else {
      $new_path = cache_file($path,"");
    }
    print {$new_fh} "$pre$new_path$post\n" unless $just_update_timestamps;
  }
  else {
    print {$new_fh} $line unless $just_update_timestamps;
  }
}
close($old_fh);

unless ($just_update_timestamps) {
  # buffered write errors only surface at close(); do not leave a
  # truncated config behind for other jobs to pick up
  my $write_ok = close($new_fh);
  unlink($cached_config) unless $write_ok;
  unlink("$cached_config.lock");
  die("ERROR: could not write cached config '$cached_config'")
    unless $write_ok;
}

print "$cached_config\n";
|
||||
|
||||
# Copy one model file into $CACHE_DIR (unless it is already there) and
# return the location of the cached copy.
#
#   $path   - model path as given in the config, without the suffix
#   $suffix - file name suffix that is appended to $path ("" for none);
#             ".gz" is added if only a gzipped version of the file exists
#
# Returns the cached path WITHOUT the suffix, i.e. the value that replaces
# $path in the config. Returns $path unchanged (after a warning on STDERR)
# if the file does not exist or could not be cached.
#
# Concurrent callers are serialised per file with a "<cached file>.lock"
# file inside $CACHE_DIR.
sub cache_file {
  my ($path,$suffix) = @_;
  my $source = "$path$suffix";

  # add gzipped extension if that's what it is
  if (! -e $source && -e "$source.gz") {
    $suffix .= ".gz";
    $source .= ".gz";
  }

  # file does not exist... nothing to do
  if (! -e $source) {
    print STDERR "WARNING: $source does not exist - cannot be cached by cache-model.perl\n";
    return $path;
  }

  # follow symbolic links, so that the cache name is derived from the
  # real location of the file
  my $uniq_path = abs_path($source);
  if (!defined $uniq_path || $uniq_path eq "") {
    print STDERR "WARNING: could not resolve $source - cannot be cached by cache-model.perl\n";
    return $path;
  }

  # create cached file name: real path without the suffix, flattened into
  # one file name. The suffix is only removed if the link target really
  # ends in it; cutting off length($suffix) characters blindly could map
  # different targets to the same cache name.
  my $cached_path = $uniq_path;
  $cached_path =~ s/\Q$suffix\E\z//;
  $cached_path =~ s/\//_/g;
  $cached_path = "$CACHE_DIR/$cached_path";

  my $cached_file = "$cached_path$suffix";
  my $lock_file   = "$cached_file.lock";

  while (1) {
    # sleep if another process is copying right now...
    sleep(10) while -e $lock_file;

    # done if already there
    if (-e $cached_file) {
      my $now = time;
      utime($now,$now,$cached_file); # update time stamp
      return $cached_path;
    }

    # O_EXCL creates the lock atomically: only one process can win
    if (sysopen(my $lock_fh,$lock_file,O_WRONLY|O_CREAT|O_EXCL)) {
      close($lock_fh);
      last;
    }
    # EEXIST: somebody else got the lock first - wait for that copy
    unless ($!{EEXIST}) {
      print STDERR "WARNING: could not create lock $lock_file ($!) - $source is not cached by cache-model.perl\n";
      return $path;
    }
  }

  # okay, go for it
  # copy to a temporary name and rename when complete, so that a failed
  # or interrupted copy is never mistaken for a valid cached file;
  # list-form system() keeps the shell away from the file names
  my $tmp_file = "$cached_file.tmp.$$";
  my $copied = system("cp","--",$source,$tmp_file) == 0
               && rename($tmp_file,$cached_file);
  unlink($tmp_file) unless $copied;
  unlink($lock_file);

  unless ($copied) {
    print STDERR "WARNING: could not copy $source to $cached_file - not cached by cache-model.perl\n";
    return $path;
  }
  return $cached_path;
}
|
||||
|
Loading…
Reference in New Issue
Block a user