mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-08 04:27:53 +03:00
2ed6804f12
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3224 1f5c12ca-751b-0410-a591-d2e778427230
53 lines
1.5 KiB
Bash
Executable File
53 lines
1.5 KiB
Bash
Executable File
#!/bin/sh
|
|
|
|
if [ $# -lt 8 ]
|
|
then
|
|
echo "Usage: $0 <\"java options\"> <berkeleyaligner jar file> <input file stem> <previous berkeley param dir> <output directory> <source lang> <target lang> <alignment name (i.e. 'berk' or 'low-posterior')> <posterior threshold> [aligner options...]"
|
|
exit 1
|
|
fi
|
|
|
|
JAVA_OPTS=$1
|
|
JAR=$2
|
|
INFILE=$3
|
|
PARAMDIR=$4
|
|
OUTNAME=$5
|
|
SLANG=$6
|
|
TLANG=$7
|
|
TAG=$8
|
|
POSTERIOR=$9
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
shift
|
|
|
|
JAVA_CMD="/usr/local/share/java/bin/java $JAVA_OPTS -jar $JAR -Data.trainSources $INFILE.list -Main.loadParamsDir $PARAMDIR -exec.execDir $OUTNAME -Main.loadLexicalModelOnly false -Data.englishSuffix $SLANG -Data.foreignSuffix $TLANG -exec.create true -Main.saveParams false -Main.alignTraining true -Main.forwardModels HMM -Main.reverseModels HMM -Main.mode JOINT -Main.iters 0 -Data.testSources -EMWordAligner.posteriorDecodingThreshold $POSTERIOR $@"
|
|
echo "Running $JAVA_CMD"
|
|
$JAVA_CMD
|
|
|
|
#clean up
|
|
rm $OUTNAME/training.*Input.txt
|
|
rm $OUTNAME/training.*Trees.txt
|
|
gzip $OUTNAME/training.$SLANG-$TLANG.A3
|
|
gzip $OUTNAME/training.$TLANG-$SLANG.A3
|
|
|
|
#now shift the output
|
|
perl -e "
|
|
use strict;
|
|
while (<STDIN>) {
|
|
chomp();
|
|
my @pairs = split(\" \");
|
|
for (my \$i=0;\$i<scalar(@pairs);\$i++) {
|
|
die (\"bad pair \$pairs[\$i]\n\") unless \$pairs[\$i] =~ /(\d+)\-(\d+)/;
|
|
\$pairs[\$i] = (\$2).\"-\".(\$1);
|
|
}
|
|
print join(\" \",@pairs);
|
|
print(\"\n\");
|
|
};" < $OUTNAME/training.$SLANG-$TLANG.align > $OUTNAME.$TAG
|
|
|
|
gzip $OUTNAME/training.$SLANG-$TLANG.align
|