Patching up the suffix array wrappers

This commit is contained in:
Wilker Aziz 2013-06-24 15:38:10 +01:00
parent b49e6a162f
commit 2c19238c24
4 changed files with 14 additions and 8 deletions

View File

@ -2552,6 +2552,8 @@ sub define_tuningevaluation_filter {
# get model, and whether suffix array is used. Determines the pt implementation. # get model, and whether suffix array is used. Determines the pt implementation.
my $sa_exec_dir = &get("TRAINING:suffix-array"); my $sa_exec_dir = &get("TRAINING:suffix-array");
my $sa_extractors = &get("GENERAL:sa_extractors");
$sa_extractors = 1 unless $sa_extractors;
my ($ptImpl, $numFF); my ($ptImpl, $numFF);
if ($hierarchical) { if ($hierarchical) {
@ -2589,11 +2591,14 @@ sub define_tuningevaluation_filter {
# filter command # filter command
if ($sa_exec_dir) { if ($sa_exec_dir) {
# suffix array # suffix array
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir \n"; $cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir $sa_extractors \n";
my $escaped_filter_dir = $filter_dir; my $escaped_filter_dir = $filter_dir;
$escaped_filter_dir =~ s/\//\\\\\//g; $escaped_filter_dir =~ s/\//\\\\\//g;
$cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n"; $cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n";
# kind of a hack -- the correct thing would be to make the generation of the config file ($filter_dir/moses.ini)
# set the PhraseDictionaryALSuffixArray's path to the filtered directory rather than to the suffix array itself
$cmd .= "sed -i 's%path=$phrase_translation_table%path=$filter_dir%' $filter_dir/moses.ini\n";
} }
else { else {
# normal phrase table # normal phrase table

View File

@ -1919,6 +1919,7 @@ sub create_ini {
$phrase_table_impl_name = "PhraseDictionaryBinary" if $phrase_table_impl==1; $phrase_table_impl_name = "PhraseDictionaryBinary" if $phrase_table_impl==1;
$phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2; $phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
$phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6; $phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
$phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
#table limit #table limit
my $table_limit = 0; my $table_limit = 0;

View File

@ -24,10 +24,10 @@ rm -rf $SA_OUTPUT/bitext
pushd . pushd .
cd $SA_EXEC_DIR cd $SA_EXEC_DIR
./sa-compile.pl -output $SA_OUTPUT -b bitext_name=$SOURCE_CORPUS,$TARGET_CORPUS -a alignment_name=$ALIGNMENT > $SA_OUTPUT/extract.ini python $SA_EXEC_DIR/cdec/sa/compile.py -o $SA_OUTPUT -f $SOURCE_CORPUS -e $TARGET_CORPUS -a $ALIGNMENT -c $SA_OUTPUT/extract.ini
popd popd
echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0\n" > $GLUE_GRAMMAR echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" > $GLUE_GRAMMAR
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0\n" >> $GLUE_GRAMMAR echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" >> $GLUE_GRAMMAR
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0\n" >> $GLUE_GRAMMAR echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" >> $GLUE_GRAMMAR

View File

@ -12,14 +12,14 @@ SA_EXEC_DIR=$1
MODEL_DIR=$2 MODEL_DIR=$2
INPUT_FILE=$3 INPUT_FILE=$3
OUTPUT_DIR=$4 OUTPUT_DIR=$4
JOBS=$5
mkdir $OUTPUT_DIR mkdir $OUTPUT_DIR
pushd . pushd .
cd $OUTPUT_DIR cd $OUTPUT_DIR
cat $INPUT_FILE | $SA_EXEC_DIR/escape-testset.pl | $SA_EXEC_DIR/extractor.py -c $MODEL_DIR/extract.ini cat $INPUT_FILE | python $SA_EXEC_DIR/cdec/sa/extract.py -c $MODEL_DIR/extract.ini -g $OUTPUT_DIR -j $JOBS -z > $OUTPUT_DIR/input.sgm
gzip $OUTPUT_DIR/grammar.out.*
popd popd