Patching up the suffix array wrappers

This commit is contained in:
Wilker Aziz 2013-06-24 15:38:10 +01:00
parent b49e6a162f
commit 2c19238c24
4 changed files with 14 additions and 8 deletions

View File

@ -2552,6 +2552,8 @@ sub define_tuningevaluation_filter {
# get model, and whether suffix array is used. Determines the pt implementation.
my $sa_exec_dir = &get("TRAINING:suffix-array");
my $sa_extractors = &get("GENERAL:sa_extractors");
$sa_extractors = 1 unless $sa_extractors;
my ($ptImpl, $numFF);
if ($hierarchical) {
@ -2589,11 +2591,14 @@ sub define_tuningevaluation_filter {
# filter command
if ($sa_exec_dir) {
# suffix array
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir \n";
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir $sa_extractors \n";
my $escaped_filter_dir = $filter_dir;
$escaped_filter_dir =~ s/\//\\\\\//g;
$cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n";
# kind of a hack -- the correct thing would be to make the generation of the config file ($filter_dir/moses.ini)
# set the PhraseDictionaryALSuffixArray's path to the filtered directory rather than to the suffix array itself
$cmd .= "sed -i 's%path=$phrase_translation_table%path=$filter_dir%' $filter_dir/moses.ini\n";
}
else {
# normal phrase table

View File

@ -1919,6 +1919,7 @@ sub create_ini {
$phrase_table_impl_name = "PhraseDictionaryBinary" if $phrase_table_impl==1;
$phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
$phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
$phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
#table limit
my $table_limit = 0;

View File

@ -24,10 +24,10 @@ rm -rf $SA_OUTPUT/bitext
pushd .
cd $SA_EXEC_DIR
./sa-compile.pl -output $SA_OUTPUT -b bitext_name=$SOURCE_CORPUS,$TARGET_CORPUS -a alignment_name=$ALIGNMENT > $SA_OUTPUT/extract.ini
python $SA_EXEC_DIR/cdec/sa/compile.py -o $SA_OUTPUT -f $SOURCE_CORPUS -e $TARGET_CORPUS -a $ALIGNMENT -c $SA_OUTPUT/extract.ini
popd
echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0\n" > $GLUE_GRAMMAR
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0\n" >> $GLUE_GRAMMAR
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0\n" >> $GLUE_GRAMMAR
echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" > $GLUE_GRAMMAR
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" >> $GLUE_GRAMMAR
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" >> $GLUE_GRAMMAR

View File

@ -12,14 +12,14 @@ SA_EXEC_DIR=$1
MODEL_DIR=$2
INPUT_FILE=$3
OUTPUT_DIR=$4
JOBS=$5
mkdir $OUTPUT_DIR
pushd .
cd $OUTPUT_DIR
cat $INPUT_FILE | $SA_EXEC_DIR/escape-testset.pl | $SA_EXEC_DIR/extractor.py -c $MODEL_DIR/extract.ini
gzip $OUTPUT_DIR/grammar.out.*
cat $INPUT_FILE | python $SA_EXEC_DIR/cdec/sa/extract.py -c $MODEL_DIR/extract.ini -g $OUTPUT_DIR -j $JOBS -z > $OUTPUT_DIR/input.sgm
popd