mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Patching up the suffix array wrappers
This commit is contained in:
parent
b49e6a162f
commit
2c19238c24
@ -2552,6 +2552,8 @@ sub define_tuningevaluation_filter {
|
||||
|
||||
# Get the model and whether a suffix array is used; together these determine the phrase table (pt) implementation.
|
||||
my $sa_exec_dir = &get("TRAINING:suffix-array");
|
||||
my $sa_extractors = &get("GENERAL:sa_extractors");
|
||||
$sa_extractors = 1 unless $sa_extractors;
|
||||
|
||||
my ($ptImpl, $numFF);
|
||||
if ($hierarchical) {
|
||||
@ -2589,11 +2591,14 @@ sub define_tuningevaluation_filter {
|
||||
# filter command
|
||||
if ($sa_exec_dir) {
|
||||
# suffix array
|
||||
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir \n";
|
||||
$cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir $sa_extractors \n";
|
||||
|
||||
my $escaped_filter_dir = $filter_dir;
|
||||
$escaped_filter_dir =~ s/\//\\\\\//g;
|
||||
$cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n";
|
||||
# Kind of a hack: the correct fix would be for the step that generates the config file ($filter_dir/moses.ini)
|
||||
# to set the PhraseDictionaryALSuffixArray's path to the filtered directory rather than to the suffix array itself.
|
||||
$cmd .= "sed -i 's%path=$phrase_translation_table%path=$filter_dir%' $filter_dir/moses.ini\n";
|
||||
}
|
||||
else {
|
||||
# normal phrase table
|
||||
|
@ -1919,6 +1919,7 @@ sub create_ini {
|
||||
$phrase_table_impl_name = "PhraseDictionaryBinary" if $phrase_table_impl==1;
|
||||
$phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
|
||||
$phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
|
||||
$phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
|
||||
|
||||
#table limit
|
||||
my $table_limit = 0;
|
||||
|
@ -24,10 +24,10 @@ rm -rf $SA_OUTPUT/bitext
|
||||
pushd .
|
||||
cd $SA_EXEC_DIR
|
||||
|
||||
./sa-compile.pl -output $SA_OUTPUT -b bitext_name=$SOURCE_CORPUS,$TARGET_CORPUS -a alignment_name=$ALIGNMENT > $SA_OUTPUT/extract.ini
|
||||
python $SA_EXEC_DIR/cdec/sa/compile.py -o $SA_OUTPUT -f $SOURCE_CORPUS -e $TARGET_CORPUS -a $ALIGNMENT -c $SA_OUTPUT/extract.ini
|
||||
|
||||
popd
|
||||
|
||||
echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0\n" > $GLUE_GRAMMAR
|
||||
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0\n" >> $GLUE_GRAMMAR
|
||||
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0\n" >> $GLUE_GRAMMAR
|
||||
echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" > $GLUE_GRAMMAR
|
||||
echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" >> $GLUE_GRAMMAR
|
||||
echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" >> $GLUE_GRAMMAR
|
||||
|
@ -12,14 +12,14 @@ SA_EXEC_DIR=$1
|
||||
MODEL_DIR=$2
|
||||
INPUT_FILE=$3
|
||||
OUTPUT_DIR=$4
|
||||
JOBS=$5
|
||||
|
||||
mkdir $OUTPUT_DIR
|
||||
|
||||
pushd .
|
||||
cd $OUTPUT_DIR
|
||||
|
||||
cat $INPUT_FILE | $SA_EXEC_DIR/escape-testset.pl | $SA_EXEC_DIR/extractor.py -c $MODEL_DIR/extract.ini
|
||||
gzip $OUTPUT_DIR/grammar.out.*
|
||||
cat $INPUT_FILE | python $SA_EXEC_DIR/cdec/sa/extract.py -c $MODEL_DIR/extract.ini -g $OUTPUT_DIR -j $JOBS -z > $OUTPUT_DIR/input.sgm
|
||||
|
||||
popd
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user