mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 22:45:50 +03:00
74 lines
3.1 KiB
Makefile
74 lines
3.1 KiB
Makefile
# -*- Makefile -*-
|
|
# This module specifies the locations of required scripts and programs
|
|
|
|
# Moses directories:
|
|
# MOSES_ROOT: root directory of the distribution
|
|
# MOSES_BIN: where compiled binaries are kept
|
|
# MGIZA_ROOT: root directory of the mgiza installation
|
|
# Installation roots. All three are conditional ('?=') assignments, so a value
# that is already set when this file is read is left untouched.
# MOSES_BIN and MGIZA_ROOT are derived from MOSES_ROOT unless set explicitly.
MOSES_ROOT ?= $(HOME)/accept/exp/journal-paper/moses
MOSES_BIN  ?= $(MOSES_ROOT)/bin
MGIZA_ROOT ?= $(MOSES_ROOT)
|
|
|
|
# default location (unless specified otherwise above)
|
|
# Fallback directories, each applied only if the variable is not already set.
MOSES_BIN     ?= $(MOSES_ROOT)/bin
MOSES_SCRIPTS ?= $(MOSES_ROOT)/scripts
MERT_BIN      ?= $(MOSES_BIN)

# m4mdir is joined to 'scripts' with no separator in between, so it presumably
# ends in '/' -- TODO confirm against the makefile that defines m4mdir.
M4M_SCRIPTS   ?= $(m4mdir)scripts
|
|
|
|
# default locations of scripts and executables
|
|
|
|
# utilities
|
|
# GNU parallel invocation used by the recipes: '<path to parallel> --gnu'.
#
# The default is only computed when 'parallel' has not been set by the caller
# (same precedence as the '?=' it replaces). It is resolved once with ':=' so
# that 'which' is not re-run on every expansion of ${parallel}.
ifeq ($(origin parallel),undefined)
  parallel := $(shell which parallel)
  # Validate the bare path BEFORE adding the flag: once ' --gnu' is appended
  # the value is never empty and a missing executable would go unnoticed.
  $(if $(strip ${parallel}),,$(error GNU parallel utility not found!))
  parallel += --gnu
endif
# A caller-supplied value must not be empty either.
$(if $(strip ${parallel}),,$(error GNU parallel utility not found!))
|
|
|
|
# corpus preprocessing
|
|
# Per-language pre-tokenizer and tokenizer command lines. L1 and L2 appear in
# the variable *names*; make expands the name of an assignment while reading
# the file, so L1 and L2 need to be set before this file is read.
pre-tokenize.$(L1) ?= $(MOSES_SCRIPTS)/tokenizer/pre-tokenizer.perl -l $(L1)
pre-tokenize.$(L2) ?= $(MOSES_SCRIPTS)/tokenizer/pre-tokenizer.perl -l $(L2)

tokenize.$(L1) ?= $(MOSES_SCRIPTS)/tokenizer/tokenizer.perl -q -a -l $(L1) -no-escape
tokenize.$(L2) ?= $(MOSES_SCRIPTS)/tokenizer/tokenizer.perl -q -a -l $(L2) -no-escape

# Casing scripts found under MOSES_SCRIPTS.
train-truecaser ?= $(MOSES_SCRIPTS)/recaser/train-truecaser.perl
run-truecaser   ?= $(MOSES_SCRIPTS)/recaser/truecase.perl
run-detruecaser ?= $(MOSES_SCRIPTS)/recaser/detruecase.perl
run-lowercaser  ?= $(MOSES_SCRIPTS)/tokenizer/lowercase.perl
|
|
|
|
# lm construction
|
|
# KenLM executables, looked up in MOSES_BIN unless already set.
kenlm.build    ?= $(MOSES_BIN)/lmplz
kenlm.binarize ?= $(MOSES_BIN)/build_binary
|
|
|
|
# word alignment with mgiza
|
|
# Executables and the merge script taken from the mgiza installation.
mgiza       ?= $(MGIZA_ROOT)/bin/mgiza
snt2cooc    ?= $(MGIZA_ROOT)/bin/snt2cooc
plain2snt   ?= $(MGIZA_ROOT)/bin/plain2snt
mkcls       ?= $(MGIZA_ROOT)/bin/mkcls
mgiza.merge ?= $(MGIZA_ROOT)/scripts/merge_alignment.py

# Helper scripts, from M4M_SCRIPTS and MOSES_SCRIPTS respectively.
giza.txt2snt.sh ?= $(M4M_SCRIPTS)/giza.txt2snt.sh
giza2bal.pl     ?= $(MOSES_SCRIPTS)/training/giza2bal.pl

# Binaries taken from MOSES_BIN.
symal        ?= $(MOSES_BIN)/symal
merge-sorted ?= $(MOSES_BIN)/merge-sorted
|
|
|
|
# word alignment with fast_align
|
|
# Defaults to a copy in the user's own bin directory; set fast_align
# beforehand to use a different location.
fast_align ?= $(HOME)/bin/fast_align
|
|
|
|
# phrase and distortion table construction
|
|
# moses.make-lex: extracts word translation lexicon from a word-aligned corpus
|
|
# in text format
|
|
# Wrapper scripts located under M4M_SCRIPTS.
moses.make-lex        ?= $(M4M_SCRIPTS)/moses.make-lex.py
moses.extract-phrases ?= $(M4M_SCRIPTS)/moses.phrase-extract.sh
moses.score-phrases   ?= $(M4M_SCRIPTS)/moses.score-phrases.sh

# Executables located under MOSES_BIN.
moses.score-reordering ?= $(MOSES_BIN)/lexical-reordering-score
moses.extract          ?= $(MOSES_BIN)/extract
|
|
|
|
# translation
|
|
# The 'moses' executable inside MOSES_BIN, unless 'moses' is already set.
moses ?= $(MOSES_BIN)/moses
|
|
|
|
# tuning and evaluation
|
|
# Tuning driver script.
mert ?= $(MOSES_SCRIPTS)/training/mert-moses.pl

# Output post-processing and scoring scripts.
detruecase ?= $(MOSES_SCRIPTS)/recaser/detruecase.perl
multi-bleu ?= $(MOSES_SCRIPTS)/generic/multi-bleu.perl

# Support scripts from the ems/support directory of MOSES_SCRIPTS.
clean-decoder-output ?= $(MOSES_SCRIPTS)/ems/support/remove-segmentation-markup.perl
analyze              ?= $(MOSES_SCRIPTS)/ems/support/analysis.perl
report               ?= $(MOSES_SCRIPTS)/ems/support/report-experiment-scores.perl
apply-weights        ?= $(MOSES_SCRIPTS)/ems/support/substitute-weights.perl
|