# mosesdecoder/contrib/m4m/modules/tools.m4m

# 74 lines
# 3.1 KiB
# Makefile

# -*- Makefile -*-
# This module specifies the locations of required scripts and programs.
# Every setting below uses ?=, so a value that is already defined when this
# file is read (environment, command line, or an earlier makefile) is kept.
#
# Moses directories:
# MOSES_ROOT:    root directory of the distribution
#                (the default is a site-specific path below ${HOME};
#                override it for your installation)
# MOSES_BIN:     where compiled binaries are kept
# MGIZA_ROOT:    root directory of the mgiza installation
#                (defaults to MOSES_ROOT)
# MOSES_SCRIPTS: scripts directory of the distribution
# MERT_BIN:      directory for the MERT binaries (defaults to MOSES_BIN)
# M4M_SCRIPTS:   scripts directory of this package; ${m4mdir} is not defined
#                in this file and is joined without a separator, so it
#                presumably ends in a slash -- TODO confirm
MOSES_ROOT ?= ${HOME}/accept/exp/journal-paper/moses
MOSES_BIN ?= ${MOSES_ROOT}/bin
MGIZA_ROOT ?= ${MOSES_ROOT}
MOSES_SCRIPTS ?= ${MOSES_ROOT}/scripts
MERT_BIN ?= ${MOSES_BIN}
M4M_SCRIPTS ?= ${m4mdir}scripts
# default locations of scripts and executables
# utilities
# parallel: command used to invoke GNU parallel (path found via `which`,
# followed by --gnu). A value that is already defined is left untouched and
# no lookup is performed.
# The lookup result is stored on its own and --gnu is appended only when the
# lookup succeeded. Appending it unconditionally would make ${parallel}
# non-empty even when nothing was found, and the check below could never fail.
# := is used so that `which` runs once, not on every expansion of ${parallel}.
ifeq ($(origin parallel),undefined)
parallel.path := $(shell which parallel 2>/dev/null)
parallel := $(if ${parallel.path},${parallel.path} --gnu)
endif
# Abort early if no usable value is available (not found, or set to empty).
$(if $(strip ${parallel}),,$(error GNU parallel utility not found!))
# corpus preprocessing
# All tools in this section are Perl scripts below ${MOSES_SCRIPTS}.
# The pre-tokenize.* and tokenize.* variable names are built from ${L1} and
# ${L2}; neither is defined in this file. Make expands the name on the left
# of ?= when the line is read, so both need to be set by then.
# pre-tokenize.<lang>: pre-tokenizer invocation for that language
pre-tokenize.${L1} ?= ${MOSES_SCRIPTS}/tokenizer/pre-tokenizer.perl -l ${L1}
pre-tokenize.${L2} ?= ${MOSES_SCRIPTS}/tokenizer/pre-tokenizer.perl -l ${L2}
# tokenize.<lang>: tokenizer invocation for that language
# NOTE(review): per tokenizer.perl usage, -q is quiet mode, -a is aggressive
# hyphen splitting and -no-escape disables escaping of special characters --
# confirm against the script version in use.
tokenize.${L1} ?= ${MOSES_SCRIPTS}/tokenizer/tokenizer.perl -q -a -l ${L1} -no-escape
tokenize.${L2} ?= ${MOSES_SCRIPTS}/tokenizer/tokenizer.perl -q -a -l ${L2} -no-escape
# truecasing: model training, application, and reversal
train-truecaser ?= ${MOSES_SCRIPTS}/recaser/train-truecaser.perl
run-truecaser ?= ${MOSES_SCRIPTS}/recaser/truecase.perl
run-detruecaser ?= ${MOSES_SCRIPTS}/recaser/detruecase.perl
# lowercasing
run-lowercaser ?= ${MOSES_SCRIPTS}/tokenizer/lowercase.perl
# lm construction
# Both executables are expected in ${MOSES_BIN}.
# kenlm.build:    lmplz, used to build the language model
# kenlm.binarize: build_binary, used to binarize it
kenlm.build ?= ${MOSES_BIN}/lmplz
kenlm.binarize ?= ${MOSES_BIN}/build_binary
# word alignment with mgiza
# Helper scripts: one from this package (${M4M_SCRIPTS}), one from the Moses
# training scripts.
giza.txt2snt.sh ?= ${M4M_SCRIPTS}/giza.txt2snt.sh
giza2bal.pl ?= ${MOSES_SCRIPTS}/training/giza2bal.pl
# Tools taken from the mgiza installation (${MGIZA_ROOT}).
mgiza.merge ?= ${MGIZA_ROOT}/scripts/merge_alignment.py
mgiza ?= ${MGIZA_ROOT}/bin/mgiza
snt2cooc ?= ${MGIZA_ROOT}/bin/snt2cooc
plain2snt ?= ${MGIZA_ROOT}/bin/plain2snt
mkcls ?= ${MGIZA_ROOT}/bin/mkcls
# Tools taken from the Moses binaries (${MOSES_BIN}).
symal ?= ${MOSES_BIN}/symal
merge-sorted ?= ${MOSES_BIN}/merge-sorted
# word alignment with fast_align
# Unlike the other tools, the default is looked up in ${HOME}/bin rather than
# below MOSES_ROOT or MGIZA_ROOT; override it if fast_align lives elsewhere.
fast_align ?= ${HOME}/bin/fast_align
# phrase and distortion table construction
# moses.make-lex: extracts word translation lexicon from a word-aligned corpus
# in text format
# The first three entries are scripts from this package (${M4M_SCRIPTS});
# the last two are executables from ${MOSES_BIN}.
moses.make-lex ?= ${M4M_SCRIPTS}/moses.make-lex.py
moses.extract-phrases ?= ${M4M_SCRIPTS}/moses.phrase-extract.sh
moses.score-phrases ?= ${M4M_SCRIPTS}/moses.score-phrases.sh
moses.score-reordering ?= ${MOSES_BIN}/lexical-reordering-score
moses.extract ?= ${MOSES_BIN}/extract
# translation
# moses: the decoder executable in ${MOSES_BIN}
moses ?= ${MOSES_BIN}/moses
# tuning and evaluation
# All entries are Perl scripts below ${MOSES_SCRIPTS}.
# mert: the tuning driver script
mert ?= ${MOSES_SCRIPTS}/training/mert-moses.pl
# post-processing of decoder output
clean-decoder-output ?= ${MOSES_SCRIPTS}/ems/support/remove-segmentation-markup.perl
# NOTE(review): detruecase points at the same script as run-detruecaser in
# the corpus preprocessing section; both names are kept.
detruecase ?= ${MOSES_SCRIPTS}/recaser/detruecase.perl
# scoring
multi-bleu ?= ${MOSES_SCRIPTS}/generic/multi-bleu.perl
# analysis, reporting and weight substitution (EMS support scripts)
analyze ?= ${MOSES_SCRIPTS}/ems/support/analysis.perl
report ?= ${MOSES_SCRIPTS}/ems/support/report-experiment-scores.perl
apply-weights ?= ${MOSES_SCRIPTS}/ems/support/substitute-weights.perl