Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2011-12-02 00:42:06 +07:00
commit 8c1c17e9f1
34 changed files with 243 additions and 2496 deletions

57
.gitignore vendored
View File

@ -1,65 +1,30 @@
*.[oa]
*.la
*.lo
*.Po
*.so
*.a
*.swp
*~
CreateOnDisk/src/CreateOnDiskPt
Makefile
Makefile.in
aclocal.m4
autom4te.cache/
config.h
config.log
config.guess
config.status
config.sub
configure
depcomp
dist*
install-sh
jam-files/bjam
jam-files/engine/bootstrap
jam-files/engine/bin.*
lm/.deps/
lm/.libs/
util/.deps/
util/.libs/
lm/build_binary
lm/query
libtool
mert/.deps/
mert/Makefile
mert/Makefile.in
mert/evaluator
mert/extractor
mert/mert
mert/megam_i686.opt
mert/pro
misc/.deps/
mert/.libs/
misc/Makefile
misc/Makefile.in
misc/processLexicalTable
misc/processPhraseTable
misc/queryLexicalTable
misc/queryPhraseTable
moses-chart/src/.deps/
moses-chart-cmd/src/moses_chart
moses-cmd/src/.deps/
moses-cmd/src/Makefile
moses-cmd/src/Makefile.in
moses-cmd/src/checkplf
moses-cmd/src/lmbrgrid
moses-cmd/src/moses
moses/src/.deps/
moses/src/.libs/
moses/src/Makefile
moses/src/Makefile.in
regression-testing/moses-reg-test-data-*
regression-testing/tests/mert.extractor-bin/FEATSTAT*
regression-testing/tests/mert.extractor-bin/SCORESTAT*
scripts-2*
scripts/ems/biconcor/biconcor
scripts/release-exclude
scripts/training/cmert-0.5/mert
@ -73,17 +38,6 @@ scripts/training/phrase-extract/consolidate
scripts/training/phrase-extract/consolidate-direct
scripts/training/phrase-extract/consolidate-reverse
scripts/training/phrase-extract/extract
scripts/training/phrase-extract/extract-ghkm/config.guess
scripts/training/phrase-extract/extract-ghkm/config.h.in
scripts/training/phrase-extract/extract-ghkm/config.sub
scripts/training/phrase-extract/extract-ghkm/depcomp
scripts/training/phrase-extract/extract-ghkm/install-sh
scripts/training/phrase-extract/extract-ghkm/m4/libtool.m4
scripts/training/phrase-extract/extract-ghkm/m4/ltoptions.m4
scripts/training/phrase-extract/extract-ghkm/m4/ltsugar.m4
scripts/training/phrase-extract/extract-ghkm/m4/ltversion.m4
scripts/training/phrase-extract/extract-ghkm/m4/lt~obsolete.m4
scripts/training/phrase-extract/extract-ghkm/missing
scripts/training/phrase-extract/extract-ghkm/tools/extract-ghkm
scripts/training/phrase-extract/extract-lex
scripts/training/phrase-extract/extract-rules
@ -92,12 +46,5 @@ scripts/training/phrase-extract/score
scripts/training/phrase-extract/statistics
scripts/training/symal/symal
scripts/training/train-model.perl
stamp-h1
ltmain.sh
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
dist
bin

View File

@ -1,64 +1,109 @@
0) Preliminaries
PRELIMINARIES
Before building you need to decide what language model toolkit (SRI's,
IRST's, or Ken's) you want to use.
Moses is primarily targeted at gcc on UNIX.
If you want to use SRI's, you will need to download its source
and build it. The SRILM can be downloaded from
Moses requires gcc, Boost >= 1.36, and zlib including the headers that some
distributions package separately (i.e. -dev or -devel packages). Source is
available at http://boost.org .
There are several optional dependencies:
GIZA++ from http://code.google.com/p/giza-pp/ is used to build phrase tables.
Moses server requires xmlrpc-c with abyss-server. Source is available from
http://xmlrpc-c.sourceforge.net/.
The scripts support building ARPA format language models with SRILM or IRSTLM.
To apply models inside the decoder, you can use SRILM, IRSTLM, or KenLM. The
ARPA format is interchangeable so that e.g. you can build a model with SRILM and
run the decoder with IRSTLM or KenLM.
If you want to use SRILM, you will need to download its source and build it.
The SRILM can be downloaded from
http://www.speech.sri.com/projects/srilm/download.html .
On x86_64, the default machine type is broken. Edit sbin/machine-type, find
this code
else if (`uname -m` == x86_64) then
set MACHINE_TYPE = i686
and change it to
else if (`uname -m` == x86_64) then
set MACHINE_TYPE = i686-m64
You may have to chmod +w sbin/machine-type first.
If you want to use IRST's, you will need to download its source and
build it. The IRSTLM can be downloaded from either the SourceForge
website
If you want to use IRSTLM, you will need to download its source and build it.
The IRSTLM can be downloaded from either the SourceForge website
http://sourceforge.net/projects/irstlm
or the official IRSTLM website
http://hlt.fbk.eu/en/irstlm
Ken's LM is included with the Moses distribution.
KenLM is included with Moses.
--------------------------------------------------------------------------
1) Instructions for building with SRILM
ADVICE ON INSTALLING EXTERNAL LIBRARIES
Build SRILM according to their release instructions. Make sure that
you DO NOT override the MACHINE_TYPE variable on the command line when
you do so, as this can lead to problems locating the library.
Generally, for trouble installing external libraries, you should get support
directly from the library maker:
./bjam [--with-srilm=/path/to/srilm]
Boost: http://www.boost.org/doc/libs/1_48_0/more/getting_started/unix-variants.html
IRSTLM: https://list.fbk.eu/sympa/subscribe/user-irstlm
SRILM: http://www.speech.sri.com/projects/srilm/#srilm-user
However, here's some general advice on installing software (for bash users):
#Determine where you want to install packages
PREFIX=$HOME/usr
#If your system has lib64 directories, lib64 should be used AND NOT lib
if [ -d /lib64 ]; then
LIBDIR=$PREFIX/lib64
else
LIBDIR=$PREFIX/lib
fi
#If you're installing to a non-standard path, tell programs where to find things:
export PATH=$PREFIX/bin${PATH:+:$PATH}
export LD_LIBRARY_PATH=$LIBDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export LIBRARY_PATH=$LIBDIR${LIBRARY_PATH:+:$LIBRARY_PATH}
export CPATH=$PREFIX/include${CPATH:+:$CPATH}
Add all the above code to your .bashrc or .bash_login as appropriate. Then
you're ready to install packages in non-standard paths:
#For autotools packages e.g. xmlrpc-c
./configure --prefix=$PREFIX --libdir=$LIBDIR [other options here]
#For Boost:
./bootstrap.sh
./b2 --prefix=$PREFIX --libdir=$LIBDIR link=static,shared threading=multi install
--------------------------------------------------------------------------
2) Instructions for building with IRSTLM
BUILDING
Build IRSTLM according to its release instructions.
Building consists of running
./bjam [options]
./bjam [--with-irstlm=/path/to/irstlm]
Common options are:
--with-srilm=/path/to/srilm to compile the decoder with SRILM support
--with-irstlm=/path/to/irstlm to compile the decoder with IRSTLM support
--with-giza=/path/to/giza to enable training scripts
-jN where N is the number of CPUs
Binaries will appear in dist/bin.
For further documentation, run
./bjam --help
--------------------------------------------------------------------------
3) Instructions for building with Ken's LM
./bjam
--------------------------------------------------------------------------
ALTERNATIVE WAYS TO BUILD ON UNIX AND OTHER PLATFORMS
Microsoft Windows
-----------------
Tested on 32-bit Windows XP and Vista using Visual Studio 2005.
Again, refer to the old manual
http://homepages.inf.ed.ac.uk/s0565741/papers/developers-manual.pdf
The Windows build doesn't use the SRI or IRST language model libraries as they can't be compiled
under Windows using Visual Studio. Instead, an internal language model, which behave like SRILM is used,
however, it can only handle up to trigrams.
Moses is primarily targeted at gcc on UNIX. Windows users should consult
http://ssli.ee.washington.edu/people/amittai/Moses-on-Win7.pdf .
Binaries for all external libraries needed can be downloaded from
http://www.statmt.org/moses/?n=Moses.LibrariesUsed
Only the decoder is developed and tested under Windows. There are difficulties using the training scripts under Windows, even with Cygwin.
Only the decoder is developed and tested under Windows. There are difficulties
using the training scripts under Windows, even with Cygwin.

View File

@ -1,6 +0,0 @@
bin_PROGRAMS = CreateOnDiskPt
CreateOnDiskPt_SOURCES = Main.cpp
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
CreateOnDiskPt_LDADD = -L$(top_srcdir)/OnDiskPt/src -L$(top_srcdir)/moses/src -lOnDiskPt -lmoses $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
CreateOnDiskPt_DEPENDENCIES = $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a $(top_srcdir)/moses/src/libmoses.la

26
Jamroot
View File

@ -59,12 +59,12 @@
path-constant TOP : . ;
# Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php
rule trim-nl ( str ) {
rule trim-nl ( str extras * ) {
return [ MATCH "([^
]*)" : $(str) ] ;
]*)" : $(str) ] $(extras) ;
}
rule _shell ( cmd ) {
return [ trim-nl [ SHELL $(cmd) ] ] ;
rule _shell ( cmd : extras * ) {
return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
}
import option ;
@ -110,7 +110,7 @@ if $(boost-version) < 103600 {
exit You have Boost $(boost-version). Moses requires at least 103600 (and preferably newer). : 1 ;
}
#Are we linking static binaries against shared boost?
boost-auto-shared = [ auto_shared "boost_program_options" : L-boost-search ] ;
boost-auto-shared = [ auto_shared "boost_program_options" : $(L-boost-search) ] ;
#Convenience rule for boost libraries. Defines library boost_$(name).
rule boost_lib ( name macro ) {
#Link multi-threaded programs against the -mt version if available. Old
@ -137,14 +137,20 @@ rule external_lib ( name ) {
external_lib z ;
requirements = ;
#libSegFault prints a stack trace on segfault. Link against it if available.
if [ test_flags "-lSegfault" ] {
external_lib SegFault ;
segfault = <library>SegFault ;
requirements += <library>SegFault ;
}
trace = [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
boost-pool = [ option.get "enable-boost-pool" : : "<define>USE_BOOST_POOL" ] ;
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
if [ option.get "with-irstlm" ] {
requirements += <threading>single ;
}
import os ;
@ -164,9 +170,7 @@ project : requirements
<threading>multi:<define>WITH_THREADS
<threading>multi:<library>boost_thread
<define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES
$(segfault)
$(trace)
$(boost-pool)
$(requirements)
<cxxflags>$(cxxflags)
<cflags>$(cflags)
<linkflags>$(ldflags)

View File

@ -1,14 +0,0 @@
# not a GNU package. You can remove this line, if
# have all needed files, that a GNU package needs
AUTOMAKE_OPTIONS = foreign
ACLOCAL_AMFLAGS = -I m4
# order is important here: build moses before moses-cmd
if WITH_MERT
MERT = mert
endif
if WITH_SERVER
SERVER = contrib/server
endif
SUBDIRS = util lm moses/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src $(MERT) $(SERVER)

View File

@ -1,14 +0,0 @@
lib_LIBRARIES = libOnDiskPt.a
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libOnDiskPt_a_SOURCES = \
OnDiskWrapper.cpp \
SourcePhrase.cpp \
TargetPhrase.cpp \
Word.cpp \
Phrase.cpp \
PhraseNode.cpp \
TargetPhraseCollection.cpp \
Vocab.cpp

View File

@ -1,116 +0,0 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* Defined if the requested minimum BOOST version is satisfied */
#undef HAVE_BOOST
/* Define to 1 if you have <boost/program_options.hpp> */
#undef HAVE_BOOST_PROGRAM_OPTIONS_HPP
/* Define to 1 if you have <boost/scoped_ptr.hpp> */
#undef HAVE_BOOST_SCOPED_PTR_HPP
/* Define to 1 if you have <boost/shared_ptr.hpp> */
#undef HAVE_BOOST_SHARED_PTR_HPP
/* Define to 1 if you have <boost/thread.hpp> */
#undef HAVE_BOOST_THREAD_HPP
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* flag for DMapLM */
#undef HAVE_DMAPLM
/* Define to 1 if you have the <getopt.h> header file. */
#undef HAVE_GETOPT_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* flag for IRSTLM */
#undef HAVE_IRSTLM
/* Define to 1 if you have the `oolm' library (-loolm). */
#undef HAVE_LIBOOLM
/* Define to 1 if you have the `tcmalloc' library (-ltcmalloc). */
#undef HAVE_LIBTCMALLOC
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <nl-cpt.h> header file. */
#undef HAVE_NL_CPT_H
/* flag for ORLM */
#undef HAVE_ORLM
/* flag for protobuf */
#undef HAVE_PROTOBUF
/* flag for RandLM */
#undef HAVE_RANDLM
/* flag for SRILM */
#undef HAVE_SRILM
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* flag for Syntactic Parser */
#undef HAVE_SYNLM
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* flag for zlib */
#undef HAVE_ZLIB
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Flag to enable use of Boost pool */
#undef USE_BOOST_POOL
/* Version number of package */
#undef VERSION

View File

@ -1,332 +0,0 @@
AC_INIT(moses/src)
AM_CONFIG_HEADER(config.h)
AM_INIT_AUTOMAKE(moses, 0.1)
AC_CONFIG_MACRO_DIR([m4])
AC_PROG_CXX
AC_PROG_CXXCPP
AC_LANG_CPLUSPLUS
AC_DISABLE_SHARED
AC_PROG_LIBTOOL
# Shared libraries are disabled by default
#LT_INIT([disable-shared])
AX_XMLRPC_C
BOOST_REQUIRE([1.36.0])
BOOST_SMART_PTR
BOOST_PROGRAM_OPTIONS
AC_ARG_WITH(protobuf,
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
[with_protobuf=$withval],
[with_protobuf=no]
)
AC_ARG_WITH(srilm,
[AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
[with_srilm=$withval],
[with_srilm=no]
)
AC_ARG_WITH(srilm-dynamic,
[AC_HELP_STRING([--with-srilm-dynamic], [(optional) link dynamically with srilm])],
[with_srilm_dynamic=yes],
[with_srilm_dynamic=no]
)
AC_ARG_WITH(srilm-arch,
[AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])],
[with_srilm_arch=$withval],
[with_srilm_arch=no]
)
AC_ARG_WITH(irstlm,
[AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])],
[with_irstlm=$withval],
[with_irstlm=no]
)
AC_ARG_WITH(randlm,
[AC_HELP_STRING([--with-randlm=PATH], [(optional) path to RandLM toolkit])],
[with_randlm=$withval],
[with_randlm=no]
)
AC_ARG_WITH(orlm,
[AC_HELP_STRING([--with-orlm=PATH], [(optional) path to ORLM])],
[with_orlm=$withval],
[with_orlm=no]
)
AC_ARG_WITH(dmaplm,
[AC_HELP_STRING([--with-dmaplm=PATH], [(optional) path to DMapLM])],
[with_dmaplm=$withval],
[with_dmaplm=no]
)
AC_ARG_WITH(synlm,
[AC_HELP_STRING([--with-synlm], [(optional) Include syntactic language model parser; default is no])],
[with_synlm=$withval],
[with_synlm=no]
)
AC_ARG_WITH(notrace,
[AC_HELP_STRING([--notrace], [disable trace])],
[without_trace=yes],
)
AC_ARG_ENABLE(profiling,
[AC_HELP_STRING([--enable-profiling], [moses will dump profiling info])],
[CPPFLAGS="$CPPFLAGS -pg"; LDFLAGS="$LDFLAGS -pg" ]
)
AC_ARG_ENABLE(optimization,
[AC_HELP_STRING([--enable-optimization], [compile with -O3 flag])],
[CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
)
AC_ARG_ENABLE(threads,
[AC_HELP_STRING([--enable-threads], [compile threadsafe library and multi-threaded moses (mosesmt)])],
[],
[enable_threads=no]
)
AC_ARG_WITH(zlib,
[AC_HELP_STRING([--with-zlib=PATH], [(optional) path to zlib])],
[with_zlib=$withval],
[with_zlib=no]
)
AC_ARG_WITH(tcmalloc,
[AC_HELP_STRING([--with-tcmalloc], [(optional) link with tcmalloc; default is no])],
[with_tcmalloc=$withval],
[with_tcmalloc=no]
)
AC_ARG_ENABLE(boost-pool,
[AC_HELP_STRING([--enable-boost-pool], [(optional) try to improve speed by selectively using Boost pool allocation (may increase total memory use); default is yes if Boost enabled])],
[enable_boost_pool=yes],
[enable_boost_pool=no]
)
AM_CONDITIONAL([INTERNAL_LM], false)
AM_CONDITIONAL([SRI_LM], false)
AM_CONDITIONAL([IRST_LM], false)
AM_CONDITIONAL([KEN_LM], false)
AM_CONDITIONAL([RAND_LM], false)
AM_CONDITIONAL([ORLM_LM], false)
AM_CONDITIONAL([DMAP_LM], false)
AM_CONDITIONAL([SYN_LM], false)
AM_CONDITIONAL([PROTOBUF], false)
AM_CONDITIONAL([am__fastdepCC], false)
AM_CONDITIONAL([WITH_THREADS],false)
if test "x$without_trace" = 'xyes'
then
AC_MSG_NOTICE([trace disabled, most regression test will fail])
else
AC_MSG_NOTICE([trace enabled (default)])
CPPFLAGS="$CPPFLAGS -DTRACE_ENABLE=1"
fi
if test "x$enable_threads" = 'xyes'
then
AC_MSG_NOTICE([Building threaded moses])
BOOST_THREADS
CPPFLAGS="$CPPFLAGS -DWITH_THREADS"
AM_CONDITIONAL([WITH_THREADS],true)
else
AC_MSG_NOTICE([Building non-threaded moses. This will disable the moses server])
fi
if test "x$with_protobuf" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_protobuf}/include"
AC_CHECK_HEADER(google/protobuf/message.h,
[AC_DEFINE([HAVE_PROTOBUF], [], [flag for protobuf])],
[AC_MSG_ERROR([Cannot find protobuf!])])
LIB_PROTOBUF="-lprotobuf"
LDFLAGS="$LDFLAGS -L${with_protobuf}/lib"
LIBS="$LIBS $LIB_PROTOBUF"
AC_PATH_PROG(PROTOC,protoc,,"${PATH}:${with_protobuf}/bin")
FMTLIBS="$FMTLIBS libprotobuf.a"
AM_CONDITIONAL([PROTOBUF], true)
fi
if test "x$with_srilm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
AC_CHECK_HEADER(Ngram.h,
[AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
[AC_MSG_ERROR([Cannot find SRILM!])])
if test "x$with_srilm_dynamic" != 'xyes'
then
LIB_SRILM="-loolm -ldstruct -lmisc -lflm"
# ROOT/lib/i686-m64/liboolm.a
# ROOT/lib/i686-m64/libdstruct.a
# ROOT/lib/i686-m64/libmisc.a
if test "x$with_srilm_arch" != 'xno'
then
MY_ARCH=${with_srilm_arch}
else
MY_ARCH=`${with_srilm}/sbin/machine-type`
fi
LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}"
LIBS="$LIBS $LIB_SRILM"
FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
else
LDFLAGS="$LDFLAGS -L${with_srilm}/lib"
LIBS="$LIBS -lsrilm"
fi
AC_CHECK_LIB([oolm], [trigram_init], [], [AC_MSG_ERROR([Cannot find SRILM's library in ${with_srilm}/lib/${MY_ARCH} ])])
AM_CONDITIONAL([SRI_LM], true)
fi
if test "x$with_irstlm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
AC_MSG_NOTICE([])
AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!])
AC_MSG_NOTICE([!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!])
AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!])
AC_MSG_NOTICE([])
AC_CHECK_HEADER(n_gram.h,
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
[AC_MSG_ERROR([Cannot find IRST-LM in ${with_irstlm}])])
MY_ARCH=`uname -m`
LIB_IRSTLM="-lirstlm"
LDFLAGS="$LDFLAGS -L${with_irstlm}/lib"
LIBS="$LIBS $LIB_IRSTLM"
FMTLIBS="$FMTLIBS libirstlm.a"
AM_CONDITIONAL([IRST_LM], true)
fi
CPPFLAGS="$CPPFLAGS -I\$(top_srcdir)"
#LDFLAGS="$LDFLAGS -L\$(top_srcdir)/util -lkenutil -L\$(top_srcdir)/lm -lkenlm -lz"
#KENUTIL_DEPS="\$(top_srcdir)/util/libkenutil.la"
#KENLM_DEPS="\$(top_srcdir)/lm/libkenlm.la"
#FMTLIBS="$FMTLIBS libkenutil.la libkenlm.la"
#AC_SUBST(KENUTIL_DEPS)
#AC_SUBST(KENLM_DEPS)
if test "x$with_randlm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_randlm}/include"
AC_CHECK_HEADER(RandLM.h,
[AC_DEFINE([HAVE_RANDLM], [], [flag for RandLM])],
[AC_MSG_ERROR([Cannot find RandLM!])])
MY_ARCH=`uname -m`
LIB_RANDLM="-lrandlm"
LDFLAGS="$LDFLAGS -L${with_randlm}/lib"
LIBS="$LIBS $LIB_RANDLM"
FMTLIBS="$FMTLIBS librandlm.a"
AM_CONDITIONAL([RAND_LM], true)
fi
if test "x$with_dmaplm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_dmaplm}/src/DMap"
AC_CHECK_HEADER(StructLanguageModel.h,
[AC_DEFINE([HAVE_DMAPLM], [], [flag for DMapLM])],
[AC_MSG_ERROR([Cannot find DMapLM!])])
LDFLAGS="$LDFLAGS -L${with_dmaplm}/src/DMap"
LIBS="$LIBS -lDMap"
FMTLIBS="FMTLIBS libdmap.la"
AM_CONDITIONAL([DMAP_LM], true)
fi
if test "x$with_orlm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${with_orlm}/"
AC_CHECK_HEADER(onlineRLM.h,
#AC_CHECK_HEADER(multiOnlineRLM.h,
[AC_DEFINE([HAVE_ORLM], [], [flag for ORLM])],
[AC_MSG_ERROR([Cannot find ORLM!])])
MY_ARCH=`uname -m`
AM_CONDITIONAL([ORLM_LM], true)
fi
if test "x$with_tcmalloc" != 'xno'
then
AC_CHECK_LIB([tcmalloc], [malloc], [], [AC_MSG_ERROR([Cannot find tcmalloc])])
fi
if test "x$enable_boost_pool" != 'xno'
then
AC_CHECK_HEADER(boost/pool/object_pool.hpp,
[AC_DEFINE([USE_BOOST_POOL], [], [Flag to enable use of Boost pool])],
[AC_MSG_WARN([Cannot find boost/pool/object_pool.hpp])]
)
fi
if test "x$with_synlm" != 'xno'
then
SAVE_CPPFLAGS="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS -I${PWD}/synlm/hhmm/rvtl/include -I${PWD}/synlm/hhmm/wsjparse/include -lm"
AC_CHECK_HEADERS(nl-cpt.h,
[AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])],
[AC_MSG_ERROR([Cannot find SYNLM in ${PWD}/synlm/hhmm])])
AM_CONDITIONAL([SYN_LM], true)
fi
AM_CONDITIONAL([WITH_MERT],false)
AC_CHECK_HEADERS([getopt.h],
[AM_CONDITIONAL([WITH_MERT],true)],
[AC_MSG_WARN([Cannot find getopt.h - disabling new mert])])
AM_CONDITIONAL([WITH_SERVER],false)
if test "x$have_xmlrpc_c" = "xyes" && test "x$enable_threads" = "xyes"; then
AM_CONDITIONAL([WITH_SERVER],true)
else
AC_MSG_NOTICE([Disabling server])
fi
if test "x$with_zlib" != 'xno'
then
CPPFLAGS="$CPPFLAGS -I${with_zlib}/include"
LDFLAGS="$LDFLAGS -L${with_zlib}/lib"
fi
# zlib is always required (see ./moses/src/gzfilebuf.h)
# TODO: This shouldn't be presented to the user as a config option if it isn't actually an option
AC_CHECK_HEADER(zlib.h,
[AC_DEFINE([HAVE_ZLIB], [], [flag for zlib])],
[AC_MSG_ERROR([Cannot find zlib.h. Please install it. For Debian, try 'sudo aptitude install zlib1g-dev'])])
LIBS="$LIBS -lz"
AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile mert/Makefile contrib/server/Makefile CreateOnDisk/src/Makefile util/Makefile lm/Makefile)
AC_OUTPUT()

View File

@ -1,22 +1,39 @@
#If you get compilation errors here, make sure you have xmlrpc-c installed properly. . .
#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option.
import option ;
import path ;
with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
build-moses-server = true ;
shell-prefix = $(with-xmlrpc-c)/bin/ ;
} else {
if [ SHELL $(TOP)"/jam-files/test.sh -include xmlrpc-c/base.hpp -lxmlrpc_server_abyss++" ] = 0 {
build-moses-server = true ;
xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
if ! [ path.exists $(xmlrpc-command) ] {
exit Could not find $(xmlrpc-command) : 1 ;
}
shell-prefix = "" ;
} else {
xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
if $(xmlrpc-check[2]) = 0 {
if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
build-moses-server = true ;
} else {
echo "Found xmlrpc-c but it does not have abyss-server. Skipping mosesserver." ;
}
}
xmlrpc-command = "xmlrpc-c-config" ;
}
rule shell_or_die ( cmd ) {
local ret = [ _shell $(cmd) : exit-status ] ;
if $(ret[2]) != 0 {
exit "Failed to run $(cmd)" : 1 ;
}
return $(ret[1]) ;
}
if $(build-moses-server) = true
{
xmlrpc-linkflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --libs" ] ;
xmlrpc-cxxflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --cflags" ] ;
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
exe mosesserver : mosesserver.cpp ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
} else {

View File

@ -1,5 +0,0 @@
bin_PROGRAMS = mosesserver
mosesserver_SOURCES = mosesserver.cpp
mosesserver_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src $(XMLRPC_C_CPPFLAGS) $(BOOST_CPPFLAGS)
mosesserver_LDADD = -L$(top_srcdir)/moses/src -lmoses -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(XMLRPC_C_LIBS) $(BOOST_THREAD_LIBS)
mosesserver_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a

View File

@ -86,15 +86,7 @@ function run_single_test () {
err=""
echo "## ./bjam clean" >> $longlog
./bjam clean >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
if [ -z "$err" ]; then
./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
fi
cd regression-testing
cd regression-testing
regtest_file=$(echo "$REGTEST_ARCHIVE" | sed 's/^.*\///')
# download data for regression tests if necessary
@ -104,15 +96,22 @@ function run_single_test () {
tar xzf $regtest_file
touch $regtest_file.ok
fi
regtest_dir=$PWD/$(basename $regtest_file .tgz)
cd ..
echo "## ./bjam clean" >> $longlog
./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
if [ -z "$err" ]; then
./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
fi
echo "## regression tests" >> $longlog
if [ -z "$err" ]; then
./run-test-suite.perl &>> $longlog
regtest_status=$?
[ $regtest_status -eq 1 ] && die "Failed to run regression tests"
[ $regtest_status -eq 2 ] && err="regression tests"
./bjam $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || err="regression tests"
fi
cd ..
if [ -z "$err" ] && [ "$MCC_RUN_EMS" = "yes" ]; then
echo "## EMS" >> $longlog

View File

@ -1,25 +0,0 @@
lib_LTLIBRARIES = libkenlm.la
bin_PROGRAMS = query build_binary
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libkenlm_la_SOURCES = \
bhiksha.cc \
binary_format.cc \
config.cc \
lm_exception.cc \
model.cc \
search_hashed.cc \
search_trie.cc \
quantize.cc \
read_arpa.cc \
trie.cc \
trie_sort.cc \
virtual_interface.cc \
vocab.cc
query_SOURCES = ngram_query.cc
query_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la
build_binary_SOURCES = build_binary.cc
build_binary_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la

View File

@ -160,44 +160,45 @@ int main(int argc, char *argv[]) {
}
if (optind + 1 == argc) {
ShowSizes(argv[optind], config);
} else if (optind + 2 == argc) {
return 0;
}
const char *model_type, *from_file;
if (optind + 2 == argc) {
model_type = "probing";
from_file = argv[optind];
config.write_mmap = argv[optind + 1];
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
ProbingModel(argv[optind], config);
} else if (optind + 3 == argc) {
const char *model_type = argv[optind];
const char *from_file = argv[optind + 1];
model_type = argv[optind];
from_file = argv[optind + 1];
config.write_mmap = argv[optind + 2];
if (!strcmp(model_type, "probing")) {
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
ProbingModel(from_file, config);
} else if (!strcmp(model_type, "trie")) {
if (quantize) {
if (bhiksha) {
QuantArrayTrieModel(from_file, config);
} else {
QuantTrieModel(from_file, config);
}
} else {
Usage(argv[0]);
}
if (!strcmp(model_type, "probing")) {
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
ProbingModel(from_file, config);
} else if (!strcmp(model_type, "trie")) {
if (quantize) {
if (bhiksha) {
QuantArrayTrieModel(from_file, config);
} else {
if (bhiksha) {
ArrayTrieModel(from_file, config);
} else {
TrieModel(from_file, config);
}
QuantTrieModel(from_file, config);
}
} else {
Usage(argv[0]);
if (bhiksha) {
ArrayTrieModel(from_file, config);
} else {
TrieModel(from_file, config);
}
}
} else {
Usage(argv[0]);
}
}
catch (const std::exception &e) {
std::cerr << "Built " << config.write_mmap << " successfully." << std::endl;
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
std::cerr << "ERROR" << std::endl;
return 1;
}
std::cerr << "SUCCESS" << std::endl;
return 0;
}

View File

@ -229,7 +229,7 @@ void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialW
if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>.";
break;
case THROW_UP:
UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check.");
UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. If you built your APRA with IRSTLM and forgot to run add-start-end.sh, complain to <bertoldi at fbk.eu> stating that you think build-lm.sh should do this by default, then go back and retrain your model from the start. To bypass this check and treat " << str << " as an OOV, pass -s. The resulting model will not work with e.g. Moses.");
}
}

View File

@ -1,52 +0,0 @@
AC_DEFUN([AX_XMLRPC_C], [
AC_MSG_CHECKING(for XMLRPC-C)
AC_ARG_WITH(xmlrpc-c,
[ --with-xmlrpc-c=PATH Enable XMLRPC-C support. Setting the PATH to yes will search for xmlrpc-c-config on the shell PATH,],
[
if test "$withval" = "no"; then
AC_MSG_RESULT(no)
else
if test "$withval" = "yes"; then
xmlrpc_cc_prg="xmlrpc-c-config"
else
xmlrpc_cc_prg="$withval"
fi
if eval $xmlrpc_cc_prg --version 2>/dev/null >/dev/null; then
XMLRPC_C_CPPFLAGS=`$xmlrpc_cc_prg --cflags c++2 abyss-server`
XMLRPC_C_LIBS=`$xmlrpc_cc_prg c++2 abyss-server --libs`
CXXFLAGS_SAVED=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $XMLRPC_C_CPPFLAGS"
LIBS_SAVED=$LIBS
LIBS="$LIBS $XMLRPC_C_LIBS"
AC_TRY_LINK(
[ #include <xmlrpc-c/server.h>
],[ xmlrpc_registry_new(NULL); ],
[
AC_MSG_RESULT(ok)
], [
AC_MSG_RESULT(failed)
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
])
dnl AC_DEFINE(HAVE_XMLRPC_C, 1, Support for XMLRPC-C.)
have_xmlrpc_c=yes
AC_SUBST(XMLRPC_C_LIBS)
AC_SUBST(XMLRPC_C_CPPFLAGS)
LIBS=$LIBS_SAVED
CXXFLAGS=$CXXFLAGS_SAVED
else
AC_MSG_RESULT(failed)
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
fi
fi
],[
AC_MSG_RESULT(ignored)
])
])

File diff suppressed because it is too large Load Diff

View File

@ -56,7 +56,7 @@ private:
typedef map<vector<int>,int,CompareNgrams> counts_t;
typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator;
typedef map<vector<int>,int,CompareNgrams>::iterator counts_const_iterator;
typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator;
typedef ScopedVector<counts_t> refcounts_t;
/**

View File

@ -1,45 +0,0 @@
lib_LTLIBRARIES = libmert.la
bin_PROGRAMS = mert extractor evaluator pro
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
libmert_la_SOURCES = \
Util.cpp \
FileStream.cpp \
Timer.cpp \
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
ScoreDataIterator.cpp \
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
FeatureDataIterator.cpp \
Data.cpp \
BleuScorer.cpp \
Point.cpp \
PerScorer.cpp \
Scorer.cpp \
ScorerFactory.cpp \
Optimizer.cpp \
TERsrc/alignmentStruct.cpp \
TERsrc/hashMap.cpp \
TERsrc/hashMapStringInfos.cpp \
TERsrc/stringHasher.cpp \
TERsrc/terAlignment.cpp \
TERsrc/terShift.cpp \
TERsrc/hashMapInfos.cpp \
TERsrc/infosHasher.cpp \
TERsrc/stringInfosHasher.cpp \
TERsrc/tercalc.cpp \
TERsrc/tools.cpp \
TerScorer.cpp \
CderScorer.cpp \
MergeScorer.cpp
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
extractor_SOURCES = extractor.cpp
evaluator_SOURCES = evaluator.cpp
pro_SOURCES = pro.cpp
extractor_LDADD = libmert.la -lm -lz
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
evaluator_LDADD = libmert.la -lm -lz
pro_LDADD = libmert.la $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la libmert.la

View File

@ -1,16 +0,0 @@
# Automake rules for the phrase-table and lexical-table command line tools.
bin_PROGRAMS = processPhraseTable processLexicalTable queryLexicalTable queryPhraseTable
processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp
processLexicalTable_SOURCES = processLexicalTable.cpp
queryLexicalTable_SOURCES = queryLexicalTable.cpp
queryPhraseTable_SOURCES = queryPhraseTable.cpp
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
# All four programs link against exactly the same libraries. Automake applies
# LDADD to every program in this directory that has no prog_LDADD of its own,
# so one definition replaces four identical copies that had to be kept in sync.
LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)

View File

@ -1,111 +0,0 @@
#! /bin/sh
# mkinstalldirs --- make directory hierarchy
# Author: Noah Friedman <friedman@prep.ai.mit.edu>
# Created: 1993-05-16
# Public domain
# Exit status reported by the final "exit"; overwritten with the status of a
# failed mkdir or chmod in the component-by-component loop further down.
errstatus=0
# Permission mode given with -m; empty means no mode was requested.
dirmode=""
usage="\
Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..."
# process command line arguments
while test $# -gt 0 ; do
case $1 in
-h | --help | --h*) # -h for help
echo "$usage" 1>&2
exit 0
;;
-m) # -m PERM arg
shift
test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
dirmode=$1
shift
;;
--) # stop option processing
shift
break
;;
-*) # unknown option
echo "$usage" 1>&2
exit 1
;;
*) # first non-opt arg
break
;;
esac
done
# Drop leading arguments that already exist as directories; stop at the first
# argument that is not an existing directory.
for file
do
if test -d "$file"; then
shift
else
break
fi
done
# Nothing left to create: succeed without doing any work.
case $# in
0) exit 0 ;;
esac
# Fast path: probe whether this mkdir accepts -p (and -m when a mode was
# given) by running it on ".". If the probe succeeds, print the command and
# replace this shell with a single mkdir call for all remaining arguments.
# If it fails, fall through to the manual loop below.
case $dirmode in
'')
if mkdir -p -- . 2>/dev/null; then
echo "mkdir -p -- $*"
exec mkdir -p -- "$@"
fi
;;
*)
if mkdir -m "$dirmode" -p -- . 2>/dev/null; then
echo "mkdir -m $dirmode -p -- $*"
exec mkdir -m "$dirmode" -p -- "$@"
fi
;;
esac
# Slow path: create every requested path one component at a time.
for file
do
# Split the path on "/" into positional parameters. The sed script turns
# every "/" into a space but keeps a leading "/" attached to the first
# component. "fnord" is a placeholder first word that the following shift
# removes again.
set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
shift
pathcomp=
for d
do
pathcomp="$pathcomp$d"
# Prefix "./" so a path starting with "-" is not taken for an option.
case $pathcomp in
-*) pathcomp=./$pathcomp ;;
esac
if test ! -d "$pathcomp"; then
echo "mkdir $pathcomp"
mkdir "$pathcomp" || lasterr=$?
# A failed mkdir only counts as an error when the directory is still
# missing afterwards (presumably to tolerate another process creating it
# at the same time).
if test ! -d "$pathcomp"; then
errstatus=$lasterr
else
if test ! -z "$dirmode"; then
echo "chmod $dirmode $pathcomp"
lasterr=""
chmod "$dirmode" "$pathcomp" || lasterr=$?
if test ! -z "$lasterr"; then
errstatus=$lasterr
fi
fi
fi
fi
pathcomp="$pathcomp/"
done
done
exit $errstatus
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# End:
# mkinstalldirs ends here

View File

@ -1,10 +0,0 @@
# Automake rules for the moses_chart executable.
bin_PROGRAMS = moses_chart
moses_chart_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp
# Headers are taken from moses/src; large-file support and USE_HYPO_POOL are
# defined for every compilation unit.
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
# libmoses and libOnDiskPt are linked through -L/-l flags rather than by file name.
moses_chart_LDADD = -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
# Names the two library files explicitly; presumably needed because the -l
# flags above give make no file to track for relinking (TODO confirm).
moses_chart_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a

View File

@ -1,10 +0,0 @@
# Automake rules for the moses and lmbrgrid executables.
bin_PROGRAMS = moses lmbrgrid
# Headers are taken from moses/src; large-file support and USE_HYPO_POOL are
# defined for every compilation unit.
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp
moses_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
# lmbrgrid shares LatticeMBR.cpp and IOWrapper.cpp with moses and links the
# same set of libraries.
lmbrgrid_SOURCES = LatticeMBRGrid.cpp LatticeMBR.cpp IOWrapper.cpp
lmbrgrid_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)

View File

@ -100,7 +100,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
if (word == GetSentenceStartArray()) {
// do nothing, don't include prob for <s> unigram
if (currPos != 0) {
std::cerr << "Your data contains <s> in a position other than the first word." << std::endl;
std::cerr << "Either your data contains <s> in a position other than the first word or your language model is missing <s>. Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" << std::endl;
abort();
}
} else {

View File

@ -6,7 +6,7 @@ if $(with-irstlm) != ""
lib irstlm : : <search>$(with-irstlm)/lib ;
obj IRST.o : IRST.cpp ..//headers : <include>$(with-irstlm)/include ;
alias irst : IRST.o irstlm : : : <define>LM_IRST ;
echo "" ;
echo "Forcing single-threaded build because of IRSTLM." ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;

View File

@ -198,7 +198,7 @@ template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phr
} else {
lm::WordIndex index = TranslateID(word);
if (index == m_ngram->GetVocabulary().BeginSentence()) {
std::cerr << "Your data contains <s> in a position other than the first word." << std::endl;
std::cerr << "Either your data contains <s> in a position other than the first word or your language model is missing <s>. Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" << std::endl;
abort();
}
float score = TransformLMScore(m_ngram->Score(*state0, index, *state1));

View File

@ -1,335 +0,0 @@
lib_LTLIBRARIES = libmoses.la
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libmoses_ladir = ${includedir}
libmoses_la_HEADERS = \
AlignmentInfo.h \
AlignmentInfoCollection.h \
BilingualDynSuffixArray.h \
BitmapContainer.h \
CellCollection.h \
ChartCell.h \
ChartCellCollection.h \
ChartHypothesis.h \
ChartHypothesisCollection.h \
ChartManager.h \
ChartRuleLookupManager.h \
ChartRuleLookupManagerMemory.h \
ChartRuleLookupManagerOnDisk.h \
ChartTranslationOption.h \
ChartTranslationOptionCollection.h \
ChartTranslationOptionList.h \
ChartTrellisDetour.h \
ChartTrellisDetourQueue.h \
ChartTrellisNode.h \
ChartTrellisPath.h \
ChartTrellisPathList.h \
ConfusionNet.h \
DecodeFeature.h \
DecodeGraph.h \
DecodeStep.h \
DecodeStepGeneration.h \
DecodeStepTranslation.h \
Dictionary.h \
DotChart.h \
DotChartInMemory.h \
DotChartOnDisk.h \
DummyScoreProducers.h \
DynSAInclude/file.h \
DynSAInclude/vocab.h \
DynSuffixArray.h \
FFState.h \
Factor.h \
FactorCollection.h \
FactorTypeSet.h \
FeatureFunction.h \
File.h \
FilePtr.h \
FloydWarshall.h \
GenerationDictionary.h \
GlobalLexicalModel.h \
gzfilebuf.h \
hash.h \
Hypothesis.h \
HypothesisStack.h \
HypothesisStackCubePruning.h \
HypothesisStackNormal.h \
InputFileStream.h \
InputType.h \
LMList.h \
LVoc.h \
LM/Base.h \
LM/Joint.h \
LM/Factory.h \
LM/Implementation.h \
LM/MultiFactor.h \
LM/Remote.h \
LM/SingleFactor.h \
LM/Ken.h \
LexicalReordering.h \
LexicalReorderingState.h \
LexicalReorderingTable.h \
Manager.h \
NonTerminal.h \
ObjectPool.h \
PCNTools.h \
PDTAimp.h \
Parameter.h \
PartialTranslOptColl.h \
Phrase.h \
PhraseDictionary.h \
PhraseDictionaryALSuffixArray.h \
PhraseDictionaryDynSuffixArray.h \
PhraseDictionaryMemory.h \
PhraseDictionarySCFG.h \
PhraseDictionaryNode.h \
PhraseDictionaryNodeSCFG.h \
PhraseDictionaryOnDisk.h \
PhraseDictionaryTree.h \
PhraseDictionaryTreeAdaptor.h \
PrefixTree.h \
PrefixTreeMap.h \
ReorderingConstraint.h \
ReorderingStack.h \
RuleCube.h \
RuleCubeItem.h \
RuleCubeQueue.h \
RuleTableLoader.h \
RuleTableLoaderCompact.h \
RuleTableLoaderFactory.h \
RuleTableLoaderHiero.h \
RuleTableLoaderStandard.h \
ScoreComponentCollection.h \
ScoreIndexManager.h \
ScoreProducer.h \
Search.h \
SearchCubePruning.h \
SearchNormal.h \
Sentence.h \
SentenceStats.h \
SquareMatrix.h \
StaticData.h \
TargetPhrase.h \
TargetPhraseCollection.h \
ThreadPool.h \
Timer.h \
TranslationOption.h \
TranslationOptionCollection.h \
TranslationOptionCollectionConfusionNet.h \
TranslationOptionCollectionText.h \
TranslationOptionList.h \
TranslationSystem.h \
TreeInput.h \
TrellisPath.h \
TrellisPathCollection.h \
TrellisPathList.h \
TypeDef.h \
UniqueObject.h \
UserMessage.h \
Util.h \
Word.h \
WordLattice.h \
WordsBitmap.h \
WordsRange.h \
XmlOption.h
if PROTOBUF
libmoses_la_HEADERS += rule.pb.h hypergraph.pb.h
endif
if SRI_LM
libmoses_la_HEADERS += LM/SRI.h \
LM/ParallelBackoff.h
endif
if IRST_LM
libmoses_la_HEADERS += LM/IRST.h
endif
if RAND_LM
libmoses_la_HEADERS += LM/Rand.h
endif
if ORLM_LM
libmoses_la_HEADERS += LM/ORLM.h \
DynSAInclude/params.h \
DynSAInclude/hash.h \
DynSAInclude/quantizer.h \
DynSAInclude/RandLMFilter.h \
DynSAInclude/RandLMCache.h
endif
if SYN_LM
libmoses_la_HEADERS += SyntacticLanguageModel.h
endif
libmoses_la_SOURCES = \
AlignmentInfo.cpp \
AlignmentInfoCollection.cpp \
BilingualDynSuffixArray.cpp \
BitmapContainer.cpp \
ChartCell.cpp \
ChartCellCollection.cpp \
ChartHypothesis.cpp \
ChartHypothesisCollection.cpp \
ChartManager.cpp \
ChartRuleLookupManager.cpp \
ChartRuleLookupManagerMemory.cpp \
ChartRuleLookupManagerOnDisk.cpp \
ChartTranslationOption.cpp \
ChartTranslationOptionCollection.cpp \
ChartTranslationOptionList.cpp \
ChartTrellisDetour.cpp \
ChartTrellisDetourQueue.cpp \
ChartTrellisNode.cpp \
ChartTrellisPath.cpp \
ConfusionNet.cpp \
DecodeFeature.cpp \
DecodeGraph.cpp \
DecodeStep.cpp \
DecodeStepGeneration.cpp \
DecodeStepTranslation.cpp \
Dictionary.cpp \
DotChart.cpp \
DotChartInMemory.cpp \
DotChartOnDisk.cpp \
DummyScoreProducers.cpp \
DynSAInclude/file.cpp \
DynSAInclude/vocab.cpp \
DynSuffixArray.cpp \
FFState.cpp \
Factor.cpp \
FactorCollection.cpp \
FactorTypeSet.cpp \
FeatureFunction.cpp \
FloydWarshall.cpp \
GenerationDictionary.cpp \
GlobalLexicalModel.cpp \
hash.cpp \
Hypothesis.cpp \
HypothesisStack.cpp \
HypothesisStackCubePruning.cpp \
HypothesisStackNormal.cpp \
InputFileStream.cpp \
InputType.cpp \
LMList.cpp \
LVoc.cpp \
LM/Base.cpp \
LM/Factory.cpp \
LM/Implementation.cpp \
LM/Joint.cpp \
LM/Ken.cpp \
LM/MultiFactor.cpp \
LM/Remote.cpp \
LM/SingleFactor.cpp \
LexicalReordering.cpp \
LexicalReorderingState.cpp \
LexicalReorderingTable.cpp \
Manager.cpp \
PCNTools.cpp \
Parameter.cpp \
PartialTranslOptColl.cpp \
Phrase.cpp \
PhraseDictionary.cpp \
PhraseDictionaryALSuffixArray.cpp \
PhraseDictionaryDynSuffixArray.cpp \
PhraseDictionaryHiero.cpp \
PhraseDictionaryMemory.cpp \
PhraseDictionarySCFG.cpp \
PhraseDictionaryNode.cpp \
PhraseDictionaryNodeSCFG.cpp \
PhraseDictionaryOnDisk.cpp \
PhraseDictionaryTree.cpp \
PhraseDictionaryTreeAdaptor.cpp \
PrefixTreeMap.cpp \
ReorderingConstraint.cpp \
ReorderingStack.cpp \
RuleCube.cpp \
RuleCubeItem.cpp \
RuleCubeQueue.cpp \
RuleTableLoaderCompact.cpp \
RuleTableLoaderFactory.cpp \
RuleTableLoaderHiero.cpp \
RuleTableLoaderStandard.cpp \
ScoreComponentCollection.cpp \
ScoreIndexManager.cpp \
ScoreProducer.cpp \
Search.cpp \
SearchCubePruning.cpp \
SearchNormal.cpp \
Sentence.cpp \
SentenceStats.cpp \
SquareMatrix.cpp \
StaticData.cpp \
TargetPhrase.cpp \
TargetPhraseCollection.cpp \
ThreadPool.cpp \
Timer.cpp \
TranslationOption.cpp \
TranslationOptionCollection.cpp \
TranslationOptionCollectionConfusionNet.cpp \
TranslationOptionCollectionText.cpp \
TranslationOptionList.cpp \
TranslationSystem.cpp \
TreeInput.cpp \
TrellisPath.cpp \
TrellisPathCollection.cpp \
UserMessage.cpp \
Util.cpp \
Word.cpp \
WordLattice.cpp \
WordsBitmap.cpp \
WordsRange.cpp \
XmlOption.cpp
if PROTOBUF
BUILT_SOURCES = \
rule.pb.h \
rule.pb.cc \
hypergraph.pb.h \
hypergraph.pb.cc
CLEANFILES = $(BUILT_SOURCES)
SUFFIXES = .proto
rule.pb.cc: rule.proto
@PROTOC@ --cpp_out=. $<
rule.pb.h: rule.proto
@PROTOC@ --cpp_out=. $<
hypergraph.pb.cc: hypergraph.proto
@PROTOC@ --cpp_out=. $<
hypergraph.pb.h: hypergraph.proto
@PROTOC@ --cpp_out=. $<
libmoses_la_SOURCES += rule.pb.cc hypergraph.pb.cc
endif
if SRI_LM
libmoses_la_SOURCES += LM/SRI.cpp \
LM/ParallelBackoff.cpp
endif
if IRST_LM
libmoses_la_SOURCES += LM/IRST.cpp
endif
if RAND_LM
libmoses_la_SOURCES += LM/Rand.cpp
endif
if ORLM_LM
libmoses_la_SOURCES += LM/ORLM.cpp \
DynSAInclude/onlineRLM.h \
DynSAInclude/perfecthash.h \
DynSAInclude/params.cpp
endif
if SYN_LM
libmoses_la_SOURCES += SyntacticLanguageModel.cpp
endif
libmoses_la_LIBADD = $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)

View File

@ -1,109 +0,0 @@
#!/bin/bash
# Announce the move to Boost Jam. Everything after this point runs only when
# the first argument is --force; any other invocation stops here with a
# failure status once the notice has been printed.
cat <<EOF
Moses is moving to Boost Jam. To build Moses, run one command:
./bjam [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] -j4
If that's not working for you, complain to moses-support then run
./regenerate-makefiles.sh --force to continue using autotools.
EOF
case "$1" in
    --force) ;;
    *) exit 1 ;;
esac
# NOTE:
# Versions 1.9 (or higher) of aclocal and automake are required.
# And version >=2.60 of autoconf
# And version >=1.4.7 of m4
# For Mac OSX users:
# Standard distribution usually includes versions 1.6.
# Get versions 1.9 or higher
# Set the following variable to the correct paths
#ACLOCAL="/path/to/aclocal-1.9"
#AUTOMAKE="/path/to/automake-1.9"
# Print an error message on stderr, add platform-specific hints where the
# platform can be recognised, and terminate the script.
#   $@  message text, echoed as given
# Never returns: always exits with status 1.
function die () {
    echo "$@" >&2
    # Try to be as helpful as possible by detecting OS and making recommendations
    # lsb_release is not installed everywhere; its stderr is discarded so that a
    # missing command does not add a second, unrelated error to the output.
    # grep -F replaces the obsolescent fgrep spelling with identical matching.
    if (( $(lsb_release -a 2>/dev/null | grep -Fci "ubuntu") > 0 )); then
        echo >&2
        echo >&2 "Need to install build autotools on Ubuntu? Use:"
        echo >&2 "sudo aptitude install autoconf automake libtool build-essential"
    fi
    if (( $(uname -a | grep -Fci "darwin") > 0 )); then
        echo >&2
        echo >&2 "Having problems on Mac OSX?"
        echo >&2 "You might have an old version of aclocal/automake. You'll need to upgrade these."
    fi
    exit 1
}
# Locate each autotools program unless the caller already supplied its path
# through the environment (ACLOCAL, AUTOMAKE, AUTOCONF, LIBTOOLIZE). A tool
# that cannot be found aborts the script through die.
#
# The "$" in each message is escaped on purpose: these branches only run when
# the variable is empty, so an unescaped reference expanded to nothing and the
# hint ended in "or set " without naming the variable.
if [ -z "$ACLOCAL" ]; then
    ACLOCAL=`which aclocal`
    [ -n "$ACLOCAL" ] || die "aclocal not found on your system. Please install it or set \$ACLOCAL"
fi
if [ -z "$AUTOMAKE" ]; then
    AUTOMAKE=`which automake`
    [ -n "$AUTOMAKE" ] || die "automake not found on your system. Please install it or set \$AUTOMAKE"
fi
if [ -z "$AUTOCONF" ]; then
    AUTOCONF=`which autoconf`
    [ -n "$AUTOCONF" ] || die "autoconf not found on your system. Please install it or set \$AUTOCONF"
fi
if [ -z "$LIBTOOLIZE" ]; then
    LIBTOOLIZE=`which libtoolize`
    # Fall back to glibtoolize when no plain libtoolize is on the PATH.
    if [ -z "$LIBTOOLIZE" ]; then
        LIBTOOLIZE=`which glibtoolize`
    fi
    [ -n "$LIBTOOLIZE" ] || die "libtoolize/glibtoolize not found on your system. Please install it or set \$LIBTOOLIZE"
fi
# Report the first line of each tool's --version output on stderr.
echo >&2 "Detected aclocal: $($ACLOCAL --version | head -n1)"
echo >&2 "Detected autoconf: $($AUTOCONF --version | head -n1)"
echo >&2 "Detected automake: $($AUTOMAKE --version | head -n1)"
echo >&2 "Detected libtoolize: $($LIBTOOLIZE --version | head -n1)"
# Regenerate the build system in this order: aclocal, autoconf, libtoolize,
# automake. Each step announces itself first and aborts the script through
# die if the tool exits with a failure status.
echo "Calling $ACLOCAL -I m4..."
$ACLOCAL -I m4 || die "aclocal failed"
echo "Calling $AUTOCONF..."
$AUTOCONF || die "autoconf failed"
echo "Calling $LIBTOOLIZE"
$LIBTOOLIZE || die "libtoolize failed"
echo "Calling $AUTOMAKE --add-missing..."
$AUTOMAKE --add-missing || die "automake failed"
# Work out how many CPU cores are available so that a matching "make -j"
# value can be suggested. Falls back to 2 when the platform is not recognised
# or the probe yields no usable number.
case `uname -s` in
    Darwin)
        cores=$(sysctl -n hw.ncpu)
        ;;
    Linux)
        # Count only the lines that start with "processor". An unanchored match
        # also counts any other line that contains the word, for example a CPU
        # model name, and would inflate the result.
        cores=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
        ;;
    *)
        echo "Unknown platform."
        cores=
        ;;
esac
# grep -c prints 0 when nothing matched; treat that as "unknown" so the
# suggestion below never becomes the invalid "make -j 0".
if [ "$cores" = 0 ]; then
    cores=
fi
if [ -z "$cores" ]; then
    cores=2 # assume 2 cores if we can't figure it out
    echo >&2 "Assuming 2 cores"
else
    echo >&2 "Detected $cores cores"
fi
echo
echo "You should now be able to configure and build:"
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--with-synlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
echo " make -j ${cores}"
echo

View File

@ -1 +0,0 @@
*.pyc

View File

@ -1,6 +1,7 @@
#See ../Jamroot for options.
import option ;
build-project ems/biconcor ;
build-project training ;
with-giza = [ option.get "with-giza" ] ;
@ -37,91 +38,21 @@ if $(location) {
install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : <location>$(location)/training/phrase-extract/extract-ghkm/tools ;
install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
install phrase-extract : training/phrase-extract//released-programs : <location>$(location)/training/phrase-extract ;
install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
install symal : training/symal//symal : <location>$(location)/symal ;
install symal : training/symal//symal : <location>$(location)/training/symal ;
install biconcor : ems/biconcor//biconcor : <location>$(location)/ems/biconcor ;
if $(WITH-GIZA) != no {
install train-model : training//train-model.perl : <location>$(location)/training ;
} else {
alias train-model ;
}
install scripts :
analysis/README
analysis/sentence-by-sentence.pl
[ glob-tree README *.js *.pl *.perl *.pm *.py *.sh *.php : tests regression-testing other bin train_model.perl ]
[ glob tokenizer/nonbreaking_prefixes/* ems/example/*.* ems/example/data/* ems/web/* analysis/smtgui/* : ems/web/javascripts ]
generic/fsa-sample.fsa
ems/experiment.machines
ems/experiment.meta
ems/experiment.perl
ems/example/config.basic
ems/example/config.factored
ems/example/config.hierarchical
ems/example/config.syntax
ems/example/config.toy
ems/example/data/nc-5k.en
ems/example/data/nc-5k.fr
ems/example/data/test-ref.en.sgm
ems/example/data/test-src.fr.sgm
ems/support/analysis.perl
ems/support/berkeley-process.sh
ems/support/berkeley-train.sh
ems/support/consolidate-training-data.perl
ems/support/generic-multicore-parallelizer.perl
ems/support/generic-parallelizer.perl
ems/support/input-from-sgm.perl
ems/support/interpolate-lm.perl
ems/support/reference-from-sgm.perl
ems/support/remove-segmenation-markup.perl
ems/support/report-experiment-scores.perl
ems/support/reuse-weights.perl
ems/support/run-command-on-multiple-refsets.perl
ems/support/wrap-xml.perl
ems/web/analysis.php
ems/web/analysis_diff.php
ems/web/comment.php
ems/web/diff.php
ems/web/index.php
ems/web/lib.php
ems/web/overview.php
ems/web/setup
ems/web/javascripts/builder.js
ems/web/javascripts/controls.js
ems/web/javascripts/dragdrop.js
ems/web/javascripts/effects.js
ems/web/javascripts/prototype.js
ems/web/javascripts/scriptaculous.js
ems/web/javascripts/slider.js
ems/web/javascripts/sound.js
ems/web/javascripts/unittest.js
generic/compound-splitter.perl
generic/extract-factors.pl
generic/lopar2pos.pl
generic/moses-parallel.pl
generic/mteval-v12.pl
generic/multi-bleu.perl
generic/qsub-wrapper.pl
README
[ glob tokenizer/*.perl tokenizer/nonbreaking_prefixes/* ]
training/absolutize_moses_model.pl
training/build-generation-table.perl
training/clean-corpus-n.perl
training/clone_moses_model.pl
training/filter-model-given-input.pl
training/filter-rule-table.py
training/zmert-moses.pl
training/mert-moses.pl
training/mert-moses-multi.pl
training/postprocess-lopar.perl
training/reduce_combine.pl
training/combine_factors.pl
training/symal/giza2bal.pl
training/wrappers/parse-de-bitpar.perl
training/wrappers/parse-en-collins.perl
training/wrappers/make-factor-en-pos.mxpost.perl
training/wrappers/make-factor-pos.tree-tagger.perl
training/wrappers/make-factor-stem.perl
[ glob recaser/*.perl ]
: <install-source-root>. <location>$(location) ;
alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ;
}

View File

@ -1,3 +1,3 @@
exe biconcur : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
install legacy : biconcur : <location>. ;
install legacy : biconcor : <location>. ;

View File

@ -8,7 +8,7 @@ binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
# apply switches
my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG);
my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG,$HELP,$ERROR);
my $LM = "SRILM"; # SRILM is default.
my $BUILD_LM = "build-lm.sh";
my $NGRAM_COUNT = "ngram-count";
@ -16,24 +16,66 @@ my $TRAIN_SCRIPT = "train-factored-phrase-model.perl";
my $MAX_LEN = 1;
my $FIRST_STEP = 1;
my $LAST_STEP = 11;
die("train-recaser.perl --dir recaser --corpus cased")
$ERROR = "training Aborted."
unless &GetOptions('first-step=i' => \$FIRST_STEP,
'last-step=i' => \$LAST_STEP,
'corpus=s' => \$CORPUS,
'config=s' => \$CONFIG,
'dir=s' => \$DIR,
'ngram-count=s' => \$NGRAM_COUNT,
'build-lm=s' => \$BUILD_LM,
'lm=s' => \$LM,
'train-script=s' => \$TRAIN_SCRIPT,
'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR,
'max-len=i' => \$MAX_LEN);
'dir=s' => \$DIR,
'ngram-count=s' => \$NGRAM_COUNT,
'build-lm=s' => \$BUILD_LM,
'lm=s' => \$LM,
'train-script=s' => \$TRAIN_SCRIPT,
'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR,
'max-len=i' => \$MAX_LEN,
'help' => \$HELP);
# check and set default to unset parameters
die("please specify working dir --dir") unless defined($DIR);
die("please specify --corpus") if !defined($CORPUS)
$ERROR = "please specify working dir --dir" unless defined($DIR) || defined($HELP);
$ERROR = "please specify --corpus" if !defined($CORPUS) && !defined($HELP)
&& $FIRST_STEP <= 2 && $LAST_STEP >= 1;
if ($HELP || $ERROR) {
if ($ERROR) {
print STDERR "ERROR: " . $ERROR . "\n";
}
print STDERR "Usage: $0 --dir /output/recaser --corpus /Cased/corpus/files [options ...]";
print STDERR "\n\nOptions:
== MANDATORY ==
--dir=dir ... outputted recaser directory.
--corpus=file ... inputted cased corpus.
== OPTIONAL ==
= Recaser Training configuration =
--train-script=file ... path to the train script (default: train-factored-phrase-model.perl in \$PATH).
--config=config ... training script configuration.
--scripts-root-dir=dir ... scripts directory.
--max-len=int ... max phrase length (default: 1).
= Language Model Training configuration =
--lm=[IRSTLM,SRILM] ... language model (default: SRILM).
--build-lm=file ... path to build-lm.sh if not in \$PATH (used only with --lm=IRSTLM).
--ngram-count=file ... path to ngram-count.sh if not in \$PATH (used only with --lm=SRILM).
= Steps this script will perform =
(1) Truecasing (disabled);
(2) Language Model Training;
(3) Data Preparation
(4-10) Recaser Model Training;
(11) Cleanup.
--first-step=[1-11] ... step where script starts (default: 1).
--last-step=[1-11] ... step where script ends (default: 11).
--help ... this usage output.\n";
if ($ERROR) {
exit(1);
}
else {
exit(0);
}
}
# main loop
`mkdir -p $DIR`;
&truecase() if 0 && $FIRST_STEP == 1;
@ -60,7 +102,7 @@ sub train_lm {
}
print STDERR "** Using $LM **" . "\n";
print STDERR $cmd."\n";
print STDERR `$cmd`;
system($cmd) == 0 || die("Language model training failed with error " . ($? >> 8) . "\n");
}
sub prepare_data {
@ -110,12 +152,18 @@ sub train_recase_model {
$cmd .= " -scripts-root-dir $SCRIPTS_ROOT_DIR" if $SCRIPTS_ROOT_DIR;
$cmd .= " -config $CONFIG" if $CONFIG;
print STDERR $cmd."\n";
print STDERR `$cmd`;
system($cmd) == 0 || die("Recaser model training failed with error " . ($? >> 8) . "\n");
}
sub cleanup {
print STDERR "\n(11) Cleaning up @ ".`date`;
`rm -f $DIR/extract*`;
my $clean_1 = $?;
`rm -f $DIR/aligned*`;
my $clean_2 = $?;
`rm -f $DIR/lex*`;
my $clean_3 = $?;
if ($clean_1 + $clean_2 + $clean_3 != 0) {
print STDERR "Training successful but some files could not be cleaned.\n";
}
}

View File

@ -0,0 +1,5 @@
The language suffix can be found here:
http://www.loc.gov/standards/iso639-2/php/code_list.php

View File

@ -19,9 +19,7 @@ exe relax-parse : tables-core.cpp SyntaxTree.cpp XmlTree.cpp relax-parse.cpp ;
exe statistics : tables-core.cpp AlignmentPhrase.cpp statistics.cpp InputFileStream ;
alias released-programs : extract extract-rules score consolidate ;
alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-direct consolidate-reverse relax-parse statistics ;
alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-reverse relax-parse statistics ;
install legacy : programs : <location>. <install-type>EXE ;

View File

@ -1,12 +0,0 @@
lib_LTLIBRARIES = libkenutil.la
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libkenutil_la_SOURCES = \
bit_packing.cc \
ersatz_progress.cc \
exception.cc \
file.cc \
file_piece.cc \
murmur_hash.cc \
mmap.cc