diff --git a/.gitignore b/.gitignore index 94c29354d..0380331c4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,65 +1,30 @@ -*.[oa] -*.la -*.lo -*.Po *.so +*.a +*.swp *~ CreateOnDisk/src/CreateOnDiskPt -Makefile -Makefile.in -aclocal.m4 -autom4te.cache/ -config.h -config.log -config.guess -config.status -config.sub -configure -depcomp dist* -install-sh jam-files/bjam jam-files/engine/bootstrap jam-files/engine/bin.* -lm/.deps/ -lm/.libs/ -util/.deps/ -util/.libs/ lm/build_binary lm/query -libtool -mert/.deps/ -mert/Makefile -mert/Makefile.in mert/evaluator mert/extractor mert/mert mert/megam_i686.opt mert/pro -misc/.deps/ -mert/.libs/ -misc/Makefile -misc/Makefile.in misc/processLexicalTable misc/processPhraseTable misc/queryLexicalTable misc/queryPhraseTable -moses-chart/src/.deps/ moses-chart-cmd/src/moses_chart -moses-cmd/src/.deps/ -moses-cmd/src/Makefile -moses-cmd/src/Makefile.in moses-cmd/src/checkplf moses-cmd/src/lmbrgrid moses-cmd/src/moses -moses/src/.deps/ -moses/src/.libs/ -moses/src/Makefile -moses/src/Makefile.in regression-testing/moses-reg-test-data-* regression-testing/tests/mert.extractor-bin/FEATSTAT* regression-testing/tests/mert.extractor-bin/SCORESTAT* -scripts-2* scripts/ems/biconcor/biconcor scripts/release-exclude scripts/training/cmert-0.5/mert @@ -73,17 +38,6 @@ scripts/training/phrase-extract/consolidate scripts/training/phrase-extract/consolidate-direct scripts/training/phrase-extract/consolidate-reverse scripts/training/phrase-extract/extract -scripts/training/phrase-extract/extract-ghkm/config.guess -scripts/training/phrase-extract/extract-ghkm/config.h.in -scripts/training/phrase-extract/extract-ghkm/config.sub -scripts/training/phrase-extract/extract-ghkm/depcomp -scripts/training/phrase-extract/extract-ghkm/install-sh -scripts/training/phrase-extract/extract-ghkm/m4/libtool.m4 -scripts/training/phrase-extract/extract-ghkm/m4/ltoptions.m4 -scripts/training/phrase-extract/extract-ghkm/m4/ltsugar.m4 
-scripts/training/phrase-extract/extract-ghkm/m4/ltversion.m4 -scripts/training/phrase-extract/extract-ghkm/m4/lt~obsolete.m4 -scripts/training/phrase-extract/extract-ghkm/missing scripts/training/phrase-extract/extract-ghkm/tools/extract-ghkm scripts/training/phrase-extract/extract-lex scripts/training/phrase-extract/extract-rules @@ -92,12 +46,5 @@ scripts/training/phrase-extract/score scripts/training/phrase-extract/statistics scripts/training/symal/symal scripts/training/train-model.perl -stamp-h1 -ltmain.sh -m4/libtool.m4 -m4/ltoptions.m4 -m4/ltsugar.m4 -m4/ltversion.m4 -m4/lt~obsolete.m4 dist bin diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt index ad22c8b48..e5c0ad8ea 100644 --- a/BUILD-INSTRUCTIONS.txt +++ b/BUILD-INSTRUCTIONS.txt @@ -1,64 +1,109 @@ -0) Preliminaries +PRELIMINARIES -Before building you need to decide what language model toolkit (SRI's, -IRST's, or Ken's) you want to use. +Moses is primarily targeted at gcc on UNIX. -If you want to use SRI's, you will need to download its source -and build it. The SRILM can be downloaded from +Moses requires gcc, Boost >= 1.36, and zlib including the headers that some +distributions package separately (i.e. -dev or -devel packages). Source is +available at http://boost.org . + +There are several optional dependencies: + +GIZA++ from http://code.google.com/p/giza-pp/ is used to build phrase tables. + +Moses server requires xmlrpc-c with abyss-server. Source is available from +http://xmlrpc-c.sourceforge.net/. + +The scripts support building ARPA format language models with SRILM or IRSTLM. +To apply models inside the decoder, you can use SRILM, IRSTLM, or KenLM. The +ARPA format is exchangeable so that e.g. you can build a model with SRILM and +run the decoder with IRSTLM or KenLM. + +If you want to use SRILM, you will need to download its source and build it. +The SRILM can be downloaded from http://www.speech.sri.com/projects/srilm/download.html . +On x86_64, the default machine type is broken. 
Edit sbin/machine-type, find +this code + else if (`uname -m` == x86_64) then + set MACHINE_TYPE = i686 +and change it to + else if (`uname -m` == x86_64) then + set MACHINE_TYPE = i686-m64 +You may have to chmod +w sbin/machine-type first. - -If you want to use IRST's, you will need to download its source and -build it. The IRSTLM can be downloaded from either the SourceForge -website +If you want to use IRSTLM, you will need to download its source and build it. +The IRSTLM can be downloaded from either the SourceForge website http://sourceforge.net/projects/irstlm or the official IRSTLM website http://hlt.fbk.eu/en/irstlm - -Ken's LM is included with the Moses distribution. +KenLM is included with Moses. -------------------------------------------------------------------------- -1) Instructions for building with SRILM +ADVICE ON INSTALLING EXTERNAL LIBRARIES -Build SRILM according to their release instructions. Make sure that -you DO NOT override the MACHINE_TYPE variable on the command line when -you do so, as this can lead to problems locating the library. 
+Generally, for trouble installing external libraries, you should get support +directly from the library maker: - ./bjam [--with-srilm=/path/to/srilm] +Boost: http://www.boost.org/doc/libs/1_48_0/more/getting_started/unix-variants.html +IRSTLM: https://list.fbk.eu/sympa/subscribe/user-irstlm +SRILM: http://www.speech.sri.com/projects/srilm/#srilm-user + +However, here's some general advice on installing software (for bash users): + +#Determine where you want to install packages +PREFIX=$HOME/usr +#If your system has lib64 directories, lib64 should be used AND NOT lib +if [ -d /lib64 ]; then + LIBDIR=$PREFIX/lib64 +else + LIBDIR=$PREFIX/lib +fi +#If you're installing to a non-standard path, tell programs where to find things: +export PATH=$PREFIX/bin${PATH:+:$PATH} +export LD_LIBRARY_PATH=$LIBDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} +export LIBRARY_PATH=$LIBDIR${LIBRARY_PATH:+:$LIBRARY_PATH} +export CPATH=$PREFIX/include${CPATH:+:$CPATH} + +Add all the above code to your .bashrc or .bash_login as appropriate. Then +you're ready to install packages in non-standard paths: + +#For autotools packages e.g. xmlrpc-c +./configure --prefix=$PREFIX --libdir=$LIBDIR [other options here] + +#For Boost: +./bootstrap.sh +./b2 --prefix=$PREFIX --libdir=$LIBDIR link=static,shared threading=multi install -------------------------------------------------------------------------- -2) Instructions for building with IRSTLM +BUILDING -Build IRSTLM according to its release instructions. +Building consists of running + ./bjam [options] - ./bjam [--with-irstlm=/path/to/irstlm] +Common options are: +--with-srilm=/path/to/srilm to compile the decoder with SRILM support +--with-irstlm=/path/to/irstlm to compile the decoder with IRSTLM support +--with-giza=/path/to/giza to enable training scripts +-jN where N is the number of CPUs + +Binaries will appear in dist/bin. 
+ +For further documentation, run + ./bjam --help -------------------------------------------------------------------------- -3) Instructions for building with Ken's LM - - ./bjam - --------------------------------------------------------------------------- - - ALTERNATIVE WAYS TO BUILD ON UNIX AND OTHER PLATFORMS Microsoft Windows ----------------- -Tested on 32-bit Windows XP and Vista using Visual Studio 2005. -Again, refer to the old manual - http://homepages.inf.ed.ac.uk/s0565741/papers/developers-manual.pdf -The Windows build doesn't use the SRI or IRST language model libraries as they can't be compiled -under Windows using Visual Studio. Instead, an internal language model, which behave like SRILM is used, -however, it can only handle up to trigrams. +Moses is primarily targeted at gcc on UNIX. Windows users should consult +http://ssli.ee.washington.edu/people/amittai/Moses-on-Win7.pdf . Binaries for all external libraries needed can be downloaded from http://www.statmt.org/moses/?n=Moses.LibrariesUsed -Only the decoder is developed and tested under Windows. There are difficulties using the training scripts under Windows, even with Cygwin. - - +Only the decoder is developed and tested under Windows. There are difficulties +using the training scripts under Windows, even with Cygwin. 
diff --git a/CreateOnDisk/src/Makefile.am b/CreateOnDisk/src/Makefile.am deleted file mode 100644 index f8c99a741..000000000 --- a/CreateOnDisk/src/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -bin_PROGRAMS = CreateOnDiskPt -CreateOnDiskPt_SOURCES = Main.cpp -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS) - -CreateOnDiskPt_LDADD = -L$(top_srcdir)/OnDiskPt/src -L$(top_srcdir)/moses/src -lOnDiskPt -lmoses $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) -CreateOnDiskPt_DEPENDENCIES = $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a $(top_srcdir)/moses/src/libmoses.la diff --git a/Jamroot b/Jamroot index 754b9400a..89ff30cb9 100644 --- a/Jamroot +++ b/Jamroot @@ -59,12 +59,12 @@ path-constant TOP : . ; # Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php -rule trim-nl ( str ) { +rule trim-nl ( str extras * ) { return [ MATCH "([^ -]*)" : $(str) ] ; +]*)" : $(str) ] $(extras) ; } -rule _shell ( cmd ) { - return [ trim-nl [ SHELL $(cmd) ] ] ; +rule _shell ( cmd : extras * ) { + return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ; } import option ; @@ -110,7 +110,7 @@ if $(boost-version) < 103600 { exit You have Boost $(boost-version). Moses requires at least 103600 (and preferably newer). : 1 ; } #Are we linking static binaries against shared boost? -boost-auto-shared = [ auto_shared "boost_program_options" : L-boost-search ] ; +boost-auto-shared = [ auto_shared "boost_program_options" : $(L-boost-search) ] ; #Convenience rule for boost libraries. Defines library boost_$(name). rule boost_lib ( name macro ) { #Link multi-threaded programs against the -mt version if available. Old @@ -137,14 +137,20 @@ rule external_lib ( name ) { external_lib z ; +requirements = ; + #libSegFault prints a stack trace on segfault. Link against it if available. 
if [ test_flags "-lSegfault" ] { external_lib SegFault ; - segfault = SegFault ; + requirements += SegFault ; } -trace = [ option.get "notrace" : TRACE_ENABLE=1 ] ; -boost-pool = [ option.get "enable-boost-pool" : : "USE_BOOST_POOL" ] ; +requirements += [ option.get "notrace" : TRACE_ENABLE=1 ] ; +requirements += [ option.get "enable-boost-pool" : : USE_BOOST_POOL ] ; + +if [ option.get "with-irstlm" ] { + requirements += single ; +} import os ; @@ -164,9 +170,7 @@ project : requirements multi:WITH_THREADS multi:boost_thread _FILE_OFFSET_BITS=64 _LARGE_FILES - $(segfault) - $(trace) - $(boost-pool) + $(requirements) $(cxxflags) $(cflags) $(ldflags) diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index e117c10f5..000000000 --- a/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -# not a GNU package. You can remove this line, if -# have all needed files, that a GNU package needs -AUTOMAKE_OPTIONS = foreign - -ACLOCAL_AMFLAGS = -I m4 - -# order is important here: build moses before moses-cmd -if WITH_MERT - MERT = mert -endif -if WITH_SERVER - SERVER = contrib/server -endif -SUBDIRS = util lm moses/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src $(MERT) $(SERVER) diff --git a/OnDiskPt/src/Makefile.am b/OnDiskPt/src/Makefile.am deleted file mode 100644 index 7070e372a..000000000 --- a/OnDiskPt/src/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -lib_LIBRARIES = libOnDiskPt.a -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS) -libOnDiskPt_a_SOURCES = \ - OnDiskWrapper.cpp \ - SourcePhrase.cpp \ - TargetPhrase.cpp \ - Word.cpp \ - Phrase.cpp \ - PhraseNode.cpp \ - TargetPhraseCollection.cpp \ - Vocab.cpp - - - diff --git a/config.h.in b/config.h.in deleted file mode 100644 index 444a2e218..000000000 --- a/config.h.in +++ /dev/null @@ -1,116 +0,0 @@ -/* config.h.in. Generated from configure.in by autoheader. 
*/ - -/* Defined if the requested minimum BOOST version is satisfied */ -#undef HAVE_BOOST - -/* Define to 1 if you have */ -#undef HAVE_BOOST_PROGRAM_OPTIONS_HPP - -/* Define to 1 if you have */ -#undef HAVE_BOOST_SCOPED_PTR_HPP - -/* Define to 1 if you have */ -#undef HAVE_BOOST_SHARED_PTR_HPP - -/* Define to 1 if you have */ -#undef HAVE_BOOST_THREAD_HPP - -/* Define to 1 if you have the header file. */ -#undef HAVE_DLFCN_H - -/* flag for DMapLM */ -#undef HAVE_DMAPLM - -/* Define to 1 if you have the header file. */ -#undef HAVE_GETOPT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_INTTYPES_H - -/* flag for IRSTLM */ -#undef HAVE_IRSTLM - -/* Define to 1 if you have the `oolm' library (-loolm). */ -#undef HAVE_LIBOOLM - -/* Define to 1 if you have the `tcmalloc' library (-ltcmalloc). */ -#undef HAVE_LIBTCMALLOC - -/* Define to 1 if you have the header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_NL_CPT_H - -/* flag for ORLM */ -#undef HAVE_ORLM - -/* flag for protobuf */ -#undef HAVE_PROTOBUF - -/* flag for RandLM */ -#undef HAVE_RANDLM - -/* flag for SRILM */ -#undef HAVE_SRILM - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_STRING_H - -/* flag for Syntactic Parser */ -#undef HAVE_SYNLM - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* flag for zlib */ -#undef HAVE_ZLIB - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#undef LT_OBJDIR - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. 
*/ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the home page for this package. */ -#undef PACKAGE_URL - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Flag to enable use of Boost pool */ -#undef USE_BOOST_POOL - -/* Version number of package */ -#undef VERSION diff --git a/configure.in b/configure.in deleted file mode 100644 index 1acfee439..000000000 --- a/configure.in +++ /dev/null @@ -1,332 +0,0 @@ -AC_INIT(moses/src) - -AM_CONFIG_HEADER(config.h) -AM_INIT_AUTOMAKE(moses, 0.1) - -AC_CONFIG_MACRO_DIR([m4]) - -AC_PROG_CXX -AC_PROG_CXXCPP -AC_LANG_CPLUSPLUS - -AC_DISABLE_SHARED -AC_PROG_LIBTOOL -# Shared library are disabled for default -#LT_INIT([disable-shared]) - -AX_XMLRPC_C -BOOST_REQUIRE([1.36.0]) -BOOST_SMART_PTR -BOOST_PROGRAM_OPTIONS - -AC_ARG_WITH(protobuf, - [AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])], - [with_protobuf=$withval], - [with_protobuf=no] - ) - -AC_ARG_WITH(srilm, - [AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])], - [with_srilm=$withval], - [with_srilm=no] - ) - -AC_ARG_WITH(srilm-dynamic, - [AC_HELP_STRING([--with-srilm-dynamic], [(optional) link dynamically with srilm])], - [with_srilm_dynamic=yes], - [with_srilm_dynamic=no] - ) - -AC_ARG_WITH(srilm-arch, - [AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])], - [with_srilm_arch=$withval], - [with_srilm_arch=no] - ) - - -AC_ARG_WITH(irstlm, - [AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])], - [with_irstlm=$withval], - [with_irstlm=no] - ) - -AC_ARG_WITH(randlm, - [AC_HELP_STRING([--with-randlm=PATH], [(optional) 
path to RandLM toolkit])], - [with_randlm=$withval], - [with_randlm=no] - ) -AC_ARG_WITH(orlm, - [AC_HELP_STRING([--with-orlm=PATH], [(optional) path to ORLM])], - [with_orlm=$withval], - [with_orlm=no] - ) -AC_ARG_WITH(dmaplm, - [AC_HELP_STRING([--with-dmaplm=PATH], [(optional) path to DMapLM])], - [with_dmaplm=$withval], - [with_dmaplm=no] - ) - -AC_ARG_WITH(synlm, - [AC_HELP_STRING([--with-synlm], [(optional) Include syntactic language model parser; default is no])], - [with_synlm=$withval], - [with_synlm=no] - ) - -AC_ARG_WITH(notrace, - [AC_HELP_STRING([--notrace], [disable trace])], - [without_trace=yes], - ) - - - -AC_ARG_ENABLE(profiling, - [AC_HELP_STRING([--enable-profiling], [moses will dump profiling info])], - [CPPFLAGS="$CPPFLAGS -pg"; LDFLAGS="$LDFLAGS -pg" ] - ) - -AC_ARG_ENABLE(optimization, - [AC_HELP_STRING([--enable-optimization], [compile with -O3 flag])], - [CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ] - ) - -AC_ARG_ENABLE(threads, - [AC_HELP_STRING([--enable-threads], [compile threadsafe library and multi-threaded moses (mosesmt)])], - [], - [enable_threads=no] - ) - -AC_ARG_WITH(zlib, - [AC_HELP_STRING([--with-zlib=PATH], [(optional) path to zlib])], - [with_zlib=$withval], - [with_zlib=no] - ) - -AC_ARG_WITH(tcmalloc, - [AC_HELP_STRING([--with-tcmalloc], [(optional) link with tcmalloc; default is no])], - [with_tcmalloc=$withval], - [with_tcmalloc=no] - ) - -AC_ARG_ENABLE(boost-pool, - [AC_HELP_STRING([--enable-boost-pool], [(optional) try to improve speed by selectively using Boost pool allocation (may increase total memory use); default is yes if Boost enabled])], - [enable_boost_pool=yes], - [enable_boost_pool=no] - ) - - -AM_CONDITIONAL([INTERNAL_LM], false) -AM_CONDITIONAL([SRI_LM], false) -AM_CONDITIONAL([IRST_LM], false) -AM_CONDITIONAL([KEN_LM], false) -AM_CONDITIONAL([RAND_LM], false) -AM_CONDITIONAL([ORLM_LM], false) -AM_CONDITIONAL([DMAP_LM], false) -AM_CONDITIONAL([SYN_LM], false) -AM_CONDITIONAL([PROTOBUF], false) 
-AM_CONDITIONAL([am__fastdepCC], false) -AM_CONDITIONAL([WITH_THREADS],false) - - -if test "x$without_trace" = 'xyes' -then - AC_MSG_NOTICE([trace disabled, most regression test will fail]) -else - AC_MSG_NOTICE([trace enabled (default)]) - CPPFLAGS="$CPPFLAGS -DTRACE_ENABLE=1" -fi - -if test "x$enable_threads" = 'xyes' -then - AC_MSG_NOTICE([Building threaded moses]) - BOOST_THREADS - CPPFLAGS="$CPPFLAGS -DWITH_THREADS" - AM_CONDITIONAL([WITH_THREADS],true) -else - AC_MSG_NOTICE([Building non-threaded moses. This will disable the moses server]) -fi - -if test "x$with_protobuf" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_protobuf}/include" - - AC_CHECK_HEADER(google/protobuf/message.h, - [AC_DEFINE([HAVE_PROTOBUF], [], [flag for protobuf])], - [AC_MSG_ERROR([Cannot find protobuf!])]) - - LIB_PROTOBUF="-lprotobuf" - LDFLAGS="$LDFLAGS -L${with_protobuf}/lib" - LIBS="$LIBS $LIB_PROTOBUF" - AC_PATH_PROG(PROTOC,protoc,,"${PATH}:${with_protobuf}/bin") - FMTLIBS="$FMTLIBS libprotobuf.a" - AM_CONDITIONAL([PROTOBUF], true) -fi - -if test "x$with_srilm" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_srilm}/include" - - AC_CHECK_HEADER(Ngram.h, - [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])], - [AC_MSG_ERROR([Cannot find SRILM!])]) - - if test "x$with_srilm_dynamic" != 'xyes' - then - LIB_SRILM="-loolm -ldstruct -lmisc -lflm" - # ROOT/lib/i686-m64/liboolm.a - # ROOT/lib/i686-m64/libdstruct.a - # ROOT/lib/i686-m64/libmisc.a - if test "x$with_srilm_arch" != 'xno' - then - MY_ARCH=${with_srilm_arch} - else - MY_ARCH=`${with_srilm}/sbin/machine-type` - fi - LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}" - LIBS="$LIBS $LIB_SRILM" - FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a" - else - LDFLAGS="$LDFLAGS -L${with_srilm}/lib" - LIBS="$LIBS -lsrilm" - fi - AC_CHECK_LIB([oolm], [trigram_init], [], [AC_MSG_ERROR([Cannot find SRILM's library in ${with_srilm}/lib/${MY_ARCH} ])]) - 
AM_CONDITIONAL([SRI_LM], true) -fi - -if test "x$with_irstlm" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include" - - - AC_MSG_NOTICE([]) - AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]) - AC_MSG_NOTICE([!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!]) - AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]) - AC_MSG_NOTICE([]) - - - AC_CHECK_HEADER(n_gram.h, - [AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])], - [AC_MSG_ERROR([Cannot find IRST-LM in ${with_irstlm}])]) - - MY_ARCH=`uname -m` - LIB_IRSTLM="-lirstlm" - LDFLAGS="$LDFLAGS -L${with_irstlm}/lib" - LIBS="$LIBS $LIB_IRSTLM" - FMTLIBS="$FMTLIBS libirstlm.a" - AM_CONDITIONAL([IRST_LM], true) -fi - -CPPFLAGS="$CPPFLAGS -I\$(top_srcdir)" -#LDFLAGS="$LDFLAGS -L\$(top_srcdir)/util -lkenutil -L\$(top_srcdir)/lm -lkenlm -lz" -#KENUTIL_DEPS="\$(top_srcdir)/util/libkenutil.la" -#KENLM_DEPS="\$(top_srcdir)/lm/libkenlm.la" -#FMTLIBS="$FMTLIBS libkenutil.la libkenlm.la" -#AC_SUBST(KENUTIL_DEPS) -#AC_SUBST(KENLM_DEPS) - -if test "x$with_randlm" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_randlm}/include" - - AC_CHECK_HEADER(RandLM.h, - [AC_DEFINE([HAVE_RANDLM], [], [flag for RandLM])], - [AC_MSG_ERROR([Cannot find RandLM!])]) - - - MY_ARCH=`uname -m` - LIB_RANDLM="-lrandlm" - LDFLAGS="$LDFLAGS -L${with_randlm}/lib" - LIBS="$LIBS $LIB_RANDLM" - FMTLIBS="$FMTLIBS librandlm.a" - AM_CONDITIONAL([RAND_LM], true) -fi - -if test "x$with_dmaplm" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_dmaplm}/src/DMap" - - AC_CHECK_HEADER(StructLanguageModel.h, - [AC_DEFINE([HAVE_DMAPLM], [], [flag for DMapLM])], - [AC_MSG_ERROR([Cannot find DMapLM!])]) - - LDFLAGS="$LDFLAGS -L${with_dmaplm}/src/DMap" - LIBS="$LIBS -lDMap" - FMTLIBS="FMTLIBS libdmap.la" - AM_CONDITIONAL([DMAP_LM], true) -fi - -if test "x$with_orlm" != 'xno' 
-then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${with_orlm}/" - - AC_CHECK_HEADER(onlineRLM.h, -#AC_CHECK_HEADER(multiOnlineRLM.h, - [AC_DEFINE([HAVE_ORLM], [], [flag for ORLM])], - [AC_MSG_ERROR([Cannot find ORLM!])]) - - MY_ARCH=`uname -m` - AM_CONDITIONAL([ORLM_LM], true) -fi -if test "x$with_tcmalloc" != 'xno' -then - AC_CHECK_LIB([tcmalloc], [malloc], [], [AC_MSG_ERROR([Cannot find tcmalloc])]) -fi - - -if test "x$enable_boost_pool" != 'xno' -then - AC_CHECK_HEADER(boost/pool/object_pool.hpp, - [AC_DEFINE([USE_BOOST_POOL], [], [Flag to enable use of Boost pool])], - [AC_MSG_WARN([Cannot find boost/pool/object_pool.hpp])] - ) -fi - -if test "x$with_synlm" != 'xno' -then - SAVE_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS -I${PWD}/synlm/hhmm/rvtl/include -I${PWD}/synlm/hhmm/wsjparse/include -lm" - - AC_CHECK_HEADERS(nl-cpt.h, - [AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])], - [AC_MSG_ERROR([Cannot find SYNLM in ${PWD}/synlm/hhmm])]) - - AM_CONDITIONAL([SYN_LM], true) - -fi - - -AM_CONDITIONAL([WITH_MERT],false) -AC_CHECK_HEADERS([getopt.h], - [AM_CONDITIONAL([WITH_MERT],true)], - [AC_MSG_WARN([Cannot find getopt.h - disabling new mert])]) - -AM_CONDITIONAL([WITH_SERVER],false) -if test "x$have_xmlrpc_c" = "xyes" && test "x$enable_threads" = "xyes"; then - AM_CONDITIONAL([WITH_SERVER],true) -else - AC_MSG_NOTICE([Disabling server]) -fi - -if test "x$with_zlib" != 'xno' -then - CPPFLAGS="$CPPFLAGS -I${with_zlib}/include" - LDFLAGS="$LDFLAGS -L${with_zlib}/lib" -fi - -# zlib is always required (see ./moses/src/gzfilebuf.h) -# TODO: This shouldn't be presented to the user as a config option if it isn't actually an option -AC_CHECK_HEADER(zlib.h, - [AC_DEFINE([HAVE_ZLIB], [], [flag for zlib])], - [AC_MSG_ERROR([Cannot find zlib.h. Please install it. 
For Debian, try 'sudo aptitude install zlib1g-dev'])]) -LIBS="$LIBS -lz" - - -AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile mert/Makefile contrib/server/Makefile CreateOnDisk/src/Makefile util/Makefile lm/Makefile) - -AC_OUTPUT() diff --git a/contrib/server/Jamfile b/contrib/server/Jamfile index 2978dc4d6..9f6d223cf 100644 --- a/contrib/server/Jamfile +++ b/contrib/server/Jamfile @@ -1,22 +1,39 @@ -#If you get compilation errors here, make sure you have xmlrpc-c installed properly. . . +#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option. import option ; +import path ; with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ; if $(with-xmlrpc-c) { build-moses-server = true ; - shell-prefix = $(with-xmlrpc-c)/bin/ ; -} else { - if [ SHELL $(TOP)"/jam-files/test.sh -include xmlrpc-c/base.hpp -lxmlrpc_server_abyss++" ] = 0 { - build-moses-server = true ; + xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ; + if ! [ path.exists $(xmlrpc-command) ] { + exit Could not find $(xmlrpc-command) : 1 ; } - shell-prefix = "" ; +} else { + xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ; + if $(xmlrpc-check[2]) = 0 { + if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] { + build-moses-server = true ; + } else { + echo "Found xmlrpc-c but it does not have abyss-server. Skipping mosesserver." 
; + } + } + xmlrpc-command = "xmlrpc-c-config" ; +} + +rule shell_or_die ( cmd ) { + local ret = [ _shell $(cmd) : exit-status ] ; + if $(ret[2]) != 0 { + exit "Failed to run $(cmd)" : 1 ; + } + return $(ret[1]) ; } if $(build-moses-server) = true { - xmlrpc-linkflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --libs" ] ; - xmlrpc-cxxflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --cflags" ] ; + xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ; + xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ; exe mosesserver : mosesserver.cpp ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt : $(xmlrpc-linkflags) $(xmlrpc-cxxflags) ; } else { diff --git a/contrib/server/Makefile.am b/contrib/server/Makefile.am deleted file mode 100644 index 925a8c40f..000000000 --- a/contrib/server/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -bin_PROGRAMS = mosesserver -mosesserver_SOURCES = mosesserver.cpp -mosesserver_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src $(XMLRPC_C_CPPFLAGS) $(BOOST_CPPFLAGS) -mosesserver_LDADD = -L$(top_srcdir)/moses/src -lmoses -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(XMLRPC_C_LIBS) $(BOOST_THREAD_LIBS) -mosesserver_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a diff --git a/cruise-control/test_all_new_commits.sh b/cruise-control/test_all_new_commits.sh index e0e1f5ee8..c0039c7eb 100755 --- a/cruise-control/test_all_new_commits.sh +++ b/cruise-control/test_all_new_commits.sh @@ -86,15 +86,7 @@ function run_single_test () { err="" - echo "## ./bjam clean" >> $longlog - ./bjam clean >> $longlog 2>&1 || warn "bjam clean failed, suspicious" - - echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog - if [ -z "$err" ]; then - ./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam" - fi - - cd regression-testing + cd regression-testing 
regtest_file=$(echo "$REGTEST_ARCHIVE" | sed 's/^.*\///') # download data for regression tests if necessary @@ -104,15 +96,22 @@ function run_single_test () { tar xzf $regtest_file touch $regtest_file.ok fi + regtest_dir=$PWD/$(basename $regtest_file .tgz) + cd .. + + echo "## ./bjam clean" >> $longlog + ./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious" + + echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog + if [ -z "$err" ]; then + ./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam" + fi + echo "## regression tests" >> $longlog if [ -z "$err" ]; then - ./run-test-suite.perl &>> $longlog - regtest_status=$? - [ $regtest_status -eq 1 ] && die "Failed to run regression tests" - [ $regtest_status -eq 2 ] && err="regression tests" + ./bjam $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || err="regression tests" fi - cd .. if [ -z "$err" ] && [ "$MCC_RUN_EMS" = "yes" ]; then echo "## EMS" >> $longlog diff --git a/lm/Makefile.am b/lm/Makefile.am deleted file mode 100644 index f208f3223..000000000 --- a/lm/Makefile.am +++ /dev/null @@ -1,25 +0,0 @@ -lib_LTLIBRARIES = libkenlm.la -bin_PROGRAMS = query build_binary - -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS) -libkenlm_la_SOURCES = \ - bhiksha.cc \ - binary_format.cc \ - config.cc \ - lm_exception.cc \ - model.cc \ - search_hashed.cc \ - search_trie.cc \ - quantize.cc \ - read_arpa.cc \ - trie.cc \ - trie_sort.cc \ - virtual_interface.cc \ - vocab.cc - -query_SOURCES = ngram_query.cc -query_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la - -build_binary_SOURCES = build_binary.cc -build_binary_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la - diff --git a/lm/build_binary.cc b/lm/build_binary.cc index 827e5efb0..e235cc5a3 100644 --- a/lm/build_binary.cc +++ b/lm/build_binary.cc @@ -160,44 +160,45 @@ int main(int argc, char *argv[]) { } if (optind + 1 == argc) { 
ShowSizes(argv[optind], config); - } else if (optind + 2 == argc) { + return 0; + } + const char *model_type, *from_file; + if (optind + 2 == argc) { + model_type = "probing"; + from_file = argv[optind]; config.write_mmap = argv[optind + 1]; - if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); - ProbingModel(argv[optind], config); } else if (optind + 3 == argc) { - const char *model_type = argv[optind]; - const char *from_file = argv[optind + 1]; + model_type = argv[optind]; + from_file = argv[optind + 1]; config.write_mmap = argv[optind + 2]; - if (!strcmp(model_type, "probing")) { - if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); - ProbingModel(from_file, config); - } else if (!strcmp(model_type, "trie")) { - if (quantize) { - if (bhiksha) { - QuantArrayTrieModel(from_file, config); - } else { - QuantTrieModel(from_file, config); - } + } else { + Usage(argv[0]); + } + if (!strcmp(model_type, "probing")) { + if (quantize || set_backoff_bits) ProbingQuantizationUnsupported(); + ProbingModel(from_file, config); + } else if (!strcmp(model_type, "trie")) { + if (quantize) { + if (bhiksha) { + QuantArrayTrieModel(from_file, config); } else { - if (bhiksha) { - ArrayTrieModel(from_file, config); - } else { - TrieModel(from_file, config); - } + QuantTrieModel(from_file, config); } } else { - Usage(argv[0]); + if (bhiksha) { + ArrayTrieModel(from_file, config); + } else { + TrieModel(from_file, config); + } } } else { Usage(argv[0]); } - } - catch (const std::exception &e) { + std::cerr << "Built " << config.write_mmap << " successfully." 
<< std::endl; + } catch (const std::exception &e) { std::cerr << e.what() << std::endl; - std::cerr << "ERROR" << std::endl; return 1; } - std::cerr << "SUCCESS" << std::endl; return 0; } diff --git a/lm/vocab.cc b/lm/vocab.cc index 3fefe6b13..c10743ceb 100644 --- a/lm/vocab.cc +++ b/lm/vocab.cc @@ -229,7 +229,7 @@ void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialW if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as ."; break; case THROW_UP: - UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check."); + UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. If you built your APRA with IRSTLM and forgot to run add-start-end.sh, complain to stating that you think build-lm.sh should do this by default, then go back and retrain your model from the start. To bypass this check and treat " << str << " as an OOV, pass -s. The resulting model will not work with e.g. Moses."); } } diff --git a/m4/ax_xmlrpc_c.m4 b/m4/ax_xmlrpc_c.m4 deleted file mode 100644 index a45760fff..000000000 --- a/m4/ax_xmlrpc_c.m4 +++ /dev/null @@ -1,52 +0,0 @@ -AC_DEFUN([AX_XMLRPC_C], [ - AC_MSG_CHECKING(for XMLRPC-C) - - AC_ARG_WITH(xmlrpc-c, - [ --with-xmlrpc-c=PATH Enable XMLRPC-C support. 
Setting the PATH to yes will search for xmlrpc-c-config on the shell PATH,], - [ - if test "$withval" = "no"; then - AC_MSG_RESULT(no) - - else - if test "$withval" = "yes"; then - xmlrpc_cc_prg="xmlrpc-c-config" - else - xmlrpc_cc_prg="$withval" - fi - - if eval $xmlrpc_cc_prg --version 2>/dev/null >/dev/null; then - XMLRPC_C_CPPFLAGS=`$xmlrpc_cc_prg --cflags c++2 abyss-server` - XMLRPC_C_LIBS=`$xmlrpc_cc_prg c++2 abyss-server --libs` - CXXFLAGS_SAVED=$CXXFLAGS - CXXFLAGS="$CXXFLAGS $XMLRPC_C_CPPFLAGS" - LIBS_SAVED=$LIBS - LIBS="$LIBS $XMLRPC_C_LIBS" - - AC_TRY_LINK( - [ #include - ],[ xmlrpc_registry_new(NULL); ], - [ - AC_MSG_RESULT(ok) - ], [ - AC_MSG_RESULT(failed) - AC_MSG_ERROR(Could not compile XMLRPC-C test.) - ]) - -dnl AC_DEFINE(HAVE_XMLRPC_C, 1, Support for XMLRPC-C.) - have_xmlrpc_c=yes - AC_SUBST(XMLRPC_C_LIBS) - AC_SUBST(XMLRPC_C_CPPFLAGS) - - LIBS=$LIBS_SAVED - CXXFLAGS=$CXXFLAGS_SAVED - - else - AC_MSG_RESULT(failed) - AC_MSG_ERROR(Could not compile XMLRPC-C test.) - fi - fi - - ],[ - AC_MSG_RESULT(ignored) - ]) -]) diff --git a/m4/boost.m4 b/m4/boost.m4 deleted file mode 100644 index 7e0ed075f..000000000 --- a/m4/boost.m4 +++ /dev/null @@ -1,1035 +0,0 @@ -# boost.m4: Locate Boost headers and libraries for autoconf-based projects. -# Copyright (C) 2007, 2008, 2009 Benoit Sigoure -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Additional permission under section 7 of the GNU General Public -# License, version 3 ("GPLv3"): -# -# If you convey this file as part of a work that contains a -# configuration script generated by Autoconf, you may do so under -# terms of your choice. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -m4_define([_BOOST_SERIAL], [m4_translit([ -# serial 12 -], [# -], [])]) - -# Original sources can be found at http://github.com/tsuna/boost.m4 -# You can fetch the latest version of the script by doing: -# wget http://github.com/tsuna/boost.m4/raw/master/build-aux/boost.m4 - -# ------ # -# README # -# ------ # - -# This file provides several macros to use the various Boost libraries. -# The first macro is BOOST_REQUIRE. It will simply check if it's possible to -# find the Boost headers of a given (optional) minimum version and it will -# define BOOST_CPPFLAGS accordingly. It will add an option --with-boost to -# your configure so that users can specify non standard locations. -# If the user's environment contains BOOST_ROOT and --with-boost was not -# specified, --with-boost=$BOOST_ROOT is implicitly used. -# For more README and documentation, go to http://github.com/tsuna/boost.m4 -# Note: THESE MACROS ASSUME THAT YOU USE LIBTOOL. If you don't, don't worry, -# simply read the README, it will show you what to do step by step. - -m4_pattern_forbid([^_?BOOST_]) - - -# _BOOST_SED_CPP(SED-PROGRAM, PROGRAM, -# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -------------------------------------------------------- -# Same as AC_EGREP_CPP, but leave the result in conftest.i. -# PATTERN is *not* overquoted, as in AC_EGREP_CPP. It could be useful -# to turn this into a macro which extracts the value of any macro. -m4_define([_BOOST_SED_CPP], -[AC_LANG_PREPROC_REQUIRE()dnl -AC_REQUIRE([AC_PROG_SED])dnl -AC_LANG_CONFTEST([AC_LANG_SOURCE([[$2]])]) -AS_IF([dnl eval is necessary to expand ac_cpp. 
-dnl Ultrix and Pyramid sh refuse to redirect output of eval, so use subshell. -dnl Beware of Windows end-of-lines, for instance if we are running -dnl some Windows programs under Wine. In that case, boost/version.hpp -dnl is certainly using "\r\n", but the regular Unix shell will only -dnl strip `\n' with backquotes, not the `\r'. This results in -dnl boost_cv_lib_version='1_37\r' for instance, which breaks -dnl everything else. -dnl Cannot use 'dnl' after [$4] because a trailing dnl may break AC_CACHE_CHECK -(eval "$ac_cpp conftest.$ac_ext") 2>&AS_MESSAGE_LOG_FD | - tr -d '\r' | - $SED -n -e "$1" >conftest.i 2>&1], - [$3], - [$4]) -rm -rf conftest* -])# AC_EGREP_CPP - - - -# BOOST_REQUIRE([VERSION], [ACTION-IF-NOT-FOUND]) -# ----------------------------------------------- -# Look for Boost. If version is given, it must either be a literal of the form -# "X.Y.Z" where X, Y and Z are integers (the ".Z" part being optional) or a -# variable "$var". -# Defines the value BOOST_CPPFLAGS. This macro only checks for headers with -# the required version, it does not check for any of the Boost libraries. -# On # success, defines HAVE_BOOST. On failure, calls the optional -# ACTION-IF-NOT-FOUND action if one was supplied. -# Otherwise aborts with an error message. -AC_DEFUN([BOOST_REQUIRE], -[AC_REQUIRE([AC_PROG_CXX])dnl -AC_REQUIRE([AC_PROG_GREP])dnl -echo "$as_me: this is boost.m4[]_BOOST_SERIAL" >&AS_MESSAGE_LOG_FD -boost_save_IFS=$IFS -boost_version_req=$1 -IFS=. -set x $boost_version_req 0 0 0 -IFS=$boost_save_IFS -shift -boost_version_req=`expr "$[1]" '*' 100000 + "$[2]" '*' 100 + "$[3]"` -AC_ARG_WITH([boost], - [AS_HELP_STRING([--with-boost=DIR], - [prefix of Boost $1 @<:@guess@:>@])])dnl -AC_ARG_VAR([BOOST_ROOT],[Location of Boost installation])dnl -# If BOOST_ROOT is set and the user has not provided a value to -# --with-boost, then treat BOOST_ROOT as if it the user supplied it. 
-if test x"$BOOST_ROOT" != x; then - if test x"$with_boost" = x; then - AC_MSG_NOTICE([Detected BOOST_ROOT; continuing with --with-boost=$BOOST_ROOT]) - with_boost=$BOOST_ROOT - else - AC_MSG_NOTICE([Detected BOOST_ROOT=$BOOST_ROOT, but overridden by --with-boost=$with_boost]) - fi -fi -AC_SUBST([DISTCHECK_CONFIGURE_FLAGS], - ["$DISTCHECK_CONFIGURE_FLAGS '--with-boost=$with_boost'"]) -boost_save_CPPFLAGS=$CPPFLAGS - AC_CACHE_CHECK([for Boost headers version >= $boost_version_req], - [boost_cv_inc_path], - [boost_cv_inc_path=no -AC_LANG_PUSH([C++])dnl -m4_pattern_allow([^BOOST_VERSION$])dnl - AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include -#if !defined BOOST_VERSION -# error BOOST_VERSION is not defined -#elif BOOST_VERSION < $boost_version_req -# error Boost headers version < $boost_version_req -#endif -]])]) - # If the user provided a value to --with-boost, use it and only it. - case $with_boost in #( - ''|yes) set x '' /opt/local/include /usr/local/include /opt/include \ - /usr/include C:/Boost/include;; #( - *) set x "$with_boost/include" "$with_boost";; - esac - shift - for boost_dir - do - # Without --layout=system, Boost (or at least some versions) installs - # itself in /include/boost-. This inner loop helps to - # find headers in such directories. - # - # Any ${boost_dir}/boost-x_xx directories are searched in reverse version - # order followed by ${boost_dir}. The final '.' is a sentinel for - # searching $boost_dir" itself. Entries are whitespace separated. 
- # - # I didn't indent this loop on purpose (to avoid over-indented code) - boost_layout_system_search_list=`cd "$boost_dir" 2>/dev/null \ - && ls -1 | "${GREP}" '^boost-' | sort -rn -t- -k2 \ - && echo .` - for boost_inc in $boost_layout_system_search_list - do - if test x"$boost_inc" != x.; then - boost_inc="$boost_dir/$boost_inc" - else - boost_inc="$boost_dir" # Uses sentinel in boost_layout_system_search_list - fi - if test x"$boost_inc" != x; then - # We are going to check whether the version of Boost installed - # in $boost_inc is usable by running a compilation that - # #includes it. But if we pass a -I/some/path in which Boost - # is not installed, the compiler will just skip this -I and - # use other locations (either from CPPFLAGS, or from its list - # of system include directories). As a result we would use - # header installed on the machine instead of the /some/path - # specified by the user. So in that precise case (trying - # $boost_inc), make sure the version.hpp exists. - # - # Use test -e as there can be symlinks. 
- test -e "$boost_inc/boost/version.hpp" || continue - CPPFLAGS="$CPPFLAGS -I$boost_inc" - fi - AC_COMPILE_IFELSE([], [boost_cv_inc_path=yes], [boost_cv_version=no]) - if test x"$boost_cv_inc_path" = xyes; then - if test x"$boost_inc" != x; then - boost_cv_inc_path=$boost_inc - fi - break 2 - fi - done - done -AC_LANG_POP([C++])dnl - ]) - case $boost_cv_inc_path in #( - no) - boost_errmsg="cannot find Boost headers version >= $boost_version_req" - m4_if([$2], [], [AC_MSG_ERROR([$boost_errmsg])], - [AC_MSG_NOTICE([$boost_errmsg])]) - $2 - ;;#( - yes) - BOOST_CPPFLAGS= - AC_DEFINE([HAVE_BOOST], [1], - [Defined if the requested minimum BOOST version is satisfied]) - ;;#( - *) - AC_SUBST([BOOST_CPPFLAGS], ["-I$boost_cv_inc_path"]) - ;; - esac - AC_CACHE_CHECK([for Boost's header version], - [boost_cv_lib_version], - [m4_pattern_allow([^BOOST_LIB_VERSION$])dnl - _BOOST_SED_CPP([/^boost-lib-version = /{s///;s/\"//g;p;g;}], - [#include -boost-lib-version = BOOST_LIB_VERSION], - [boost_cv_lib_version=`cat conftest.i`])]) - # e.g. "134" for 1_34_1 or "135" for 1_35 - boost_major_version=`echo "$boost_cv_lib_version" | sed 's/_//;s/_.*//'` - case $boost_major_version in #( - '' | *[[!0-9]]*) - AC_MSG_ERROR([invalid value: boost_major_version=$boost_major_version]) - ;; - esac -CPPFLAGS=$boost_save_CPPFLAGS -])# BOOST_REQUIRE - -# BOOST_STATIC() -# -------------- -# Add the "--enable-static-boost" configure argument. If this argument is given -# on the command line, static versions of the libraries will be looked up. -AC_DEFUN([BOOST_STATIC], - [AC_ARG_ENABLE([static-boost], - [AC_HELP_STRING([--enable-static-boost], - [Prefer the static boost libraries over the shared ones [no]])], - [enable_static_boost=yes], - [enable_static_boost=no])])# BOOST_STATIC - -# BOOST_FIND_HEADER([HEADER-NAME], [ACTION-IF-NOT-FOUND], [ACTION-IF-FOUND]) -# -------------------------------------------------------------------------- -# Wrapper around AC_CHECK_HEADER for Boost headers. 
Useful to check for -# some parts of the Boost library which are only made of headers and don't -# require linking (such as Boost.Foreach). -# -# Default ACTION-IF-NOT-FOUND: Fail with a fatal error unless Boost couldn't be -# found in the first place, in which case by default a notice is issued to the -# user. Presumably if we haven't died already it's because it's OK to not have -# Boost, which is why only a notice is issued instead of a hard error. -# -# Default ACTION-IF-FOUND: define the preprocessor symbol HAVE_ in -# case of success # (where HEADER-NAME is written LIKE_THIS, e.g., -# HAVE_BOOST_FOREACH_HPP). -AC_DEFUN([BOOST_FIND_HEADER], -[AC_REQUIRE([BOOST_REQUIRE])dnl -if test x"$boost_cv_inc_path" = xno; then - m4_default([$2], [AC_MSG_NOTICE([Boost not available, not searching for $1])]) -else -AC_LANG_PUSH([C++])dnl -boost_save_CPPFLAGS=$CPPFLAGS -CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" -AC_CHECK_HEADER([$1], - [m4_default([$3], [AC_DEFINE(AS_TR_CPP([HAVE_$1]), [1], - [Define to 1 if you have <$1>])])], - [m4_default([$2], [AC_MSG_ERROR([cannot find $1])])]) -CPPFLAGS=$boost_save_CPPFLAGS -AC_LANG_POP([C++])dnl -fi -])# BOOST_FIND_HEADER - - -# BOOST_FIND_LIB([LIB-NAME], [PREFERRED-RT-OPT], [HEADER-NAME], [CXX-TEST], -# [CXX-PROLOGUE]) -# ------------------------------------------------------------------------- -# Look for the Boost library LIB-NAME (e.g., LIB-NAME = `thread', for -# libboost_thread). Check that HEADER-NAME works and check that -# libboost_LIB-NAME can link with the code CXX-TEST. The optional argument -# CXX-PROLOGUE can be used to include some C++ code before the `main' -# function. -# -# Invokes BOOST_FIND_HEADER([HEADER-NAME]) (see above). -# -# Boost libraries typically come compiled with several flavors (with different -# runtime options) so PREFERRED-RT-OPT is the preferred suffix. A suffix is one -# or more of the following letters: sgdpn (in that order). 
s = static -# runtime, d = debug build, g = debug/diagnostic runtime, p = STLPort build, -# n = (unsure) STLPort build without iostreams from STLPort (it looks like `n' -# must always be used along with `p'). Additionally, PREFERRED-RT-OPT can -# start with `mt-' to indicate that there is a preference for multi-thread -# builds. Some sample values for PREFERRED-RT-OPT: (nothing), mt, d, mt-d, gdp -# ... If you want to make sure you have a specific version of Boost -# (eg, >= 1.33) you *must* invoke BOOST_REQUIRE before this macro. -AC_DEFUN([BOOST_FIND_LIB], -[AC_REQUIRE([BOOST_REQUIRE])dnl -AC_REQUIRE([_BOOST_FIND_COMPILER_TAG])dnl -AC_REQUIRE([BOOST_STATIC])dnl -AC_REQUIRE([_BOOST_GUESS_WHETHER_TO_USE_MT])dnl -if test x"$boost_cv_inc_path" = xno; then - AC_MSG_NOTICE([Boost not available, not searching for the Boost $1 library]) -else -dnl The else branch is huge and wasn't intended on purpose. -AC_LANG_PUSH([C++])dnl -AS_VAR_PUSHDEF([Boost_lib], [boost_cv_lib_$1])dnl -AS_VAR_PUSHDEF([Boost_lib_LDFLAGS], [boost_cv_lib_$1_LDFLAGS])dnl -AS_VAR_PUSHDEF([Boost_lib_LIBS], [boost_cv_lib_$1_LIBS])dnl -BOOST_FIND_HEADER([$3]) -boost_save_CPPFLAGS=$CPPFLAGS -CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS" -# Now let's try to find the library. The algorithm is as follows: first look -# for a given library name according to the user's PREFERRED-RT-OPT. For each -# library name, we prefer to use the ones that carry the tag (toolset name). -# Each library is searched through the various standard paths were Boost is -# usually installed. If we can't find the standard variants, we try to -# enforce -mt (for instance on MacOSX, libboost_threads.dylib doesn't exist -# but there's -obviously- libboost_threads-mt.dylib). 
-AC_CACHE_CHECK([for the Boost $1 library], [Boost_lib], - [Boost_lib=no - case "$2" in #( - mt | mt-) boost_mt=-mt; boost_rtopt=;; #( - mt* | mt-*) boost_mt=-mt; boost_rtopt=`expr "X$2" : 'Xmt-*\(.*\)'`;; #( - *) boost_mt=; boost_rtopt=$2;; - esac - if test $enable_static_boost = yes; then - boost_rtopt="s$boost_rtopt" - fi - # Find the proper debug variant depending on what we've been asked to find. - case $boost_rtopt in #( - *d*) boost_rt_d=$boost_rtopt;; #( - *[[sgpn]]*) # Insert the `d' at the right place (in between `sg' and `pn') - boost_rt_d=`echo "$boost_rtopt" | sed 's/\(s*g*\)\(p*n*\)/\1\2/'`;; #( - *) boost_rt_d='-d';; - esac - # If the PREFERRED-RT-OPT are not empty, prepend a `-'. - test -n "$boost_rtopt" && boost_rtopt="-$boost_rtopt" - $boost_guess_use_mt && boost_mt=-mt - # Look for the abs path the static archive. - # $libext is computed by Libtool but let's make sure it's non empty. - test -z "$libext" && - AC_MSG_ERROR([the libext variable is empty, did you invoke Libtool?]) - boost_save_ac_objext=$ac_objext - # Generate the test file. - AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include <$3> -$5], [$4])]) -dnl Optimization hacks: compiling C++ is slow, especially with Boost. What -dnl we're trying to do here is guess the right combination of link flags -dnl (LIBS / LDFLAGS) to use a given library. This can take several -dnl iterations before it succeeds and is thus *very* slow. So what we do -dnl instead is that we compile the code first (and thus get an object file, -dnl typically conftest.o). Then we try various combinations of link flags -dnl until we succeed to link conftest.o in an executable. The problem is -dnl that the various TRY_LINK / COMPILE_IFELSE macros of Autoconf always -dnl remove all the temporary files including conftest.o. So the trick here -dnl is to temporarily change the value of ac_objext so that conftest.o is -dnl preserved accross tests. 
This is obviously fragile and I will burn in -dnl hell for not respecting Autoconf's documented interfaces, but in the -dnl mean time, it optimizes the macro by a factor of 5 to 30. -dnl Another small optimization: the first argument of AC_COMPILE_IFELSE left -dnl empty because the test file is generated only once above (before we -dnl start the for loops). - AC_COMPILE_IFELSE([], - [ac_objext=do_not_rm_me_plz], - [AC_MSG_ERROR([cannot compile a test that uses Boost $1])]) - ac_objext=$boost_save_ac_objext - boost_failed_libs= -# Don't bother to ident the 6 nested for loops, only the 2 innermost ones -# matter. -for boost_tag_ in -$boost_cv_lib_tag ''; do -for boost_ver_ in -$boost_cv_lib_version ''; do -for boost_mt_ in $boost_mt -mt ''; do -for boost_rtopt_ in $boost_rtopt '' -d; do - for boost_lib in \ - boost_$1$boost_tag_$boost_mt_$boost_rtopt_$boost_ver_ \ - boost_$1$boost_tag_$boost_rtopt_$boost_ver_ \ - boost_$1$boost_tag_$boost_mt_$boost_ver_ \ - boost_$1$boost_tag_$boost_ver_ - do - # Avoid testing twice the same lib - case $boost_failed_libs in #( - *@$boost_lib@*) continue;; - esac - # If with_boost is empty, we'll search in /lib first, which is not quite - # right so instead we'll try to a location based on where the headers are. - boost_tmp_lib=$with_boost - test x"$with_boost" = x && boost_tmp_lib=${boost_cv_inc_path%/include} - for boost_ldpath in "$boost_tmp_lib/lib" '' \ - /opt/local/lib /usr/local/lib /opt/lib /usr/lib \ - "$with_boost" C:/Boost/lib /lib /usr/lib64 /lib64 - do - test -e "$boost_ldpath" || continue - boost_save_LDFLAGS=$LDFLAGS - # Are we looking for a static library? - case $boost_ldpath:$boost_rtopt_ in #( - *?*:*s*) # Yes (Non empty boost_ldpath + s in rt opt) - Boost_lib_LIBS="$boost_ldpath/lib$boost_lib.$libext" - test -e "$Boost_lib_LIBS" || continue;; #( - *) # No: use -lboost_foo to find the shared library. 
- Boost_lib_LIBS="-l$boost_lib";; - esac - boost_save_LIBS=$LIBS - LIBS="$Boost_lib_LIBS $LIBS" - test x"$boost_ldpath" != x && LDFLAGS="$LDFLAGS -L$boost_ldpath" -dnl First argument of AC_LINK_IFELSE left empty because the test file is -dnl generated only once above (before we start the for loops). - _BOOST_AC_LINK_IFELSE([], - [Boost_lib=yes], [Boost_lib=no]) - ac_objext=$boost_save_ac_objext - LDFLAGS=$boost_save_LDFLAGS - LIBS=$boost_save_LIBS - if test x"$Boost_lib" = xyes; then - Boost_lib_LDFLAGS="-L$boost_ldpath -R$boost_ldpath" - break 6 - else - boost_failed_libs="$boost_failed_libs@$boost_lib@" - fi - done - done -done -done -done -done -rm -f conftest.$ac_objext -]) -case $Boost_lib in #( - no) _AC_MSG_LOG_CONFTEST - AC_MSG_ERROR([cannot not find the flags to link with Boost $1]) - ;; -esac -AC_SUBST(AS_TR_CPP([BOOST_$1_LDFLAGS]), [$Boost_lib_LDFLAGS]) -AC_SUBST(AS_TR_CPP([BOOST_$1_LIBS]), [$Boost_lib_LIBS]) -CPPFLAGS=$boost_save_CPPFLAGS -AS_VAR_POPDEF([Boost_lib])dnl -AS_VAR_POPDEF([Boost_lib_LDFLAGS])dnl -AS_VAR_POPDEF([Boost_lib_LIBS])dnl -AC_LANG_POP([C++])dnl -fi -])# BOOST_FIND_LIB - - -# --------------------------------------- # -# Checks for the various Boost libraries. # -# --------------------------------------- # - -# List of boost libraries: http://www.boost.org/libs/libraries.htm -# The page http://beta.boost.org/doc/libs is useful: it gives the first release -# version of each library (among other things). - -# BOOST_ARRAY() -# ------------- -# Look for Boost.Array -AC_DEFUN([BOOST_ARRAY], -[BOOST_FIND_HEADER([boost/array.hpp])]) - - -# BOOST_ASIO() -# ------------ -# Look for Boost.Asio (new in Boost 1.35). 
-AC_DEFUN([BOOST_ASIO], -[AC_REQUIRE([BOOST_SYSTEM])dnl -BOOST_FIND_HEADER([boost/asio.hpp])]) - - -# BOOST_BIND() -# ------------ -# Look for Boost.Bind -AC_DEFUN([BOOST_BIND], -[BOOST_FIND_HEADER([boost/bind.hpp])]) - - -# BOOST_CONVERSION() -# ------------------ -# Look for Boost.Conversion (cast / lexical_cast) -AC_DEFUN([BOOST_CONVERSION], -[BOOST_FIND_HEADER([boost/cast.hpp]) -BOOST_FIND_HEADER([boost/lexical_cast.hpp]) -])# BOOST_CONVERSION - - -# BOOST_DATE_TIME([PREFERRED-RT-OPT]) -# ----------------------------------- -# Look for Boost.Date_Time. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_DATE_TIME], -[BOOST_FIND_LIB([date_time], [$1], - [boost/date_time/posix_time/posix_time.hpp], - [boost::posix_time::ptime t;]) -])# BOOST_DATE_TIME - - -# BOOST_FILESYSTEM([PREFERRED-RT-OPT]) -# ------------------------------------ -# Look for Boost.Filesystem. For the documentation of PREFERRED-RT-OPT, see -# the documentation of BOOST_FIND_LIB above. -# Do not check for boost/filesystem.hpp because this file was introduced in -# 1.34. -AC_DEFUN([BOOST_FILESYSTEM], -[# Do we have to check for Boost.System? This link-time dependency was -# added as of 1.35.0. If we have a version <1.35, we must not attempt to -# find Boost.System as it didn't exist by then. -if test $boost_major_version -ge 135; then -BOOST_SYSTEM([$1]) -fi # end of the Boost.System check. 
-boost_filesystem_save_LIBS=$LIBS -boost_filesystem_save_LDFLAGS=$LDFLAGS -m4_pattern_allow([^BOOST_SYSTEM_(LIBS|LDFLAGS)$])dnl -LIBS="$LIBS $BOOST_SYSTEM_LIBS" -LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS" -BOOST_FIND_LIB([filesystem], [$1], - [boost/filesystem/path.hpp], [boost::filesystem::path p;]) -LIBS=$boost_filesystem_save_LIBS -LDFLAGS=$boost_filesystem_save_LDFLAGS -])# BOOST_FILESYSTEM - - -# BOOST_FOREACH() -# --------------- -# Look for Boost.Foreach -AC_DEFUN([BOOST_FOREACH], -[BOOST_FIND_HEADER([boost/foreach.hpp])]) - - -# BOOST_FORMAT() -# -------------- -# Look for Boost.Format -# Note: we can't check for boost/format/format_fwd.hpp because the header isn't -# standalone. It can't be compiled because it triggers the following error: -# boost/format/detail/config_macros.hpp:88: error: 'locale' in namespace 'std' -# does not name a type -AC_DEFUN([BOOST_FORMAT], -[BOOST_FIND_HEADER([boost/format.hpp])]) - - -# BOOST_FUNCTION() -# ---------------- -# Look for Boost.Function -AC_DEFUN([BOOST_FUNCTION], -[BOOST_FIND_HEADER([boost/function.hpp])]) - - -# BOOST_GRAPH([PREFERRED-RT-OPT]) -# ------------------------------- -# Look for Boost.Graphs. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_GRAPH], -[BOOST_FIND_LIB([graph], [$1], - [boost/graph/adjacency_list.hpp], [boost::adjacency_list<> g;]) -])# BOOST_GRAPH - - -# BOOST_IOSTREAMS([PREFERRED-RT-OPT]) -# ------------------------------- -# Look for Boost.IOStreams. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. 
-AC_DEFUN([BOOST_IOSTREAMS], -[BOOST_FIND_LIB([iostreams], [$1], - [boost/iostreams/device/file_descriptor.hpp], - [boost::iostreams::file_descriptor fd(0); fd.close();]) -])# BOOST_IOSTREAMS - - -# BOOST_HASH() -# ------------ -# Look for Boost.Functional/Hash -AC_DEFUN([BOOST_HASH], -[BOOST_FIND_HEADER([boost/functional/hash.hpp])]) - - -# BOOST_LAMBDA() -# -------------- -# Look for Boost.Lambda -AC_DEFUN([BOOST_LAMBDA], -[BOOST_FIND_HEADER([boost/lambda/lambda.hpp])]) - - -# BOOST_MATH() -# ------------ -# Look for Boost.Math -# TODO: This library isn't header-only but it comes in multiple different -# flavors that don't play well with BOOST_FIND_LIB (e.g, libboost_math_c99, -# libboost_math_c99f, libboost_math_c99l, libboost_math_tr1, -# libboost_math_tr1f, libboost_math_tr1l). This macro must be fixed to do the -# right thing anyway. -AC_DEFUN([BOOST_MATH], -[BOOST_FIND_HEADER([boost/math/special_functions.hpp])]) - - -# BOOST_MULTIARRAY() -# ------------------ -# Look for Boost.MultiArray -AC_DEFUN([BOOST_MULTIARRAY], -[BOOST_FIND_HEADER([boost/multi_array.hpp])]) - - -# BOOST_NUMERIC_CONVERSION() -# -------------------------- -# Look for Boost.NumericConversion (policy-based numeric conversion) -AC_DEFUN([BOOST_NUMERIC_CONVERSION], -[BOOST_FIND_HEADER([boost/numeric/conversion/converter.hpp]) -])# BOOST_NUMERIC_CONVERSION - - -# BOOST_OPTIONAL() -# ---------------- -# Look for Boost.Optional -AC_DEFUN([BOOST_OPTIONAL], -[BOOST_FIND_HEADER([boost/optional.hpp])]) - - -# BOOST_PREPROCESSOR() -# -------------------- -# Look for Boost.Preprocessor -AC_DEFUN([BOOST_PREPROCESSOR], -[BOOST_FIND_HEADER([boost/preprocessor/repeat.hpp])]) - - -# BOOST_PROGRAM_OPTIONS([PREFERRED-RT-OPT]) -# ----------------------------------------- -# Look for Boost.Program_options. For the documentation of PREFERRED-RT-OPT, see -# the documentation of BOOST_FIND_LIB above. 
-AC_DEFUN([BOOST_PROGRAM_OPTIONS], -[BOOST_FIND_LIB([program_options], [$1], - [boost/program_options.hpp], - [boost::program_options::options_description d("test");]) -])# BOOST_PROGRAM_OPTIONS - - -# BOOST_REF() -# ----------- -# Look for Boost.Ref -AC_DEFUN([BOOST_REF], -[BOOST_FIND_HEADER([boost/ref.hpp])]) - - -# BOOST_REGEX([PREFERRED-RT-OPT]) -# ------------------------------- -# Look for Boost.Regex. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_REGEX], -[BOOST_FIND_LIB([regex], [$1], - [boost/regex.hpp], - [boost::regex exp("*"); boost::regex_match("foo", exp);]) -])# BOOST_REGEX - - -# BOOST_SERIALIZATION([PREFERRED-RT-OPT]) -# --------------------------------------- -# Look for Boost.Serialization. For the documentation of PREFERRED-RT-OPT, see -# the documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_SERIALIZATION], -[BOOST_FIND_LIB([serialization], [$1], - [boost/archive/text_oarchive.hpp], - [std::ostream* o = 0; // Cheap way to get an ostream... - boost::archive::text_oarchive t(*o);]) -])# BOOST_SIGNALS - - -# BOOST_SIGNALS([PREFERRED-RT-OPT]) -# --------------------------------- -# Look for Boost.Signals. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. 
-AC_DEFUN([BOOST_SIGNALS], -[BOOST_FIND_LIB([signals], [$1], - [boost/signal.hpp], - [boost::signal s;]) -])# BOOST_SIGNALS - - -# BOOST_SMART_PTR() -# ----------------- -# Look for Boost.SmartPtr -AC_DEFUN([BOOST_SMART_PTR], -[BOOST_FIND_HEADER([boost/scoped_ptr.hpp]) -BOOST_FIND_HEADER([boost/shared_ptr.hpp]) -]) - - -# BOOST_STATICASSERT() -# -------------------- -# Look for Boost.StaticAssert -AC_DEFUN([BOOST_STATICASSERT], -[BOOST_FIND_HEADER([boost/static_assert.hpp])]) - - -# BOOST_STRING_ALGO() -# ------------------- -# Look for Boost.StringAlgo -AC_DEFUN([BOOST_STRING_ALGO], -[BOOST_FIND_HEADER([boost/algorithm/string.hpp]) -]) - - -# BOOST_SYSTEM([PREFERRED-RT-OPT]) -# -------------------------------- -# Look for Boost.System. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. This library was introduced in Boost -# 1.35.0. -AC_DEFUN([BOOST_SYSTEM], -[BOOST_FIND_LIB([system], [$1], - [boost/system/error_code.hpp], - [boost::system::error_code e; e.clear();]) -])# BOOST_SYSTEM - - -# BOOST_TEST([PREFERRED-RT-OPT]) -# ------------------------------ -# Look for Boost.Test. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_TEST], -[m4_pattern_allow([^BOOST_CHECK$])dnl -BOOST_FIND_LIB([unit_test_framework], [$1], - [boost/test/unit_test.hpp], [BOOST_CHECK(2 == 2);], - [using boost::unit_test::test_suite; - test_suite* init_unit_test_suite(int argc, char ** argv) - { return NULL; }]) -])# BOOST_TEST - - -# BOOST_THREADS([PREFERRED-RT-OPT]) -# --------------------------------- -# Look for Boost.Thread. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -# FIXME: Provide an alias "BOOST_THREAD". -AC_DEFUN([BOOST_THREADS], -[dnl Having the pthread flag is required at least on GCC3 where -dnl boost/thread.hpp would complain if we try to compile without -dnl -pthread on GNU/Linux. 
-AC_REQUIRE([_BOOST_PTHREAD_FLAG])dnl -boost_threads_save_LIBS=$LIBS -boost_threads_save_CPPFLAGS=$CPPFLAGS -LIBS="$LIBS $boost_cv_pthread_flag" -# Yes, we *need* to put the -pthread thing in CPPFLAGS because with GCC3, -# boost/thread.hpp will trigger a #error if -pthread isn't used: -# boost/config/requires_threads.hpp:47:5: #error "Compiler threading support -# is not turned on. Please set the correct command line options for -# threading: -pthread (Linux), -pthreads (Solaris) or -mthreads (Mingw32)" -CPPFLAGS="$CPPFLAGS $boost_cv_pthread_flag" -BOOST_FIND_LIB([thread], [$1], - [boost/thread.hpp], [boost::thread t; boost::mutex m;]) -BOOST_THREAD_LIBS="$BOOST_THREAD_LIBS $boost_cv_pthread_flag" -BOOST_CPPFLAGS="$BOOST_CPPFLAGS $boost_cv_pthread_flag" -LIBS=$boost_threads_save_LIBS -CPPFLAGS=$boost_threads_save_CPPFLAGS -])# BOOST_THREADS - - -# BOOST_TOKENIZER() -# ----------------- -# Look for Boost.Tokenizer -AC_DEFUN([BOOST_TOKENIZER], -[BOOST_FIND_HEADER([boost/tokenizer.hpp])]) - - -# BOOST_TRIBOOL() -# --------------- -# Look for Boost.Tribool -AC_DEFUN([BOOST_TRIBOOL], -[BOOST_FIND_HEADER([boost/logic/tribool_fwd.hpp]) -BOOST_FIND_HEADER([boost/logic/tribool.hpp]) -]) - - -# BOOST_TUPLE() -# ------------- -# Look for Boost.Tuple -AC_DEFUN([BOOST_TUPLE], -[BOOST_FIND_HEADER([boost/tuple/tuple.hpp])]) - - -# BOOST_TYPETRAITS() -# -------------------- -# Look for Boost.TypeTraits -AC_DEFUN([BOOST_TYPETRAITS], -[BOOST_FIND_HEADER([boost/type_traits.hpp])]) - - -# BOOST_UTILITY() -# --------------- -# Look for Boost.Utility (noncopyable, result_of, base-from-member idiom, -# etc.) -AC_DEFUN([BOOST_UTILITY], -[BOOST_FIND_HEADER([boost/utility.hpp])]) - - -# BOOST_VARIANT() -# --------------- -# Look for Boost.Variant. 
-AC_DEFUN([BOOST_VARIANT], -[BOOST_FIND_HEADER([boost/variant/variant_fwd.hpp]) -BOOST_FIND_HEADER([boost/variant.hpp])]) - - -# BOOST_WAVE([PREFERRED-RT-OPT]) -# ------------------------------ -# NOTE: If you intend to use Wave/Spirit with thread support, make sure you -# call BOOST_THREADS first. -# Look for Boost.Wave. For the documentation of PREFERRED-RT-OPT, see the -# documentation of BOOST_FIND_LIB above. -AC_DEFUN([BOOST_WAVE], -[AC_REQUIRE([BOOST_FILESYSTEM])dnl -AC_REQUIRE([BOOST_DATE_TIME])dnl -boost_wave_save_LIBS=$LIBS -boost_wave_save_LDFLAGS=$LDFLAGS -m4_pattern_allow([^BOOST_((FILE)?SYSTEM|DATE_TIME|THREAD)_(LIBS|LDFLAGS)$])dnl -LIBS="$LIBS $BOOST_SYSTEM_LIBS $BOOST_FILESYSTEM_LIBS $BOOST_DATE_TIME_LIBS\ -$BOOST_THREAD_LIBS" -LDFLAGS="$LDFLAGS $BOOST_SYSTEM_LDFLAGS $BOOST_FILESYSTEM_LDFLAGS\ -$BOOST_DATE_TIME_LDFLAGS $BOOST_THREAD_LDFLAGS" -BOOST_FIND_LIB([wave], [$1], - [boost/wave.hpp], - [boost::wave::token_id id; get_token_name(id);]) -LIBS=$boost_wave_save_LIBS -LDFLAGS=$boost_wave_save_LDFLAGS -])# BOOST_WAVE - - -# BOOST_XPRESSIVE() -# ----------------- -# Look for Boost.Xpressive (new since 1.36.0). -AC_DEFUN([BOOST_XPRESSIVE], -[BOOST_FIND_HEADER([boost/xpressive/xpressive.hpp])]) - - -# ----------------- # -# Internal helpers. # -# ----------------- # - - -# _BOOST_PTHREAD_FLAG() -# --------------------- -# Internal helper for BOOST_THREADS. Based on ACX_PTHREAD: -# http://autoconf-archive.cryp.to/acx_pthread.html -AC_DEFUN([_BOOST_PTHREAD_FLAG], -[AC_REQUIRE([AC_PROG_CXX])dnl -AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_LANG_PUSH([C++])dnl -AC_CACHE_CHECK([for the flags needed to use pthreads], [boost_cv_pthread_flag], -[ boost_cv_pthread_flag= - # The ordering *is* (sometimes) important. 
Some notes on the - # individual items follow: - # (none): in case threads are in libc; should be tried before -Kthread and - # other compiler flags to prevent continual compiler warnings - # -lpthreads: AIX (must check this before -lpthread) - # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) - # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) - # -llthread: LinuxThreads port on FreeBSD (also preferred to -pthread) - # -pthread: GNU Linux/GCC (kernel threads), BSD/GCC (userland threads) - # -pthreads: Solaris/GCC - # -mthreads: MinGW32/GCC, Lynx/GCC - # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it - # doesn't hurt to check since this sometimes defines pthreads too; - # also defines -D_REENTRANT) - # ... -mt is also the pthreads flag for HP/aCC - # -lpthread: GNU Linux, etc. - # --thread-safe: KAI C++ - case $host_os in #( - *solaris*) - # On Solaris (at least, for some versions), libc contains stubbed - # (non-functional) versions of the pthreads routines, so link-based - # tests will erroneously succeed. (We need to link with -pthreads/-mt/ - # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather - # a function called by this macro, so we could check for that, but - # who knows whether they'll stub that too in a future libc.) So, - # we'll just look for -pthreads and -lpthread first: - boost_pthread_flags="-pthreads -lpthread -mt -pthread";; #( - *) - boost_pthread_flags="-lpthreads -Kthread -kthread -llthread -pthread \ - -pthreads -mthreads -lpthread --thread-safe -mt";; - esac - # Generate the test file. - AC_LANG_CONFTEST([AC_LANG_PROGRAM([#include ], - [pthread_t th; pthread_join(th, 0); - pthread_attr_init(0); pthread_cleanup_push(0, 0); - pthread_create(0,0,0,0); pthread_cleanup_pop(0);])]) - for boost_pthread_flag in '' $boost_pthread_flags; do - boost_pthread_ok=false -dnl Re-use the test file already generated. 
- boost_pthreads__save_LIBS=$LIBS - LIBS="$LIBS $boost_pthread_flag" - AC_LINK_IFELSE([], - [if grep ".*$boost_pthread_flag" conftest.err; then - echo "This flag seems to have triggered warnings" >&AS_MESSAGE_LOG_FD - else - boost_pthread_ok=:; boost_cv_pthread_flag=$boost_pthread_flag - fi]) - LIBS=$boost_pthreads__save_LIBS - $boost_pthread_ok && break - done -]) -AC_LANG_POP([C++])dnl -])# _BOOST_PTHREAD_FLAG - - -# _BOOST_gcc_test(MAJOR, MINOR) -# ----------------------------- -# Internal helper for _BOOST_FIND_COMPILER_TAG. -m4_define([_BOOST_gcc_test], -["defined __GNUC__ && __GNUC__ == $1 && __GNUC_MINOR__ == $2 && !defined __ICC @ gcc$1$2"])dnl - - -# _BOOST_FIND_COMPILER_TAG() -# -------------------------- -# Internal. When Boost is installed without --layout=system, each library -# filename will hold a suffix that encodes the compiler used during the -# build. The Boost build system seems to call this a `tag'. -AC_DEFUN([_BOOST_FIND_COMPILER_TAG], -[AC_REQUIRE([AC_PROG_CXX])dnl -AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_CACHE_CHECK([for the toolset name used by Boost for $CXX], [boost_cv_lib_tag], -[AC_LANG_PUSH([C++])dnl - boost_cv_lib_tag=unknown - # The following tests are mostly inspired by boost/config/auto_link.hpp - # The list is sorted to most recent/common to oldest compiler (in order - # to increase the likelihood of finding the right compiler with the - # least number of compilation attempt). - # Beware that some tests are sensible to the order (for instance, we must - # look for MinGW before looking for GCC3). - # I used one compilation test per compiler with a #error to recognize - # each compiler so that it works even when cross-compiling (let me know - # if you know a better approach). - # Known missing tags (known from Boost's tools/build/v2/tools/common.jam): - # como, edg, kcc, bck, mp, sw, tru, xlc - # I'm not sure about my test for `il' (be careful: Intel's ICC pre-defines - # the same defines as GCC's). 
- # TODO: Move the test on GCC 4.4 up once it's released. - for i in \ - _BOOST_gcc_test(4, 3) \ - _BOOST_gcc_test(4, 2) \ - _BOOST_gcc_test(4, 1) \ - _BOOST_gcc_test(4, 0) \ - "defined __GNUC__ && __GNUC__ == 3 && !defined __ICC \ - && (defined WIN32 || defined WINNT || defined _WIN32 || defined __WIN32 \ - || defined __WIN32__ || defined __WINNT || defined __WINNT__) @ mgw" \ - _BOOST_gcc_test(3, 4) \ - _BOOST_gcc_test(3, 3) \ - "defined _MSC_VER && _MSC_VER >= 1500 @ vc90" \ - "defined _MSC_VER && _MSC_VER == 1400 @ vc80" \ - _BOOST_gcc_test(3, 2) \ - "defined _MSC_VER && _MSC_VER == 1310 @ vc71" \ - _BOOST_gcc_test(3, 1) \ - _BOOST_gcc_test(3, 0) \ - "defined __BORLANDC__ @ bcb" \ - "defined __ICC && (defined __unix || defined __unix__) @ il" \ - "defined __ICL @ iw" \ - "defined _MSC_VER && _MSC_VER == 1300 @ vc7" \ - _BOOST_gcc_test(4, 4) \ - _BOOST_gcc_test(2, 95) \ - "defined __MWERKS__ && __MWERKS__ <= 0x32FF @ cw9" \ - "defined _MSC_VER && _MSC_VER < 1300 && !defined UNDER_CE @ vc6" \ - "defined _MSC_VER && _MSC_VER < 1300 && defined UNDER_CE @ evc4" \ - "defined __MWERKS__ && __MWERKS__ <= 0x31FF @ cw8" - do - boost_tag_test=`expr "X$i" : 'X\([[^@]]*\) @ '` - boost_tag=`expr "X$i" : 'X[[^@]]* @ \(.*\)'` - AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#if $boost_tag_test -/* OK */ -#else -# error $boost_tag_test -#endif -]])], [boost_cv_lib_tag=$boost_tag; break], []) - done -AC_LANG_POP([C++])dnl - case $boost_cv_lib_tag in #( - # Some newer (>= 1.35?) versions of Boost seem to only use "gcc" as opposed - # to "gcc41" for instance. - *-gcc | *'-gcc ') :;; #( Don't re-add -gcc: it's already in there. - gcc*) - boost_tag_x= - case $host_os in #( - darwin*) - if test $boost_major_version -ge 136; then - # The `x' added in r46793 of Boost. - boost_tag_x=x - fi;; - esac - # We can specify multiple tags in this variable because it's used by - # BOOST_FIND_LIB that does a `for tag in -$boost_cv_lib_tag' ... 
- boost_cv_lib_tag="$boost_tag_x$boost_cv_lib_tag -${boost_tag_x}gcc" - ;; #( - unknown) - AC_MSG_WARN([[could not figure out which toolset name to use for $CXX]]) - boost_cv_lib_tag= - ;; - esac -])dnl end of AC_CACHE_CHECK -])# _BOOST_FIND_COMPILER_TAG - - -# _BOOST_GUESS_WHETHER_TO_USE_MT() -# -------------------------------- -# Compile a small test to try to guess whether we should favor MT (Multi -# Thread) flavors of Boost. Sets boost_guess_use_mt accordingly. -AC_DEFUN([_BOOST_GUESS_WHETHER_TO_USE_MT], -[# Check whether we do better use `mt' even though we weren't ask to. -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#if defined _REENTRANT || defined _MT || defined __MT__ -/* use -mt */ -#else -# error MT not needed -#endif -]])], [boost_guess_use_mt=:], [boost_guess_use_mt=false]) -]) - -# _BOOST_AC_LINK_IFELSE(PROGRAM, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) -# ------------------------------------------------------------------- -# Fork of _AC_LINK_IFELSE that preserves conftest.o across calls. Fragile, -# will break when Autoconf changes its internals. Requires that you manually -# rm -f conftest.$ac_objext in between to really different tests, otherwise -# you will try to link a conftest.o left behind by a previous test. -# Used to aggressively optimize BOOST_FIND_LIB (see the big comment in this -# macro). -# -# Don't use "break" in the actions, as it would short-circuit some code -# this macro runs after the actions. -m4_define([_BOOST_AC_LINK_IFELSE], -[m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl -rm -f conftest$ac_exeext -boost_save_ac_ext=$ac_ext -boost_use_source=: -# If we already have a .o, re-use it. We change $ac_ext so that $ac_link -# tries to link the existing object file instead of compiling from source. -test -f conftest.$ac_objext && ac_ext=$ac_objext && boost_use_source=false && - _AS_ECHO_LOG([re-using the existing conftest.$ac_objext]) -AS_IF([_AC_DO_STDERR($ac_link) && { - test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" || - test ! 
-s conftest.err - } && test -s conftest$ac_exeext && { - test "$cross_compiling" = yes || - $as_executable_p conftest$ac_exeext -dnl FIXME: use AS_TEST_X instead when 2.61 is widespread enough. - }], - [$2], - [if $boost_use_source; then - _AC_MSG_LOG_CONFTEST - fi - $3]) -ac_objext=$boost_save_ac_objext -ac_ext=$boost_save_ac_ext -dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization) -dnl information created by the PGI compiler (conftest_ipa8_conftest.oo), -dnl as it would interfere with the next link command. -rm -f core conftest.err conftest_ipa8_conftest.oo \ - conftest$ac_exeext m4_ifval([$1], [conftest.$ac_ext])[]dnl -])# _BOOST_AC_LINK_IFELSE - -# Local Variables: -# mode: autoconf -# End: diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h index 7e0e18b53..a10b09a7a 100644 --- a/mert/BleuScorer.h +++ b/mert/BleuScorer.h @@ -56,7 +56,7 @@ private: typedef map,int,CompareNgrams> counts_t; typedef map,int,CompareNgrams>::iterator counts_iterator; - typedef map,int,CompareNgrams>::iterator counts_const_iterator; + typedef map,int,CompareNgrams>::const_iterator counts_const_iterator; typedef ScopedVector refcounts_t; /** diff --git a/mert/Makefile.am b/mert/Makefile.am deleted file mode 100644 index 0b16a2f4f..000000000 --- a/mert/Makefile.am +++ /dev/null @@ -1,45 +0,0 @@ -lib_LTLIBRARIES = libmert.la -bin_PROGRAMS = mert extractor evaluator pro -AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS) - -libmert_la_SOURCES = \ -Util.cpp \ -FileStream.cpp \ -Timer.cpp \ -ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \ -ScoreDataIterator.cpp \ -FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \ -FeatureDataIterator.cpp \ -Data.cpp \ -BleuScorer.cpp \ -Point.cpp \ -PerScorer.cpp \ -Scorer.cpp \ -ScorerFactory.cpp \ -Optimizer.cpp \ -TERsrc/alignmentStruct.cpp \ -TERsrc/hashMap.cpp \ -TERsrc/hashMapStringInfos.cpp \ -TERsrc/stringHasher.cpp \ -TERsrc/terAlignment.cpp \ -TERsrc/terShift.cpp \ -TERsrc/hashMapInfos.cpp \ 
-TERsrc/infosHasher.cpp \ -TERsrc/stringInfosHasher.cpp \ -TERsrc/tercalc.cpp \ -TERsrc/tools.cpp \ -TerScorer.cpp \ -CderScorer.cpp \ -MergeScorer.cpp - -mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp -extractor_SOURCES = extractor.cpp -evaluator_SOURCES = evaluator.cpp -pro_SOURCES = pro.cpp - -extractor_LDADD = libmert.la -lm -lz -mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) -evaluator_LDADD = libmert.la -lm -lz -pro_LDADD = libmert.la $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS) -pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la libmert.la - diff --git a/misc/Makefile.am b/misc/Makefile.am deleted file mode 100644 index 729ad2e0b..000000000 --- a/misc/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -bin_PROGRAMS = processPhraseTable processLexicalTable queryLexicalTable queryPhraseTable - -processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp -processLexicalTable_SOURCES = processLexicalTable.cpp -queryLexicalTable_SOURCES = queryLexicalTable.cpp -queryPhraseTable_SOURCES = queryPhraseTable.cpp - -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS) - -processPhraseTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) - -processLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) - -queryLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la 
$(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) - -queryPhraseTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) diff --git a/mkinstalldirs b/mkinstalldirs deleted file mode 100755 index d2d5f21b6..000000000 --- a/mkinstalldirs +++ /dev/null @@ -1,111 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman -# Created: 1993-05-16 -# Public domain - -errstatus=0 -dirmode="" - -usage="\ -Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..." - -# process command line arguments -while test $# -gt 0 ; do - case $1 in - -h | --help | --h*) # -h for help - echo "$usage" 1>&2 - exit 0 - ;; - -m) # -m PERM arg - shift - test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } - dirmode=$1 - shift - ;; - --) # stop option processing - shift - break - ;; - -*) # unknown option - echo "$usage" 1>&2 - exit 1 - ;; - *) # first non-opt arg - break - ;; - esac -done - -for file -do - if test -d "$file"; then - shift - else - break - fi -done - -case $# in - 0) exit 0 ;; -esac - -case $dirmode in - '') - if mkdir -p -- . 2>/dev/null; then - echo "mkdir -p -- $*" - exec mkdir -p -- "$@" - fi - ;; - *) - if mkdir -m "$dirmode" -p -- . 2>/dev/null; then - echo "mkdir -m $dirmode -p -- $*" - exec mkdir -m "$dirmode" -p -- "$@" - fi - ;; -esac - -for file -do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d - do - pathcomp="$pathcomp$d" - case $pathcomp in - -*) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! -d "$pathcomp"; then - errstatus=$lasterr - else - if test ! -z "$dirmode"; then - echo "chmod $dirmode $pathcomp" - lasterr="" - chmod "$dirmode" "$pathcomp" || lasterr=$? - - if test ! 
-z "$lasterr"; then - errstatus=$lasterr - fi - fi - fi - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# End: -# mkinstalldirs ends here diff --git a/moses-chart-cmd/src/Makefile.am b/moses-chart-cmd/src/Makefile.am deleted file mode 100644 index 04d350c22..000000000 --- a/moses-chart-cmd/src/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -bin_PROGRAMS = moses_chart -moses_chart_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS) - -moses_chart_LDADD = -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) -moses_chart_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a - - - - diff --git a/moses-cmd/src/Makefile.am b/moses-cmd/src/Makefile.am deleted file mode 100644 index 11b2084ca..000000000 --- a/moses-cmd/src/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -bin_PROGRAMS = moses lmbrgrid - -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS) - -moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp -moses_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) - - -lmbrgrid_SOURCES = LatticeMBRGrid.cpp LatticeMBR.cpp IOWrapper.cpp -lmbrgrid_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) diff --git a/moses/src/LM/Implementation.cpp b/moses/src/LM/Implementation.cpp index f649eef06..589ed375a 
100644 --- a/moses/src/LM/Implementation.cpp +++ b/moses/src/LM/Implementation.cpp @@ -100,7 +100,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco if (word == GetSentenceStartArray()) { // do nothing, don't include prob for unigram if (currPos != 0) { - std::cerr << "Your data contains in a position other than the first word." << std::endl; + std::cerr << "Either your data contains in a position other than the first word or your language model is missing . Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" << std::endl; abort(); } } else { diff --git a/moses/src/LM/Jamfile b/moses/src/LM/Jamfile index 99e7a5783..345194aa8 100644 --- a/moses/src/LM/Jamfile +++ b/moses/src/LM/Jamfile @@ -6,7 +6,7 @@ if $(with-irstlm) != "" lib irstlm : : $(with-irstlm)/lib ; obj IRST.o : IRST.cpp ..//headers : $(with-irstlm)/include ; alias irst : IRST.o irstlm : : : LM_IRST ; - echo "" ; + echo "Forcing single-threaded build because of IRSTLM." ; echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ; echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ; diff --git a/moses/src/LM/Ken.cpp b/moses/src/LM/Ken.cpp index b8a78af19..9aa4d2788 100644 --- a/moses/src/LM/Ken.cpp +++ b/moses/src/LM/Ken.cpp @@ -198,7 +198,7 @@ template void LanguageModelKen::CalcScore(const Phrase &phr } else { lm::WordIndex index = TranslateID(word); if (index == m_ngram->GetVocabulary().BeginSentence()) { - std::cerr << "Your data contains in a position other than the first word." << std::endl; + std::cerr << "Either your data contains in a position other than the first word or your language model is missing . Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" 
<< std::endl; abort(); } float score = TransformLMScore(m_ngram->Score(*state0, index, *state1)); diff --git a/moses/src/Makefile.am b/moses/src/Makefile.am deleted file mode 100644 index 96568da5b..000000000 --- a/moses/src/Makefile.am +++ /dev/null @@ -1,335 +0,0 @@ -lib_LTLIBRARIES = libmoses.la -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS) - -libmoses_ladir = ${includedir} - -libmoses_la_HEADERS = \ - AlignmentInfo.h \ - AlignmentInfoCollection.h \ - BilingualDynSuffixArray.h \ - BitmapContainer.h \ - CellCollection.h \ - ChartCell.h \ - ChartCellCollection.h \ - ChartHypothesis.h \ - ChartHypothesisCollection.h \ - ChartManager.h \ - ChartRuleLookupManager.h \ - ChartRuleLookupManagerMemory.h \ - ChartRuleLookupManagerOnDisk.h \ - ChartTranslationOption.h \ - ChartTranslationOptionCollection.h \ - ChartTranslationOptionList.h \ - ChartTrellisDetour.h \ - ChartTrellisDetourQueue.h \ - ChartTrellisNode.h \ - ChartTrellisPath.h \ - ChartTrellisPathList.h \ - ConfusionNet.h \ - DecodeFeature.h \ - DecodeGraph.h \ - DecodeStep.h \ - DecodeStepGeneration.h \ - DecodeStepTranslation.h \ - Dictionary.h \ - DotChart.h \ - DotChartInMemory.h \ - DotChartOnDisk.h \ - DummyScoreProducers.h \ - DynSAInclude/file.h \ - DynSAInclude/vocab.h \ - DynSuffixArray.h \ - FFState.h \ - Factor.h \ - FactorCollection.h \ - FactorTypeSet.h \ - FeatureFunction.h \ - File.h \ - FilePtr.h \ - FloydWarshall.h \ - GenerationDictionary.h \ - GlobalLexicalModel.h \ - gzfilebuf.h \ - hash.h \ - Hypothesis.h \ - HypothesisStack.h \ - HypothesisStackCubePruning.h \ - HypothesisStackNormal.h \ - InputFileStream.h \ - InputType.h \ - LMList.h \ - LVoc.h \ - LM/Base.h \ - LM/Joint.h \ - LM/Factory.h \ - LM/Implementation.h \ - LM/MultiFactor.h \ - LM/Remote.h \ - LM/SingleFactor.h \ - LM/Ken.h \ - LexicalReordering.h \ - LexicalReorderingState.h \ - LexicalReorderingTable.h \ - Manager.h \ - NonTerminal.h \ - ObjectPool.h \ - PCNTools.h \ - PDTAimp.h \ 
- Parameter.h \ - PartialTranslOptColl.h \ - Phrase.h \ - PhraseDictionary.h \ - PhraseDictionaryALSuffixArray.h \ - PhraseDictionaryDynSuffixArray.h \ - PhraseDictionaryMemory.h \ - PhraseDictionarySCFG.h \ - PhraseDictionaryNode.h \ - PhraseDictionaryNodeSCFG.h \ - PhraseDictionaryOnDisk.h \ - PhraseDictionaryTree.h \ - PhraseDictionaryTreeAdaptor.h \ - PrefixTree.h \ - PrefixTreeMap.h \ - ReorderingConstraint.h \ - ReorderingStack.h \ - RuleCube.h \ - RuleCubeItem.h \ - RuleCubeQueue.h \ - RuleTableLoader.h \ - RuleTableLoaderCompact.h \ - RuleTableLoaderFactory.h \ - RuleTableLoaderHiero.h \ - RuleTableLoaderStandard.h \ - ScoreComponentCollection.h \ - ScoreIndexManager.h \ - ScoreProducer.h \ - Search.h \ - SearchCubePruning.h \ - SearchNormal.h \ - Sentence.h \ - SentenceStats.h \ - SquareMatrix.h \ - StaticData.h \ - TargetPhrase.h \ - TargetPhraseCollection.h \ - ThreadPool.h \ - Timer.h \ - TranslationOption.h \ - TranslationOptionCollection.h \ - TranslationOptionCollectionConfusionNet.h \ - TranslationOptionCollectionText.h \ - TranslationOptionList.h \ - TranslationSystem.h \ - TreeInput.h \ - TrellisPath.h \ - TrellisPathCollection.h \ - TrellisPathList.h \ - TypeDef.h \ - UniqueObject.h \ - UserMessage.h \ - Util.h \ - Word.h \ - WordLattice.h \ - WordsBitmap.h \ - WordsRange.h \ - XmlOption.h - -if PROTOBUF -libmoses_la_HEADERS += rule.pb.h hypergraph.pb.h -endif - -if SRI_LM -libmoses_la_HEADERS += LM/SRI.h \ - LM/ParallelBackoff.h -endif - -if IRST_LM -libmoses_la_HEADERS += LM/IRST.h -endif - -if RAND_LM -libmoses_la_HEADERS += LM/Rand.h -endif - -if ORLM_LM -libmoses_la_HEADERS += LM/ORLM.h \ - DynSAInclude/params.h \ - DynSAInclude/hash.h \ - DynSAInclude/quantizer.h \ - DynSAInclude/RandLMFilter.h \ - DynSAInclude/RandLMCache.h -endif - -if SYN_LM -libmoses_la_HEADERS += SyntacticLanguageModel.h -endif - -libmoses_la_SOURCES = \ - AlignmentInfo.cpp \ - AlignmentInfoCollection.cpp \ - BilingualDynSuffixArray.cpp \ - BitmapContainer.cpp \ - 
ChartCell.cpp \ - ChartCellCollection.cpp \ - ChartHypothesis.cpp \ - ChartHypothesisCollection.cpp \ - ChartManager.cpp \ - ChartRuleLookupManager.cpp \ - ChartRuleLookupManagerMemory.cpp \ - ChartRuleLookupManagerOnDisk.cpp \ - ChartTranslationOption.cpp \ - ChartTranslationOptionCollection.cpp \ - ChartTranslationOptionList.cpp \ - ChartTrellisDetour.cpp \ - ChartTrellisDetourQueue.cpp \ - ChartTrellisNode.cpp \ - ChartTrellisPath.cpp \ - ConfusionNet.cpp \ - DecodeFeature.cpp \ - DecodeGraph.cpp \ - DecodeStep.cpp \ - DecodeStepGeneration.cpp \ - DecodeStepTranslation.cpp \ - Dictionary.cpp \ - DotChart.cpp \ - DotChartInMemory.cpp \ - DotChartOnDisk.cpp \ - DummyScoreProducers.cpp \ - DynSAInclude/file.cpp \ - DynSAInclude/vocab.cpp \ - DynSuffixArray.cpp \ - FFState.cpp \ - Factor.cpp \ - FactorCollection.cpp \ - FactorTypeSet.cpp \ - FeatureFunction.cpp \ - FloydWarshall.cpp \ - GenerationDictionary.cpp \ - GlobalLexicalModel.cpp \ - hash.cpp \ - Hypothesis.cpp \ - HypothesisStack.cpp \ - HypothesisStackCubePruning.cpp \ - HypothesisStackNormal.cpp \ - InputFileStream.cpp \ - InputType.cpp \ - LMList.cpp \ - LVoc.cpp \ - LM/Base.cpp \ - LM/Factory.cpp \ - LM/Implementation.cpp \ - LM/Joint.cpp \ - LM/Ken.cpp \ - LM/MultiFactor.cpp \ - LM/Remote.cpp \ - LM/SingleFactor.cpp \ - LexicalReordering.cpp \ - LexicalReorderingState.cpp \ - LexicalReorderingTable.cpp \ - Manager.cpp \ - PCNTools.cpp \ - Parameter.cpp \ - PartialTranslOptColl.cpp \ - Phrase.cpp \ - PhraseDictionary.cpp \ - PhraseDictionaryALSuffixArray.cpp \ - PhraseDictionaryDynSuffixArray.cpp \ - PhraseDictionaryHiero.cpp \ - PhraseDictionaryMemory.cpp \ - PhraseDictionarySCFG.cpp \ - PhraseDictionaryNode.cpp \ - PhraseDictionaryNodeSCFG.cpp \ - PhraseDictionaryOnDisk.cpp \ - PhraseDictionaryTree.cpp \ - PhraseDictionaryTreeAdaptor.cpp \ - PrefixTreeMap.cpp \ - ReorderingConstraint.cpp \ - ReorderingStack.cpp \ - RuleCube.cpp \ - RuleCubeItem.cpp \ - RuleCubeQueue.cpp \ - RuleTableLoaderCompact.cpp 
\ - RuleTableLoaderFactory.cpp \ - RuleTableLoaderHiero.cpp \ - RuleTableLoaderStandard.cpp \ - ScoreComponentCollection.cpp \ - ScoreIndexManager.cpp \ - ScoreProducer.cpp \ - Search.cpp \ - SearchCubePruning.cpp \ - SearchNormal.cpp \ - Sentence.cpp \ - SentenceStats.cpp \ - SquareMatrix.cpp \ - StaticData.cpp \ - TargetPhrase.cpp \ - TargetPhraseCollection.cpp \ - ThreadPool.cpp \ - Timer.cpp \ - TranslationOption.cpp \ - TranslationOptionCollection.cpp \ - TranslationOptionCollectionConfusionNet.cpp \ - TranslationOptionCollectionText.cpp \ - TranslationOptionList.cpp \ - TranslationSystem.cpp \ - TreeInput.cpp \ - TrellisPath.cpp \ - TrellisPathCollection.cpp \ - UserMessage.cpp \ - Util.cpp \ - Word.cpp \ - WordLattice.cpp \ - WordsBitmap.cpp \ - WordsRange.cpp \ - XmlOption.cpp - -if PROTOBUF -BUILT_SOURCES = \ - rule.pb.h \ - rule.pb.cc \ - hypergraph.pb.h \ - hypergraph.pb.cc - -CLEANFILES = $(BUILT_SOURCES) -SUFFIXES = .proto - -rule.pb.cc: rule.proto - @PROTOC@ --cpp_out=. $< -rule.pb.h: rule.proto - @PROTOC@ --cpp_out=. $< - -hypergraph.pb.cc: hypergraph.proto - @PROTOC@ --cpp_out=. $< -hypergraph.pb.h: hypergraph.proto - @PROTOC@ --cpp_out=. 
$< - -libmoses_la_SOURCES += rule.pb.cc hypergraph.pb.cc - -endif - -if SRI_LM -libmoses_la_SOURCES += LM/SRI.cpp \ - LM/ParallelBackoff.cpp - -endif - -if IRST_LM -libmoses_la_SOURCES += LM/IRST.cpp -endif - -if RAND_LM -libmoses_la_SOURCES += LM/Rand.cpp -endif - -if ORLM_LM -libmoses_la_SOURCES += LM/ORLM.cpp \ - DynSAInclude/onlineRLM.h \ - DynSAInclude/perfecthash.h \ - DynSAInclude/params.cpp -endif - -if SYN_LM -libmoses_la_SOURCES += SyntacticLanguageModel.cpp -endif - -libmoses_la_LIBADD = $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS) diff --git a/regenerate-makefiles.sh b/regenerate-makefiles.sh deleted file mode 100755 index 8470e3f1e..000000000 --- a/regenerate-makefiles.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash -cat <=2.60 of autoconf -# And version >=1.4.7 of m4 - -# For Mac OSX users: -# Standard distribution usually includes versions 1.6. -# Get versions 1.9 or higher -# Set the following variable to the correct paths -#ACLOCAL="/path/to/aclocal-1.9" -#AUTOMAKE="/path/to/automake-1.9" - -function die () { - echo "$@" >&2 - - # Try to be as helpful as possible by detecting OS and making recommendations - if (( $(lsb_release -a | fgrep -ci "ubuntu") > 0 )); then - echo >&2 - echo >&2 "Need to install build autotools on Ubuntu? Use:" - echo >&2 "sudo aptitude install autoconf automake libtool build-essential" - fi - if (( $(uname -a | fgrep -ci "darwin") > 0 )); then - echo >&2 - echo >&2 "Having problems on Mac OSX?" - echo >&2 "You might have an old version of aclocal/automake. You'll need to upgrade these." - fi - exit 1 -} - -if [ -z "$ACLOCAL" ]; then - ACLOCAL=`which aclocal` - [ -n "$ACLOCAL" ] || die "aclocal not found on your system. Please install it or set $ACLOCAL" -fi - -if [ -z "$AUTOMAKE" ]; then - AUTOMAKE=`which automake` - [ -n "$AUTOMAKE" ] || die "automake not found on your system. 
Please install it or set $AUTOMAKE" -fi - -if [ -z "$AUTOCONF" ]; then - AUTOCONF=`which autoconf` - [ -n "$AUTOCONF" ] || die "autoconf not found on your system. Please install it or set $AUTOCONF" -fi - -if [ -z "$LIBTOOLIZE" ]; then - LIBTOOLIZE=`which libtoolize` - - if [ -z "$LIBTOOLIZE" ]; then - LIBTOOLIZE=`which glibtoolize` - fi - - [ -n "$LIBTOOLIZE" ] || die "libtoolize/glibtoolize not found on your system. Please install it or set $LIBTOOLIZE" -fi - -echo >&2 "Detected aclocal: $($ACLOCAL --version | head -n1)" -echo >&2 "Detected autoconf: $($AUTOCONF --version | head -n1)" -echo >&2 "Detected automake: $($AUTOMAKE --version | head -n1)" -echo >&2 "Detected libtoolize: $($LIBTOOLIZE --version | head -n1)" - -echo "Calling $ACLOCAL -I m4..." -$ACLOCAL -I m4 || die "aclocal failed" - -echo "Calling $AUTOCONF..." -$AUTOCONF || die "autoconf failed" - -echo "Calling $LIBTOOLIZE" -$LIBTOOLIZE || die "libtoolize failed" - -echo "Calling $AUTOMAKE --add-missing..." -$AUTOMAKE --add-missing || die "automake failed" - -case `uname -s` in - Darwin) - cores=$(sysctl -n hw.ncpu) - ;; - Linux) - cores=$(cat /proc/cpuinfo | fgrep -c processor) - ;; - *) - echo "Unknown platform." - cores= - ;; -esac - -if [ -z "$cores" ]; then - cores=2 # assume 2 cores if we can't figure it out - echo >&2 "Assuming 2 cores" -else - echo >&2 "Detected $cores cores" -fi - -echo -echo "You should now be able to configure and build:" -echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--with-synlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]" -echo " make -j ${cores}" -echo - diff --git a/scripts/.cvsignore b/scripts/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/scripts/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/scripts/Jamfile b/scripts/Jamfile index bfc9c65f4..48535faa3 100644 --- a/scripts/Jamfile +++ b/scripts/Jamfile @@ -1,6 +1,7 @@ #See ../Jamroot for options. 
import option ; +build-project ems/biconcor ; build-project training ; with-giza = [ option.get "with-giza" ] ; @@ -37,91 +38,21 @@ if $(location) { install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : $(location)/training/phrase-extract/extract-ghkm/tools ; install compactify : training/compact-rule-table//compactify : $(location)/training/compact-rule-table/tools ; - install phrase-extract : training/phrase-extract//released-programs : $(location)/training/phrase-extract ; + install phrase-extract : training/phrase-extract//programs : $(location)/training/phrase-extract ; install lexical-reordering : training/lexical-reordering//score : $(location)/training/lexical-reordering ; - install symal : training/symal//symal : $(location)/symal ; + install symal : training/symal//symal : $(location)/training/symal ; + + install biconcor : ems/biconcor//biconcor : $(location)/ems/biconcor ; if $(WITH-GIZA) != no { install train-model : training//train-model.perl : $(location)/training ; - } else { - alias train-model ; } install scripts : - analysis/README - analysis/sentence-by-sentence.pl + [ glob-tree README *.js *.pl *.perl *.pm *.py *.sh *.php : tests regression-testing other bin train_model.perl ] + [ glob tokenizer/nonbreaking_prefixes/* ems/example/*.* ems/example/data/* ems/web/* analysis/smtgui/* : ems/web/javascripts ] + generic/fsa-sample.fsa ems/experiment.machines ems/experiment.meta - ems/experiment.perl - ems/example/config.basic - ems/example/config.factored - ems/example/config.hierarchical - ems/example/config.syntax - ems/example/config.toy - ems/example/data/nc-5k.en - ems/example/data/nc-5k.fr - ems/example/data/test-ref.en.sgm - ems/example/data/test-src.fr.sgm - ems/support/analysis.perl - ems/support/berkeley-process.sh - ems/support/berkeley-train.sh - ems/support/consolidate-training-data.perl - ems/support/generic-multicore-parallelizer.perl - ems/support/generic-parallelizer.perl - ems/support/input-from-sgm.perl - 
ems/support/interpolate-lm.perl - ems/support/reference-from-sgm.perl - ems/support/remove-segmenation-markup.perl - ems/support/report-experiment-scores.perl - ems/support/reuse-weights.perl - ems/support/run-command-on-multiple-refsets.perl - ems/support/wrap-xml.perl - ems/web/analysis.php - ems/web/analysis_diff.php - ems/web/comment.php - ems/web/diff.php - ems/web/index.php - ems/web/lib.php - ems/web/overview.php - ems/web/setup - ems/web/javascripts/builder.js - ems/web/javascripts/controls.js - ems/web/javascripts/dragdrop.js - ems/web/javascripts/effects.js - ems/web/javascripts/prototype.js - ems/web/javascripts/scriptaculous.js - ems/web/javascripts/slider.js - ems/web/javascripts/sound.js - ems/web/javascripts/unittest.js - generic/compound-splitter.perl - generic/extract-factors.pl - generic/lopar2pos.pl - generic/moses-parallel.pl - generic/mteval-v12.pl - generic/multi-bleu.perl - generic/qsub-wrapper.pl - README - [ glob tokenizer/*.perl tokenizer/nonbreaking_prefixes/* ] - training/absolutize_moses_model.pl - training/build-generation-table.perl - training/clean-corpus-n.perl - training/clone_moses_model.pl - training/filter-model-given-input.pl - training/filter-rule-table.py - training/zmert-moses.pl - training/mert-moses.pl - training/mert-moses-multi.pl - training/postprocess-lopar.perl - training/reduce_combine.pl - training/combine_factors.pl - training/symal/giza2bal.pl - training/wrappers/parse-de-bitpar.perl - training/wrappers/parse-en-collins.perl - training/wrappers/make-factor-en-pos.mxpost.perl - training/wrappers/make-factor-pos.tree-tagger.perl - training/wrappers/make-factor-stem.perl - [ glob recaser/*.perl ] : . 
$(location) ; - - alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ; } diff --git a/scripts/ems/biconcor/Jamfile b/scripts/ems/biconcor/Jamfile index 800a975ea..003193067 100644 --- a/scripts/ems/biconcor/Jamfile +++ b/scripts/ems/biconcor/Jamfile @@ -1,3 +1,3 @@ -exe biconcur : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ; +exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ; -install legacy : biconcur : . ; +install legacy : biconcor : . ; diff --git a/scripts/recaser/train-recaser.perl b/scripts/recaser/train-recaser.perl index 8a2b17ede..a5a707554 100755 --- a/scripts/recaser/train-recaser.perl +++ b/scripts/recaser/train-recaser.perl @@ -8,7 +8,7 @@ binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); # apply switches -my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG); +my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG,$HELP,$ERROR); my $LM = "SRILM"; # SRILM is default. my $BUILD_LM = "build-lm.sh"; my $NGRAM_COUNT = "ngram-count"; @@ -16,24 +16,66 @@ my $TRAIN_SCRIPT = "train-factored-phrase-model.perl"; my $MAX_LEN = 1; my $FIRST_STEP = 1; my $LAST_STEP = 11; -die("train-recaser.perl --dir recaser --corpus cased") +$ERROR = "training Aborted." 
unless &GetOptions('first-step=i' => \$FIRST_STEP, 'last-step=i' => \$LAST_STEP, 'corpus=s' => \$CORPUS, 'config=s' => \$CONFIG, - 'dir=s' => \$DIR, - 'ngram-count=s' => \$NGRAM_COUNT, - 'build-lm=s' => \$BUILD_LM, - 'lm=s' => \$LM, - 'train-script=s' => \$TRAIN_SCRIPT, - 'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR, - 'max-len=i' => \$MAX_LEN); + 'dir=s' => \$DIR, + 'ngram-count=s' => \$NGRAM_COUNT, + 'build-lm=s' => \$BUILD_LM, + 'lm=s' => \$LM, + 'train-script=s' => \$TRAIN_SCRIPT, + 'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR, + 'max-len=i' => \$MAX_LEN, + 'help' => \$HELP); # check and set default to unset parameters -die("please specify working dir --dir") unless defined($DIR); -die("please specify --corpus") if !defined($CORPUS) +$ERROR = "please specify working dir --dir" unless defined($DIR) || defined($HELP); +$ERROR = "please specify --corpus" if !defined($CORPUS) && !defined($HELP) && $FIRST_STEP <= 2 && $LAST_STEP >= 1; +if ($HELP || $ERROR) { + if ($ERROR) { + print STDERR "ERROR: " . $ERROR . "\n"; + } + print STDERR "Usage: $0 --dir /output/recaser --corpus /Cased/corpus/files [options ...]"; + + print STDERR "\n\nOptions: + == MANDATORY == + --dir=dir ... outputted recaser directory. + --corpus=file ... inputted cased corpus. + + == OPTIONAL == + = Recaser Training configuration = + --train-script=file ... path to the train script (default: train-factored-phrase-model.perl in \$PATH). + --config=config ... training script configuration. + --scripts-root-dir=dir ... scripts directory. + --max-len=int ... max phrase length (default: 1). + + = Language Model Training configuration = + --lm=[IRSTLM,SRILM] ... language model (default: SRILM). + --build-lm=file ... path to build-lm.sh if not in \$PATH (used only with --lm=IRSTLM). + --ngram-count=file ... path to ngram-count.sh if not in \$PATH (used only with --lm=SRILM). 
+ + = Steps this script will perform = + (1) Truecasing (disabled); + (2) Language Model Training; + (3) Data Preparation + (4-10) Recaser Model Training; + (11) Cleanup. + --first-step=[1-11] ... step where script starts (default: 1). + --last-step=[1-11] ... step where script ends (default: 11). + + --help ... this usage output.\n"; + if ($ERROR) { + exit(1); + } + else { + exit(0); + } +} + # main loop `mkdir -p $DIR`; &truecase() if 0 && $FIRST_STEP == 1; @@ -60,7 +102,7 @@ sub train_lm { } print STDERR "** Using $LM **" . "\n"; print STDERR $cmd."\n"; - print STDERR `$cmd`; + system($cmd) == 0 || die("Language model training failed with error " . ($? >> 8) . "\n"); } sub prepare_data { @@ -110,12 +152,18 @@ sub train_recase_model { $cmd .= " -scripts-root-dir $SCRIPTS_ROOT_DIR" if $SCRIPTS_ROOT_DIR; $cmd .= " -config $CONFIG" if $CONFIG; print STDERR $cmd."\n"; - print STDERR `$cmd`; + system($cmd) == 0 || die("Recaser model training failed with error " . ($? >> 8) . "\n"); } sub cleanup { print STDERR "\n(11) Cleaning up @ ".`date`; `rm -f $DIR/extract*`; + my $clean_1 = $?; `rm -f $DIR/aligned*`; + my $clean_2 = $?; `rm -f $DIR/lex*`; + my $clean_3 = $?; + if ($clean_1 + $clean_2 + $clean_3 != 0) { + print STDERR "Training successful but some files could not be cleaned.\n"; + } } diff --git a/scripts/tokenizer/nonbreaking_prefixes/README.txt b/scripts/tokenizer/nonbreaking_prefixes/README.txt new file mode 100644 index 000000000..02cdfccb9 --- /dev/null +++ b/scripts/tokenizer/nonbreaking_prefixes/README.txt @@ -0,0 +1,5 @@ +The language suffix can be found here: + +http://www.loc.gov/standards/iso639-2/php/code_list.php + + diff --git a/scripts/training/phrase-extract/Jamfile b/scripts/training/phrase-extract/Jamfile index 9d229a22f..369d8cc00 100644 --- a/scripts/training/phrase-extract/Jamfile +++ b/scripts/training/phrase-extract/Jamfile @@ -19,9 +19,7 @@ exe relax-parse : tables-core.cpp SyntaxTree.cpp XmlTree.cpp relax-parse.cpp ; exe statistics : 
tables-core.cpp AlignmentPhrase.cpp statistics.cpp InputFileStream ; -alias released-programs : extract extract-rules score consolidate ; - -alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-direct consolidate-reverse relax-parse statistics ; +alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-reverse relax-parse statistics ; install legacy : programs : . EXE ; diff --git a/util/Makefile.am b/util/Makefile.am deleted file mode 100644 index c567793ff..000000000 --- a/util/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -lib_LTLIBRARIES = libkenutil.la - -AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS) - -libkenutil_la_SOURCES = \ - bit_packing.cc \ - ersatz_progress.cc \ - exception.cc \ - file.cc \ - file_piece.cc \ - murmur_hash.cc \ - mmap.cc