mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-05 02:22:21 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
8c1c17e9f1
57
.gitignore
vendored
57
.gitignore
vendored
@ -1,65 +1,30 @@
|
||||
*.[oa]
|
||||
*.la
|
||||
*.lo
|
||||
*.Po
|
||||
*.so
|
||||
*.a
|
||||
*.swp
|
||||
*~
|
||||
CreateOnDisk/src/CreateOnDiskPt
|
||||
Makefile
|
||||
Makefile.in
|
||||
aclocal.m4
|
||||
autom4te.cache/
|
||||
config.h
|
||||
config.log
|
||||
config.guess
|
||||
config.status
|
||||
config.sub
|
||||
configure
|
||||
depcomp
|
||||
dist*
|
||||
install-sh
|
||||
jam-files/bjam
|
||||
jam-files/engine/bootstrap
|
||||
jam-files/engine/bin.*
|
||||
lm/.deps/
|
||||
lm/.libs/
|
||||
util/.deps/
|
||||
util/.libs/
|
||||
lm/build_binary
|
||||
lm/query
|
||||
libtool
|
||||
mert/.deps/
|
||||
mert/Makefile
|
||||
mert/Makefile.in
|
||||
mert/evaluator
|
||||
mert/extractor
|
||||
mert/mert
|
||||
mert/megam_i686.opt
|
||||
mert/pro
|
||||
misc/.deps/
|
||||
mert/.libs/
|
||||
misc/Makefile
|
||||
misc/Makefile.in
|
||||
misc/processLexicalTable
|
||||
misc/processPhraseTable
|
||||
misc/queryLexicalTable
|
||||
misc/queryPhraseTable
|
||||
moses-chart/src/.deps/
|
||||
moses-chart-cmd/src/moses_chart
|
||||
moses-cmd/src/.deps/
|
||||
moses-cmd/src/Makefile
|
||||
moses-cmd/src/Makefile.in
|
||||
moses-cmd/src/checkplf
|
||||
moses-cmd/src/lmbrgrid
|
||||
moses-cmd/src/moses
|
||||
moses/src/.deps/
|
||||
moses/src/.libs/
|
||||
moses/src/Makefile
|
||||
moses/src/Makefile.in
|
||||
regression-testing/moses-reg-test-data-*
|
||||
regression-testing/tests/mert.extractor-bin/FEATSTAT*
|
||||
regression-testing/tests/mert.extractor-bin/SCORESTAT*
|
||||
scripts-2*
|
||||
scripts/ems/biconcor/biconcor
|
||||
scripts/release-exclude
|
||||
scripts/training/cmert-0.5/mert
|
||||
@ -73,17 +38,6 @@ scripts/training/phrase-extract/consolidate
|
||||
scripts/training/phrase-extract/consolidate-direct
|
||||
scripts/training/phrase-extract/consolidate-reverse
|
||||
scripts/training/phrase-extract/extract
|
||||
scripts/training/phrase-extract/extract-ghkm/config.guess
|
||||
scripts/training/phrase-extract/extract-ghkm/config.h.in
|
||||
scripts/training/phrase-extract/extract-ghkm/config.sub
|
||||
scripts/training/phrase-extract/extract-ghkm/depcomp
|
||||
scripts/training/phrase-extract/extract-ghkm/install-sh
|
||||
scripts/training/phrase-extract/extract-ghkm/m4/libtool.m4
|
||||
scripts/training/phrase-extract/extract-ghkm/m4/ltoptions.m4
|
||||
scripts/training/phrase-extract/extract-ghkm/m4/ltsugar.m4
|
||||
scripts/training/phrase-extract/extract-ghkm/m4/ltversion.m4
|
||||
scripts/training/phrase-extract/extract-ghkm/m4/lt~obsolete.m4
|
||||
scripts/training/phrase-extract/extract-ghkm/missing
|
||||
scripts/training/phrase-extract/extract-ghkm/tools/extract-ghkm
|
||||
scripts/training/phrase-extract/extract-lex
|
||||
scripts/training/phrase-extract/extract-rules
|
||||
@ -92,12 +46,5 @@ scripts/training/phrase-extract/score
|
||||
scripts/training/phrase-extract/statistics
|
||||
scripts/training/symal/symal
|
||||
scripts/training/train-model.perl
|
||||
stamp-h1
|
||||
ltmain.sh
|
||||
m4/libtool.m4
|
||||
m4/ltoptions.m4
|
||||
m4/ltsugar.m4
|
||||
m4/ltversion.m4
|
||||
m4/lt~obsolete.m4
|
||||
dist
|
||||
bin
|
||||
|
@ -1,64 +1,109 @@
|
||||
0) Preliminaries
|
||||
PRELIMINARIES
|
||||
|
||||
Before building you need to decide what language model toolkit (SRI's,
|
||||
IRST's, or Ken's) you want to use.
|
||||
Moses is primarily targeted at gcc on UNIX.
|
||||
|
||||
If you want to use SRI's, you will need to download its source
|
||||
and build it. The SRILM can be downloaded from
|
||||
Moses requires gcc, Boost >= 1.36, and zlib including the headers that some
|
||||
distributions package separately (i.e. -dev or -devel packages). Source is
|
||||
available at http://boost.org .
|
||||
|
||||
There are several optional dependencies:
|
||||
|
||||
GIZA++ from http://code.google.com/p/giza-pp/ is used to build phrase tables.
|
||||
|
||||
Moses server requires xmlrpc-c with abyss-server. Source is available from
|
||||
http://xmlrpc-c.sourceforge.net/.
|
||||
|
||||
The scripts support building ARPA format language models with SRILM or IRSTLM.
|
||||
To apply models inside the decoder, you can use SRILM, IRSTLM, or KenLM. The
|
||||
ARPA format is exchangeable so that e.g. you can build a model with SRILM and
|
||||
run the decoder with IRSTLM or KenLM.
|
||||
|
||||
If you want to use SRILM, you will need to download its source and build it.
|
||||
The SRILM can be downloaded from
|
||||
http://www.speech.sri.com/projects/srilm/download.html .
|
||||
On x86_64, the default machine type is broken. Edit sbin/machine-type, find
|
||||
this code
|
||||
else if (`uname -m` == x86_64) then
|
||||
set MACHINE_TYPE = i686
|
||||
and change it to
|
||||
else if (`uname -m` == x86_64) then
|
||||
set MACHINE_TYPE = i686-m64
|
||||
You may have to chmod +w sbin/machine-type first.
|
||||
|
||||
|
||||
If you want to use IRST's, you will need to download its source and
|
||||
build it. The IRSTLM can be downloaded from either the SourceForge
|
||||
website
|
||||
If you want to use IRSTLM, you will need to download its source and build it.
|
||||
The IRSTLM can be downloaded from either the SourceForge website
|
||||
http://sourceforge.net/projects/irstlm
|
||||
or the official IRSTLM website
|
||||
http://hlt.fbk.eu/en/irstlm
|
||||
|
||||
|
||||
Ken's LM is included with the Moses distribution.
|
||||
KenLM is included with Moses.
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
1) Instructions for building with SRILM
|
||||
ADVICE ON INSTALLING EXTERNAL LIBRARIES
|
||||
|
||||
Build SRILM according to their release instructions. Make sure that
|
||||
you DO NOT override the MACHINE_TYPE variable on the command line when
|
||||
you do so, as this can lead to problems locating the library.
|
||||
Generally, for trouble installing external libraries, you should get support
|
||||
directly from the library maker:
|
||||
|
||||
./bjam [--with-srilm=/path/to/srilm]
|
||||
Boost: http://www.boost.org/doc/libs/1_48_0/more/getting_started/unix-variants.html
|
||||
IRSTLM: https://list.fbk.eu/sympa/subscribe/user-irstlm
|
||||
SRILM: http://www.speech.sri.com/projects/srilm/#srilm-user
|
||||
|
||||
However, here's some general advice on installing software (for bash users):
|
||||
|
||||
#Determine where you want to install packages
|
||||
PREFIX=$HOME/usr
|
||||
#If your system has lib64 directories, lib64 should be used AND NOT lib
|
||||
if [ -d /lib64 ]; then
|
||||
LIBDIR=$PREFIX/lib64
|
||||
else
|
||||
LIBDIR=$PREFIX/lib
|
||||
fi
|
||||
#If you're installing to a non-standard path, tell programs where to find things:
|
||||
export PATH=$PREFIX/bin${PATH:+:$PATH}
|
||||
export LD_LIBRARY_PATH=$LIBDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
|
||||
export LIBRARY_PATH=$LIBDIR${LIBRARY_PATH:+:$LIBRARY_PATH}
|
||||
export CPATH=$PREFIX/include${CPATH:+:$CPATH}
|
||||
|
||||
Add all the above code to your .bashrc or .bash_login as appropriate. Then
|
||||
you're ready to install packages in non-standard paths:
|
||||
|
||||
#For autotools packages e.g. xmlrpc-c
|
||||
./configure --prefix=$PREFIX --libdir=$LIBDIR [other options here]
|
||||
|
||||
#For Boost:
|
||||
./bootstrap.sh
|
||||
./b2 --prefix=$PREFIX --libdir=$LIBDIR link=static,shared threading=multi install
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
2) Instructions for building with IRSTLM
|
||||
BUILDING
|
||||
|
||||
Build IRSTLM according to its release instructions.
|
||||
Building consists of running
|
||||
./bjam [options]
|
||||
|
||||
./bjam [--with-irstlm=/path/to/irstlm]
|
||||
Common options are:
|
||||
--with-srilm=/path/to/srilm to compile the decoder with SRILM support
|
||||
--with-irstlm=/path/to/irstlm to compile the decoder with IRSTLM support
|
||||
--with-giza=/path/to/giza to enable training scripts
|
||||
-jN where N is the number of CPUs
|
||||
|
||||
Binaries will appear in dist/bin.
|
||||
|
||||
For further documentation, run
|
||||
./bjam --help
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
3) Instructions for building with Ken's LM
|
||||
|
||||
./bjam
|
||||
|
||||
--------------------------------------------------------------------------
|
||||
|
||||
|
||||
ALTERNATIVE WAYS TO BUILD ON UNIX AND OTHER PLATFORMS
|
||||
|
||||
Microsoft Windows
|
||||
-----------------
|
||||
Tested on 32-bit Windows XP and Vista using Visual Studio 2005.
|
||||
Again, refer to the old manual
|
||||
http://homepages.inf.ed.ac.uk/s0565741/papers/developers-manual.pdf
|
||||
The Windows build doesn't use the SRI or IRST language model libraries as they can't be compiled
|
||||
under Windows using Visual Studio. Instead, an internal language model, which behaves like SRILM, is used,
|
||||
however, it can only handle up to trigrams.
|
||||
Moses is primarily targeted at gcc on UNIX. Windows users should consult
|
||||
http://ssli.ee.washington.edu/people/amittai/Moses-on-Win7.pdf .
|
||||
|
||||
Binaries for all external libraries needed can be downloaded from
|
||||
http://www.statmt.org/moses/?n=Moses.LibrariesUsed
|
||||
|
||||
Only the decoder is developed and tested under Windows. There are difficulties using the training scripts under Windows, even with Cygwin.
|
||||
|
||||
|
||||
Only the decoder is developed and tested under Windows. There are difficulties
|
||||
using the training scripts under Windows, even with Cygwin.
|
||||
|
@ -1,6 +0,0 @@
|
||||
bin_PROGRAMS = CreateOnDiskPt
|
||||
CreateOnDiskPt_SOURCES = Main.cpp
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||
|
||||
CreateOnDiskPt_LDADD = -L$(top_srcdir)/OnDiskPt/src -L$(top_srcdir)/moses/src -lOnDiskPt -lmoses $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
CreateOnDiskPt_DEPENDENCIES = $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a $(top_srcdir)/moses/src/libmoses.la
|
26
Jamroot
26
Jamroot
@ -59,12 +59,12 @@
|
||||
path-constant TOP : . ;
|
||||
|
||||
# Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php
|
||||
rule trim-nl ( str ) {
|
||||
rule trim-nl ( str extras * ) {
|
||||
return [ MATCH "([^
|
||||
]*)" : $(str) ] ;
|
||||
]*)" : $(str) ] $(extras) ;
|
||||
}
|
||||
rule _shell ( cmd ) {
|
||||
return [ trim-nl [ SHELL $(cmd) ] ] ;
|
||||
rule _shell ( cmd : extras * ) {
|
||||
return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
|
||||
}
|
||||
|
||||
import option ;
|
||||
@ -110,7 +110,7 @@ if $(boost-version) < 103600 {
|
||||
exit You have Boost $(boost-version). Moses requires at least 103600 (and preferably newer). : 1 ;
|
||||
}
|
||||
#Are we linking static binaries against shared boost?
|
||||
boost-auto-shared = [ auto_shared "boost_program_options" : L-boost-search ] ;
|
||||
boost-auto-shared = [ auto_shared "boost_program_options" : $(L-boost-search) ] ;
|
||||
#Convenience rule for boost libraries. Defines library boost_$(name).
|
||||
rule boost_lib ( name macro ) {
|
||||
#Link multi-threaded programs against the -mt version if available. Old
|
||||
@ -137,14 +137,20 @@ rule external_lib ( name ) {
|
||||
|
||||
external_lib z ;
|
||||
|
||||
requirements = ;
|
||||
|
||||
#libSegFault prints a stack trace on segfault. Link against it if available.
|
||||
if [ test_flags "-lSegfault" ] {
|
||||
external_lib SegFault ;
|
||||
segfault = <library>SegFault ;
|
||||
requirements += <library>SegFault ;
|
||||
}
|
||||
|
||||
trace = [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
|
||||
boost-pool = [ option.get "enable-boost-pool" : : "<define>USE_BOOST_POOL" ] ;
|
||||
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
|
||||
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
|
||||
|
||||
if [ option.get "with-irstlm" ] {
|
||||
requirements += <threading>single ;
|
||||
}
|
||||
|
||||
import os ;
|
||||
|
||||
@ -164,9 +170,7 @@ project : requirements
|
||||
<threading>multi:<define>WITH_THREADS
|
||||
<threading>multi:<library>boost_thread
|
||||
<define>_FILE_OFFSET_BITS=64 <define>_LARGE_FILES
|
||||
$(segfault)
|
||||
$(trace)
|
||||
$(boost-pool)
|
||||
$(requirements)
|
||||
<cxxflags>$(cxxflags)
|
||||
<cflags>$(cflags)
|
||||
<linkflags>$(ldflags)
|
||||
|
14
Makefile.am
14
Makefile.am
@ -1,14 +0,0 @@
|
||||
# not a GNU package. You can remove this line, if
|
||||
# have all needed files, that a GNU package needs
|
||||
AUTOMAKE_OPTIONS = foreign
|
||||
|
||||
ACLOCAL_AMFLAGS = -I m4
|
||||
|
||||
# order is important here: build moses before moses-cmd
|
||||
if WITH_MERT
|
||||
MERT = mert
|
||||
endif
|
||||
if WITH_SERVER
|
||||
SERVER = contrib/server
|
||||
endif
|
||||
SUBDIRS = util lm moses/src OnDiskPt/src moses-cmd/src misc moses-chart-cmd/src CreateOnDisk/src $(MERT) $(SERVER)
|
@ -1,14 +0,0 @@
|
||||
lib_LIBRARIES = libOnDiskPt.a
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
|
||||
libOnDiskPt_a_SOURCES = \
|
||||
OnDiskWrapper.cpp \
|
||||
SourcePhrase.cpp \
|
||||
TargetPhrase.cpp \
|
||||
Word.cpp \
|
||||
Phrase.cpp \
|
||||
PhraseNode.cpp \
|
||||
TargetPhraseCollection.cpp \
|
||||
Vocab.cpp
|
||||
|
||||
|
||||
|
116
config.h.in
116
config.h.in
@ -1,116 +0,0 @@
|
||||
/* config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Defined if the requested minimum BOOST version is satisfied */
|
||||
#undef HAVE_BOOST
|
||||
|
||||
/* Define to 1 if you have <boost/program_options.hpp> */
|
||||
#undef HAVE_BOOST_PROGRAM_OPTIONS_HPP
|
||||
|
||||
/* Define to 1 if you have <boost/scoped_ptr.hpp> */
|
||||
#undef HAVE_BOOST_SCOPED_PTR_HPP
|
||||
|
||||
/* Define to 1 if you have <boost/shared_ptr.hpp> */
|
||||
#undef HAVE_BOOST_SHARED_PTR_HPP
|
||||
|
||||
/* Define to 1 if you have <boost/thread.hpp> */
|
||||
#undef HAVE_BOOST_THREAD_HPP
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* flag for DMapLM */
|
||||
#undef HAVE_DMAPLM
|
||||
|
||||
/* Define to 1 if you have the <getopt.h> header file. */
|
||||
#undef HAVE_GETOPT_H
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* flag for IRSTLM */
|
||||
#undef HAVE_IRSTLM
|
||||
|
||||
/* Define to 1 if you have the `oolm' library (-loolm). */
|
||||
#undef HAVE_LIBOOLM
|
||||
|
||||
/* Define to 1 if you have the `tcmalloc' library (-ltcmalloc). */
|
||||
#undef HAVE_LIBTCMALLOC
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the <nl-cpt.h> header file. */
|
||||
#undef HAVE_NL_CPT_H
|
||||
|
||||
/* flag for ORLM */
|
||||
#undef HAVE_ORLM
|
||||
|
||||
/* flag for protobuf */
|
||||
#undef HAVE_PROTOBUF
|
||||
|
||||
/* flag for RandLM */
|
||||
#undef HAVE_RANDLM
|
||||
|
||||
/* flag for SRILM */
|
||||
#undef HAVE_SRILM
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* flag for Syntactic Parser */
|
||||
#undef HAVE_SYNLM
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* flag for zlib */
|
||||
#undef HAVE_ZLIB
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Name of package */
|
||||
#undef PACKAGE
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Flag to enable use of Boost pool */
|
||||
#undef USE_BOOST_POOL
|
||||
|
||||
/* Version number of package */
|
||||
#undef VERSION
|
332
configure.in
332
configure.in
@ -1,332 +0,0 @@
|
||||
AC_INIT(moses/src)
|
||||
|
||||
AM_CONFIG_HEADER(config.h)
|
||||
AM_INIT_AUTOMAKE(moses, 0.1)
|
||||
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
AC_PROG_CXX
|
||||
AC_PROG_CXXCPP
|
||||
AC_LANG_CPLUSPLUS
|
||||
|
||||
AC_DISABLE_SHARED
|
||||
AC_PROG_LIBTOOL
|
||||
# Shared libraries are disabled by default
|
||||
#LT_INIT([disable-shared])
|
||||
|
||||
AX_XMLRPC_C
|
||||
BOOST_REQUIRE([1.36.0])
|
||||
BOOST_SMART_PTR
|
||||
BOOST_PROGRAM_OPTIONS
|
||||
|
||||
AC_ARG_WITH(protobuf,
|
||||
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
|
||||
[with_protobuf=$withval],
|
||||
[with_protobuf=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(srilm,
|
||||
[AC_HELP_STRING([--with-srilm=PATH], [(optional) path to SRI's LM toolkit])],
|
||||
[with_srilm=$withval],
|
||||
[with_srilm=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(srilm-dynamic,
|
||||
[AC_HELP_STRING([--with-srilm-dynamic], [(optional) link dynamically with srilm])],
|
||||
[with_srilm_dynamic=yes],
|
||||
[with_srilm_dynamic=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(srilm-arch,
|
||||
[AC_HELP_STRING([--with-srilm-arch=ARCH], [(optional) architecture for which SRILM was built])],
|
||||
[with_srilm_arch=$withval],
|
||||
[with_srilm_arch=no]
|
||||
)
|
||||
|
||||
|
||||
AC_ARG_WITH(irstlm,
|
||||
[AC_HELP_STRING([--with-irstlm=PATH], [(optional) path to IRST's LM toolkit])],
|
||||
[with_irstlm=$withval],
|
||||
[with_irstlm=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(randlm,
|
||||
[AC_HELP_STRING([--with-randlm=PATH], [(optional) path to RandLM toolkit])],
|
||||
[with_randlm=$withval],
|
||||
[with_randlm=no]
|
||||
)
|
||||
AC_ARG_WITH(orlm,
|
||||
[AC_HELP_STRING([--with-orlm=PATH], [(optional) path to ORLM])],
|
||||
[with_orlm=$withval],
|
||||
[with_orlm=no]
|
||||
)
|
||||
AC_ARG_WITH(dmaplm,
|
||||
[AC_HELP_STRING([--with-dmaplm=PATH], [(optional) path to DMapLM])],
|
||||
[with_dmaplm=$withval],
|
||||
[with_dmaplm=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(synlm,
|
||||
[AC_HELP_STRING([--with-synlm], [(optional) Include syntactic language model parser; default is no])],
|
||||
[with_synlm=$withval],
|
||||
[with_synlm=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(notrace,
|
||||
[AC_HELP_STRING([--notrace], [disable trace])],
|
||||
[without_trace=yes],
|
||||
)
|
||||
|
||||
|
||||
|
||||
AC_ARG_ENABLE(profiling,
|
||||
[AC_HELP_STRING([--enable-profiling], [moses will dump profiling info])],
|
||||
[CPPFLAGS="$CPPFLAGS -pg"; LDFLAGS="$LDFLAGS -pg" ]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(optimization,
|
||||
[AC_HELP_STRING([--enable-optimization], [compile with -O3 flag])],
|
||||
[CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(threads,
|
||||
[AC_HELP_STRING([--enable-threads], [compile threadsafe library and multi-threaded moses (mosesmt)])],
|
||||
[],
|
||||
[enable_threads=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(zlib,
|
||||
[AC_HELP_STRING([--with-zlib=PATH], [(optional) path to zlib])],
|
||||
[with_zlib=$withval],
|
||||
[with_zlib=no]
|
||||
)
|
||||
|
||||
AC_ARG_WITH(tcmalloc,
|
||||
[AC_HELP_STRING([--with-tcmalloc], [(optional) link with tcmalloc; default is no])],
|
||||
[with_tcmalloc=$withval],
|
||||
[with_tcmalloc=no]
|
||||
)
|
||||
|
||||
AC_ARG_ENABLE(boost-pool,
|
||||
[AC_HELP_STRING([--enable-boost-pool], [(optional) try to improve speed by selectively using Boost pool allocation (may increase total memory use); default is yes if Boost enabled])],
|
||||
[enable_boost_pool=yes],
|
||||
[enable_boost_pool=no]
|
||||
)
|
||||
|
||||
|
||||
AM_CONDITIONAL([INTERNAL_LM], false)
|
||||
AM_CONDITIONAL([SRI_LM], false)
|
||||
AM_CONDITIONAL([IRST_LM], false)
|
||||
AM_CONDITIONAL([KEN_LM], false)
|
||||
AM_CONDITIONAL([RAND_LM], false)
|
||||
AM_CONDITIONAL([ORLM_LM], false)
|
||||
AM_CONDITIONAL([DMAP_LM], false)
|
||||
AM_CONDITIONAL([SYN_LM], false)
|
||||
AM_CONDITIONAL([PROTOBUF], false)
|
||||
AM_CONDITIONAL([am__fastdepCC], false)
|
||||
AM_CONDITIONAL([WITH_THREADS],false)
|
||||
|
||||
|
||||
if test "x$without_trace" = 'xyes'
|
||||
then
|
||||
AC_MSG_NOTICE([trace disabled, most regression test will fail])
|
||||
else
|
||||
AC_MSG_NOTICE([trace enabled (default)])
|
||||
CPPFLAGS="$CPPFLAGS -DTRACE_ENABLE=1"
|
||||
fi
|
||||
|
||||
if test "x$enable_threads" = 'xyes'
|
||||
then
|
||||
AC_MSG_NOTICE([Building threaded moses])
|
||||
BOOST_THREADS
|
||||
CPPFLAGS="$CPPFLAGS -DWITH_THREADS"
|
||||
AM_CONDITIONAL([WITH_THREADS],true)
|
||||
else
|
||||
AC_MSG_NOTICE([Building non-threaded moses. This will disable the moses server])
|
||||
fi
|
||||
|
||||
if test "x$with_protobuf" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_protobuf}/include"
|
||||
|
||||
AC_CHECK_HEADER(google/protobuf/message.h,
|
||||
[AC_DEFINE([HAVE_PROTOBUF], [], [flag for protobuf])],
|
||||
[AC_MSG_ERROR([Cannot find protobuf!])])
|
||||
|
||||
LIB_PROTOBUF="-lprotobuf"
|
||||
LDFLAGS="$LDFLAGS -L${with_protobuf}/lib"
|
||||
LIBS="$LIBS $LIB_PROTOBUF"
|
||||
AC_PATH_PROG(PROTOC,protoc,,"${PATH}:${with_protobuf}/bin")
|
||||
FMTLIBS="$FMTLIBS libprotobuf.a"
|
||||
AM_CONDITIONAL([PROTOBUF], true)
|
||||
fi
|
||||
|
||||
if test "x$with_srilm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_srilm}/include"
|
||||
|
||||
AC_CHECK_HEADER(Ngram.h,
|
||||
[AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
|
||||
[AC_MSG_ERROR([Cannot find SRILM!])])
|
||||
|
||||
if test "x$with_srilm_dynamic" != 'xyes'
|
||||
then
|
||||
LIB_SRILM="-loolm -ldstruct -lmisc -lflm"
|
||||
# ROOT/lib/i686-m64/liboolm.a
|
||||
# ROOT/lib/i686-m64/libdstruct.a
|
||||
# ROOT/lib/i686-m64/libmisc.a
|
||||
if test "x$with_srilm_arch" != 'xno'
|
||||
then
|
||||
MY_ARCH=${with_srilm_arch}
|
||||
else
|
||||
MY_ARCH=`${with_srilm}/sbin/machine-type`
|
||||
fi
|
||||
LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH} -L${with_srilm}/flm/obj/${MY_ARCH}"
|
||||
LIBS="$LIBS $LIB_SRILM"
|
||||
FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
|
||||
else
|
||||
LDFLAGS="$LDFLAGS -L${with_srilm}/lib"
|
||||
LIBS="$LIBS -lsrilm"
|
||||
fi
|
||||
AC_CHECK_LIB([oolm], [trigram_init], [], [AC_MSG_ERROR([Cannot find SRILM's library in ${with_srilm}/lib/${MY_ARCH} ])])
|
||||
AM_CONDITIONAL([SRI_LM], true)
|
||||
fi
|
||||
|
||||
if test "x$with_irstlm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_irstlm}/include"
|
||||
|
||||
|
||||
AC_MSG_NOTICE([])
|
||||
AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!])
|
||||
AC_MSG_NOTICE([!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!])
|
||||
AC_MSG_NOTICE([!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!])
|
||||
AC_MSG_NOTICE([])
|
||||
|
||||
|
||||
AC_CHECK_HEADER(n_gram.h,
|
||||
[AC_DEFINE([HAVE_IRSTLM], [], [flag for IRSTLM])],
|
||||
[AC_MSG_ERROR([Cannot find IRST-LM in ${with_irstlm}])])
|
||||
|
||||
MY_ARCH=`uname -m`
|
||||
LIB_IRSTLM="-lirstlm"
|
||||
LDFLAGS="$LDFLAGS -L${with_irstlm}/lib"
|
||||
LIBS="$LIBS $LIB_IRSTLM"
|
||||
FMTLIBS="$FMTLIBS libirstlm.a"
|
||||
AM_CONDITIONAL([IRST_LM], true)
|
||||
fi
|
||||
|
||||
CPPFLAGS="$CPPFLAGS -I\$(top_srcdir)"
|
||||
#LDFLAGS="$LDFLAGS -L\$(top_srcdir)/util -lkenutil -L\$(top_srcdir)/lm -lkenlm -lz"
|
||||
#KENUTIL_DEPS="\$(top_srcdir)/util/libkenutil.la"
|
||||
#KENLM_DEPS="\$(top_srcdir)/lm/libkenlm.la"
|
||||
#FMTLIBS="$FMTLIBS libkenutil.la libkenlm.la"
|
||||
#AC_SUBST(KENUTIL_DEPS)
|
||||
#AC_SUBST(KENLM_DEPS)
|
||||
|
||||
if test "x$with_randlm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_randlm}/include"
|
||||
|
||||
AC_CHECK_HEADER(RandLM.h,
|
||||
[AC_DEFINE([HAVE_RANDLM], [], [flag for RandLM])],
|
||||
[AC_MSG_ERROR([Cannot find RandLM!])])
|
||||
|
||||
|
||||
MY_ARCH=`uname -m`
|
||||
LIB_RANDLM="-lrandlm"
|
||||
LDFLAGS="$LDFLAGS -L${with_randlm}/lib"
|
||||
LIBS="$LIBS $LIB_RANDLM"
|
||||
FMTLIBS="$FMTLIBS librandlm.a"
|
||||
AM_CONDITIONAL([RAND_LM], true)
|
||||
fi
|
||||
|
||||
if test "x$with_dmaplm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_dmaplm}/src/DMap"
|
||||
|
||||
AC_CHECK_HEADER(StructLanguageModel.h,
|
||||
[AC_DEFINE([HAVE_DMAPLM], [], [flag for DMapLM])],
|
||||
[AC_MSG_ERROR([Cannot find DMapLM!])])
|
||||
|
||||
LDFLAGS="$LDFLAGS -L${with_dmaplm}/src/DMap"
|
||||
LIBS="$LIBS -lDMap"
|
||||
FMTLIBS="FMTLIBS libdmap.la"
|
||||
AM_CONDITIONAL([DMAP_LM], true)
|
||||
fi
|
||||
|
||||
if test "x$with_orlm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${with_orlm}/"
|
||||
|
||||
AC_CHECK_HEADER(onlineRLM.h,
|
||||
#AC_CHECK_HEADER(multiOnlineRLM.h,
|
||||
[AC_DEFINE([HAVE_ORLM], [], [flag for ORLM])],
|
||||
[AC_MSG_ERROR([Cannot find ORLM!])])
|
||||
|
||||
MY_ARCH=`uname -m`
|
||||
AM_CONDITIONAL([ORLM_LM], true)
|
||||
fi
|
||||
if test "x$with_tcmalloc" != 'xno'
|
||||
then
|
||||
AC_CHECK_LIB([tcmalloc], [malloc], [], [AC_MSG_ERROR([Cannot find tcmalloc])])
|
||||
fi
|
||||
|
||||
|
||||
if test "x$enable_boost_pool" != 'xno'
|
||||
then
|
||||
AC_CHECK_HEADER(boost/pool/object_pool.hpp,
|
||||
[AC_DEFINE([USE_BOOST_POOL], [], [Flag to enable use of Boost pool])],
|
||||
[AC_MSG_WARN([Cannot find boost/pool/object_pool.hpp])]
|
||||
)
|
||||
fi
|
||||
|
||||
if test "x$with_synlm" != 'xno'
|
||||
then
|
||||
SAVE_CPPFLAGS="$CPPFLAGS"
|
||||
CPPFLAGS="$CPPFLAGS -I${PWD}/synlm/hhmm/rvtl/include -I${PWD}/synlm/hhmm/wsjparse/include -lm"
|
||||
|
||||
AC_CHECK_HEADERS(nl-cpt.h,
|
||||
[AC_DEFINE([HAVE_SYNLM], [], [flag for Syntactic Parser])],
|
||||
[AC_MSG_ERROR([Cannot find SYNLM in ${PWD}/synlm/hhmm])])
|
||||
|
||||
AM_CONDITIONAL([SYN_LM], true)
|
||||
|
||||
fi
|
||||
|
||||
|
||||
AM_CONDITIONAL([WITH_MERT],false)
|
||||
AC_CHECK_HEADERS([getopt.h],
|
||||
[AM_CONDITIONAL([WITH_MERT],true)],
|
||||
[AC_MSG_WARN([Cannot find getopt.h - disabling new mert])])
|
||||
|
||||
AM_CONDITIONAL([WITH_SERVER],false)
|
||||
if test "x$have_xmlrpc_c" = "xyes" && test "x$enable_threads" = "xyes"; then
|
||||
AM_CONDITIONAL([WITH_SERVER],true)
|
||||
else
|
||||
AC_MSG_NOTICE([Disabling server])
|
||||
fi
|
||||
|
||||
if test "x$with_zlib" != 'xno'
|
||||
then
|
||||
CPPFLAGS="$CPPFLAGS -I${with_zlib}/include"
|
||||
LDFLAGS="$LDFLAGS -L${with_zlib}/lib"
|
||||
fi
|
||||
|
||||
# zlib is always required (see ./moses/src/gzfilebuf.h)
|
||||
# TODO: This shouldn't be presented to the user as a config option if it isn't actually an option
|
||||
AC_CHECK_HEADER(zlib.h,
|
||||
[AC_DEFINE([HAVE_ZLIB], [], [flag for zlib])],
|
||||
[AC_MSG_ERROR([Cannot find zlib.h. Please install it. For Debian, try 'sudo aptitude install zlib1g-dev'])])
|
||||
LIBS="$LIBS -lz"
|
||||
|
||||
|
||||
AC_CONFIG_FILES(Makefile OnDiskPt/src/Makefile moses/src/Makefile moses-cmd/src/Makefile moses-chart-cmd/src/Makefile misc/Makefile mert/Makefile contrib/server/Makefile CreateOnDisk/src/Makefile util/Makefile lm/Makefile)
|
||||
|
||||
AC_OUTPUT()
|
@ -1,22 +1,39 @@
|
||||
#If you get compilation errors here, make sure you have xmlrpc-c installed properly. . .
|
||||
#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option.
|
||||
|
||||
import option ;
|
||||
import path ;
|
||||
|
||||
with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
|
||||
if $(with-xmlrpc-c) {
|
||||
build-moses-server = true ;
|
||||
shell-prefix = $(with-xmlrpc-c)/bin/ ;
|
||||
} else {
|
||||
if [ SHELL $(TOP)"/jam-files/test.sh -include xmlrpc-c/base.hpp -lxmlrpc_server_abyss++" ] = 0 {
|
||||
build-moses-server = true ;
|
||||
xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
|
||||
if ! [ path.exists $(xmlrpc-command) ] {
|
||||
exit Could not find $(xmlrpc-command) : 1 ;
|
||||
}
|
||||
shell-prefix = "" ;
|
||||
} else {
|
||||
xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
|
||||
if $(xmlrpc-check[2]) = 0 {
|
||||
if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
|
||||
build-moses-server = true ;
|
||||
} else {
|
||||
echo "Found xmlrpc-c but it does not have abyss-server. Skipping mosesserver." ;
|
||||
}
|
||||
}
|
||||
xmlrpc-command = "xmlrpc-c-config" ;
|
||||
}
|
||||
|
||||
rule shell_or_die ( cmd ) {
|
||||
local ret = [ _shell $(cmd) : exit-status ] ;
|
||||
if $(ret[2]) != 0 {
|
||||
exit "Failed to run $(cmd)" : 1 ;
|
||||
}
|
||||
return $(ret[1]) ;
|
||||
}
|
||||
|
||||
if $(build-moses-server) = true
|
||||
{
|
||||
xmlrpc-linkflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --libs" ] ;
|
||||
xmlrpc-cxxflags = [ _shell "$(shell-prefix)xmlrpc-c-config c++2 abyss-server --cflags" ] ;
|
||||
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
|
||||
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
|
||||
|
||||
exe mosesserver : mosesserver.cpp ../../moses/src//moses ../../OnDiskPt/src//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
|
||||
} else {
|
||||
|
@ -1,5 +0,0 @@
|
||||
bin_PROGRAMS = mosesserver
|
||||
mosesserver_SOURCES = mosesserver.cpp
|
||||
mosesserver_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src $(XMLRPC_C_CPPFLAGS) $(BOOST_CPPFLAGS)
|
||||
mosesserver_LDADD = -L$(top_srcdir)/moses/src -lmoses -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(XMLRPC_C_LIBS) $(BOOST_THREAD_LIBS)
|
||||
mosesserver_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a
|
@ -86,15 +86,7 @@ function run_single_test () {
|
||||
|
||||
err=""
|
||||
|
||||
echo "## ./bjam clean" >> $longlog
|
||||
./bjam clean >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
||||
|
||||
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
|
||||
if [ -z "$err" ]; then
|
||||
./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
|
||||
fi
|
||||
|
||||
cd regression-testing
|
||||
cd regression-testing
|
||||
regtest_file=$(echo "$REGTEST_ARCHIVE" | sed 's/^.*\///')
|
||||
|
||||
# download data for regression tests if necessary
|
||||
@ -104,15 +96,22 @@ function run_single_test () {
|
||||
tar xzf $regtest_file
|
||||
touch $regtest_file.ok
|
||||
fi
|
||||
regtest_dir=$PWD/$(basename $regtest_file .tgz)
|
||||
cd ..
|
||||
|
||||
|
||||
echo "## ./bjam clean" >> $longlog
|
||||
./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
||||
|
||||
echo "## ./bjam $MCC_CONFIGURE_ARGS" >> $longlog
|
||||
if [ -z "$err" ]; then
|
||||
./bjam $MCC_CONFIGURE_ARGS >> $longlog 2>&1 || err="bjam"
|
||||
fi
|
||||
|
||||
echo "## regression tests" >> $longlog
|
||||
if [ -z "$err" ]; then
|
||||
./run-test-suite.perl &>> $longlog
|
||||
regtest_status=$?
|
||||
[ $regtest_status -eq 1 ] && die "Failed to run regression tests"
|
||||
[ $regtest_status -eq 2 ] && err="regression tests"
|
||||
./bjam $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || err="regression tests"
|
||||
fi
|
||||
cd ..
|
||||
|
||||
if [ -z "$err" ] && [ "$MCC_RUN_EMS" = "yes" ]; then
|
||||
echo "## EMS" >> $longlog
|
||||
|
@ -1,25 +0,0 @@
|
||||
lib_LTLIBRARIES = libkenlm.la
|
||||
bin_PROGRAMS = query build_binary
|
||||
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
|
||||
libkenlm_la_SOURCES = \
|
||||
bhiksha.cc \
|
||||
binary_format.cc \
|
||||
config.cc \
|
||||
lm_exception.cc \
|
||||
model.cc \
|
||||
search_hashed.cc \
|
||||
search_trie.cc \
|
||||
quantize.cc \
|
||||
read_arpa.cc \
|
||||
trie.cc \
|
||||
trie_sort.cc \
|
||||
virtual_interface.cc \
|
||||
vocab.cc
|
||||
|
||||
query_SOURCES = ngram_query.cc
|
||||
query_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la
|
||||
|
||||
build_binary_SOURCES = build_binary.cc
|
||||
build_binary_LDADD = libkenlm.la $(top_srcdir)/util/libkenutil.la
|
||||
|
@ -160,44 +160,45 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
if (optind + 1 == argc) {
|
||||
ShowSizes(argv[optind], config);
|
||||
} else if (optind + 2 == argc) {
|
||||
return 0;
|
||||
}
|
||||
const char *model_type, *from_file;
|
||||
if (optind + 2 == argc) {
|
||||
model_type = "probing";
|
||||
from_file = argv[optind];
|
||||
config.write_mmap = argv[optind + 1];
|
||||
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
|
||||
ProbingModel(argv[optind], config);
|
||||
} else if (optind + 3 == argc) {
|
||||
const char *model_type = argv[optind];
|
||||
const char *from_file = argv[optind + 1];
|
||||
model_type = argv[optind];
|
||||
from_file = argv[optind + 1];
|
||||
config.write_mmap = argv[optind + 2];
|
||||
if (!strcmp(model_type, "probing")) {
|
||||
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
|
||||
ProbingModel(from_file, config);
|
||||
} else if (!strcmp(model_type, "trie")) {
|
||||
if (quantize) {
|
||||
if (bhiksha) {
|
||||
QuantArrayTrieModel(from_file, config);
|
||||
} else {
|
||||
QuantTrieModel(from_file, config);
|
||||
}
|
||||
} else {
|
||||
Usage(argv[0]);
|
||||
}
|
||||
if (!strcmp(model_type, "probing")) {
|
||||
if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
|
||||
ProbingModel(from_file, config);
|
||||
} else if (!strcmp(model_type, "trie")) {
|
||||
if (quantize) {
|
||||
if (bhiksha) {
|
||||
QuantArrayTrieModel(from_file, config);
|
||||
} else {
|
||||
if (bhiksha) {
|
||||
ArrayTrieModel(from_file, config);
|
||||
} else {
|
||||
TrieModel(from_file, config);
|
||||
}
|
||||
QuantTrieModel(from_file, config);
|
||||
}
|
||||
} else {
|
||||
Usage(argv[0]);
|
||||
if (bhiksha) {
|
||||
ArrayTrieModel(from_file, config);
|
||||
} else {
|
||||
TrieModel(from_file, config);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Usage(argv[0]);
|
||||
}
|
||||
}
|
||||
catch (const std::exception &e) {
|
||||
std::cerr << "Built " << config.write_mmap << " successfully." << std::endl;
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << e.what() << std::endl;
|
||||
std::cerr << "ERROR" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::cerr << "SUCCESS" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
@ -229,7 +229,7 @@ void MissingSentenceMarker(const Config &config, const char *str) throw(SpecialW
|
||||
if (config.messages) *config.messages << "Missing special word " << str << "; will treat it as <unk>.";
|
||||
break;
|
||||
case THROW_UP:
|
||||
UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. Run build_binary -s to disable this check.");
|
||||
// Reject the model. The message names the likely cause (an IRSTLM-built ARPA file that was not run through add-start-end.sh) and the -s switch that bypasses this check.
UTIL_THROW(SpecialWordMissingException, "The ARPA file is missing " << str << " and the model is configured to reject these models. If you built your ARPA with IRSTLM and forgot to run add-start-end.sh, complain to <bertoldi at fbk.eu> stating that you think build-lm.sh should do this by default, then go back and retrain your model from the start. To bypass this check and treat " << str << " as an OOV, pass -s. The resulting model will not work with e.g. Moses.");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,52 +0,0 @@
|
||||
AC_DEFUN([AX_XMLRPC_C], [
|
||||
AC_MSG_CHECKING(for XMLRPC-C)
|
||||
|
||||
AC_ARG_WITH(xmlrpc-c,
|
||||
[ --with-xmlrpc-c=PATH Enable XMLRPC-C support. Setting the PATH to yes will search for xmlrpc-c-config on the shell PATH,],
|
||||
[
|
||||
if test "$withval" = "no"; then
|
||||
AC_MSG_RESULT(no)
|
||||
|
||||
else
|
||||
if test "$withval" = "yes"; then
|
||||
xmlrpc_cc_prg="xmlrpc-c-config"
|
||||
else
|
||||
xmlrpc_cc_prg="$withval"
|
||||
fi
|
||||
|
||||
if eval $xmlrpc_cc_prg --version 2>/dev/null >/dev/null; then
|
||||
XMLRPC_C_CPPFLAGS=`$xmlrpc_cc_prg --cflags c++2 abyss-server`
|
||||
XMLRPC_C_LIBS=`$xmlrpc_cc_prg c++2 abyss-server --libs`
|
||||
CXXFLAGS_SAVED=$CXXFLAGS
|
||||
CXXFLAGS="$CXXFLAGS $XMLRPC_C_CPPFLAGS"
|
||||
LIBS_SAVED=$LIBS
|
||||
LIBS="$LIBS $XMLRPC_C_LIBS"
|
||||
|
||||
AC_TRY_LINK(
|
||||
[ #include <xmlrpc-c/server.h>
|
||||
],[ xmlrpc_registry_new(NULL); ],
|
||||
[
|
||||
AC_MSG_RESULT(ok)
|
||||
], [
|
||||
AC_MSG_RESULT(failed)
|
||||
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
|
||||
])
|
||||
|
||||
dnl AC_DEFINE(HAVE_XMLRPC_C, 1, Support for XMLRPC-C.)
|
||||
have_xmlrpc_c=yes
|
||||
AC_SUBST(XMLRPC_C_LIBS)
|
||||
AC_SUBST(XMLRPC_C_CPPFLAGS)
|
||||
|
||||
LIBS=$LIBS_SAVED
|
||||
CXXFLAGS=$CXXFLAGS_SAVED
|
||||
|
||||
else
|
||||
AC_MSG_RESULT(failed)
|
||||
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
|
||||
fi
|
||||
fi
|
||||
|
||||
],[
|
||||
AC_MSG_RESULT(ignored)
|
||||
])
|
||||
])
|
1035
m4/boost.m4
1035
m4/boost.m4
File diff suppressed because it is too large
Load Diff
@ -56,7 +56,7 @@ private:
|
||||
|
||||
typedef map<vector<int>,int,CompareNgrams> counts_t;
|
||||
typedef map<vector<int>,int,CompareNgrams>::iterator counts_iterator;
|
||||
typedef map<vector<int>,int,CompareNgrams>::iterator counts_const_iterator;
|
||||
typedef map<vector<int>,int,CompareNgrams>::const_iterator counts_const_iterator;
|
||||
typedef ScopedVector<counts_t> refcounts_t;
|
||||
|
||||
/**
|
||||
|
@ -1,45 +0,0 @@
|
||||
lib_LTLIBRARIES = libmert.la
|
||||
bin_PROGRAMS = mert extractor evaluator pro
|
||||
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
|
||||
|
||||
libmert_la_SOURCES = \
|
||||
Util.cpp \
|
||||
FileStream.cpp \
|
||||
Timer.cpp \
|
||||
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
|
||||
ScoreDataIterator.cpp \
|
||||
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
|
||||
FeatureDataIterator.cpp \
|
||||
Data.cpp \
|
||||
BleuScorer.cpp \
|
||||
Point.cpp \
|
||||
PerScorer.cpp \
|
||||
Scorer.cpp \
|
||||
ScorerFactory.cpp \
|
||||
Optimizer.cpp \
|
||||
TERsrc/alignmentStruct.cpp \
|
||||
TERsrc/hashMap.cpp \
|
||||
TERsrc/hashMapStringInfos.cpp \
|
||||
TERsrc/stringHasher.cpp \
|
||||
TERsrc/terAlignment.cpp \
|
||||
TERsrc/terShift.cpp \
|
||||
TERsrc/hashMapInfos.cpp \
|
||||
TERsrc/infosHasher.cpp \
|
||||
TERsrc/stringInfosHasher.cpp \
|
||||
TERsrc/tercalc.cpp \
|
||||
TERsrc/tools.cpp \
|
||||
TerScorer.cpp \
|
||||
CderScorer.cpp \
|
||||
MergeScorer.cpp
|
||||
|
||||
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
|
||||
extractor_SOURCES = extractor.cpp
|
||||
evaluator_SOURCES = evaluator.cpp
|
||||
pro_SOURCES = pro.cpp
|
||||
|
||||
extractor_LDADD = libmert.la -lm -lz
|
||||
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
evaluator_LDADD = libmert.la -lm -lz
|
||||
pro_LDADD = libmert.la $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
|
||||
pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la libmert.la
|
||||
|
@ -1,16 +0,0 @@
|
||||
bin_PROGRAMS = processPhraseTable processLexicalTable queryLexicalTable queryPhraseTable
|
||||
|
||||
processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp
|
||||
processLexicalTable_SOURCES = processLexicalTable.cpp
|
||||
queryLexicalTable_SOURCES = queryLexicalTable.cpp
|
||||
queryPhraseTable_SOURCES = queryPhraseTable.cpp
|
||||
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||
|
||||
processPhraseTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
|
||||
processLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
|
||||
queryLexicalTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
|
||||
queryPhraseTable_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
111
mkinstalldirs
111
mkinstalldirs
@ -1,111 +0,0 @@
|
||||
#! /bin/sh
|
||||
# mkinstalldirs --- make directory hierarchy
|
||||
# Author: Noah Friedman <friedman@prep.ai.mit.edu>
|
||||
# Created: 1993-05-16
|
||||
# Public domain
|
||||
|
||||
errstatus=0
|
||||
dirmode=""
|
||||
|
||||
usage="\
|
||||
Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..."
|
||||
|
||||
# process command line arguments
|
||||
while test $# -gt 0 ; do
|
||||
case $1 in
|
||||
-h | --help | --h*) # -h for help
|
||||
echo "$usage" 1>&2
|
||||
exit 0
|
||||
;;
|
||||
-m) # -m PERM arg
|
||||
shift
|
||||
test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
|
||||
dirmode=$1
|
||||
shift
|
||||
;;
|
||||
--) # stop option processing
|
||||
shift
|
||||
break
|
||||
;;
|
||||
-*) # unknown option
|
||||
echo "$usage" 1>&2
|
||||
exit 1
|
||||
;;
|
||||
*) # first non-opt arg
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
for file
|
||||
do
|
||||
if test -d "$file"; then
|
||||
shift
|
||||
else
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
case $# in
|
||||
0) exit 0 ;;
|
||||
esac
|
||||
|
||||
case $dirmode in
|
||||
'')
|
||||
if mkdir -p -- . 2>/dev/null; then
|
||||
echo "mkdir -p -- $*"
|
||||
exec mkdir -p -- "$@"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if mkdir -m "$dirmode" -p -- . 2>/dev/null; then
|
||||
echo "mkdir -m $dirmode -p -- $*"
|
||||
exec mkdir -m "$dirmode" -p -- "$@"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
for file
|
||||
do
|
||||
set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
|
||||
shift
|
||||
|
||||
pathcomp=
|
||||
for d
|
||||
do
|
||||
pathcomp="$pathcomp$d"
|
||||
case $pathcomp in
|
||||
-*) pathcomp=./$pathcomp ;;
|
||||
esac
|
||||
|
||||
if test ! -d "$pathcomp"; then
|
||||
echo "mkdir $pathcomp"
|
||||
|
||||
mkdir "$pathcomp" || lasterr=$?
|
||||
|
||||
if test ! -d "$pathcomp"; then
|
||||
errstatus=$lasterr
|
||||
else
|
||||
if test ! -z "$dirmode"; then
|
||||
echo "chmod $dirmode $pathcomp"
|
||||
lasterr=""
|
||||
chmod "$dirmode" "$pathcomp" || lasterr=$?
|
||||
|
||||
if test ! -z "$lasterr"; then
|
||||
errstatus=$lasterr
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
pathcomp="$pathcomp/"
|
||||
done
|
||||
done
|
||||
|
||||
exit $errstatus
|
||||
|
||||
# Local Variables:
|
||||
# mode: shell-script
|
||||
# sh-indentation: 2
|
||||
# End:
|
||||
# mkinstalldirs ends here
|
@ -1,10 +0,0 @@
|
||||
bin_PROGRAMS = moses_chart
|
||||
moses_chart_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||
|
||||
moses_chart_LDADD = -L$(top_srcdir)/moses/src -L$(top_srcdir)/OnDiskPt/src -lmoses -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
moses_chart_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.la $(top_srcdir)/OnDiskPt/src/libOnDiskPt.a
|
||||
|
||||
|
||||
|
||||
|
@ -1,10 +0,0 @@
|
||||
bin_PROGRAMS = moses lmbrgrid
|
||||
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||
|
||||
moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp
|
||||
moses_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
|
||||
|
||||
lmbrgrid_SOURCES = LatticeMBRGrid.cpp LatticeMBR.cpp IOWrapper.cpp
|
||||
lmbrgrid_LDADD = $(top_builddir)/moses/src/libmoses.la -L$(top_srcdir)/OnDiskPt/src -lOnDiskPt $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
@ -100,7 +100,7 @@ void LanguageModelImplementation::CalcScore(const Phrase &phrase, float &fullSco
|
||||
if (word == GetSentenceStartArray()) {
|
||||
// do nothing, don't include prob for <s> unigram
|
||||
if (currPos != 0) {
|
||||
std::cerr << "Your data contains <s> in a position other than the first word." << std::endl;
|
||||
std::cerr << "Either your data contains <s> in a position other than the first word or your language model is missing <s>. Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" << std::endl;
|
||||
abort();
|
||||
}
|
||||
} else {
|
||||
|
@ -6,7 +6,7 @@ if $(with-irstlm) != ""
|
||||
lib irstlm : : <search>$(with-irstlm)/lib ;
|
||||
obj IRST.o : IRST.cpp ..//headers : <include>$(with-irstlm)/include ;
|
||||
alias irst : IRST.o irstlm : : : <define>LM_IRST ;
|
||||
echo "" ;
|
||||
echo "Forcing single-threaded build because of IRSTLM." ;
|
||||
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
|
||||
echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
|
||||
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
|
||||
|
@ -198,7 +198,7 @@ template <class Model> void LanguageModelKen<Model>::CalcScore(const Phrase &phr
|
||||
} else {
|
||||
lm::WordIndex index = TranslateID(word);
|
||||
if (index == m_ngram->GetVocabulary().BeginSentence()) {
|
||||
std::cerr << "Your data contains <s> in a position other than the first word." << std::endl;
|
||||
std::cerr << "Either your data contains <s> in a position other than the first word or your language model is missing <s>. Did you build your ARPA using IRSTLM and forget to run add-start-end.sh?" << std::endl;
|
||||
abort();
|
||||
}
|
||||
float score = TransformLMScore(m_ngram->Score(*state0, index, *state1));
|
||||
|
@ -1,335 +0,0 @@
|
||||
lib_LTLIBRARIES = libmoses.la
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
|
||||
|
||||
libmoses_ladir = ${includedir}
|
||||
|
||||
libmoses_la_HEADERS = \
|
||||
AlignmentInfo.h \
|
||||
AlignmentInfoCollection.h \
|
||||
BilingualDynSuffixArray.h \
|
||||
BitmapContainer.h \
|
||||
CellCollection.h \
|
||||
ChartCell.h \
|
||||
ChartCellCollection.h \
|
||||
ChartHypothesis.h \
|
||||
ChartHypothesisCollection.h \
|
||||
ChartManager.h \
|
||||
ChartRuleLookupManager.h \
|
||||
ChartRuleLookupManagerMemory.h \
|
||||
ChartRuleLookupManagerOnDisk.h \
|
||||
ChartTranslationOption.h \
|
||||
ChartTranslationOptionCollection.h \
|
||||
ChartTranslationOptionList.h \
|
||||
ChartTrellisDetour.h \
|
||||
ChartTrellisDetourQueue.h \
|
||||
ChartTrellisNode.h \
|
||||
ChartTrellisPath.h \
|
||||
ChartTrellisPathList.h \
|
||||
ConfusionNet.h \
|
||||
DecodeFeature.h \
|
||||
DecodeGraph.h \
|
||||
DecodeStep.h \
|
||||
DecodeStepGeneration.h \
|
||||
DecodeStepTranslation.h \
|
||||
Dictionary.h \
|
||||
DotChart.h \
|
||||
DotChartInMemory.h \
|
||||
DotChartOnDisk.h \
|
||||
DummyScoreProducers.h \
|
||||
DynSAInclude/file.h \
|
||||
DynSAInclude/vocab.h \
|
||||
DynSuffixArray.h \
|
||||
FFState.h \
|
||||
Factor.h \
|
||||
FactorCollection.h \
|
||||
FactorTypeSet.h \
|
||||
FeatureFunction.h \
|
||||
File.h \
|
||||
FilePtr.h \
|
||||
FloydWarshall.h \
|
||||
GenerationDictionary.h \
|
||||
GlobalLexicalModel.h \
|
||||
gzfilebuf.h \
|
||||
hash.h \
|
||||
Hypothesis.h \
|
||||
HypothesisStack.h \
|
||||
HypothesisStackCubePruning.h \
|
||||
HypothesisStackNormal.h \
|
||||
InputFileStream.h \
|
||||
InputType.h \
|
||||
LMList.h \
|
||||
LVoc.h \
|
||||
LM/Base.h \
|
||||
LM/Joint.h \
|
||||
LM/Factory.h \
|
||||
LM/Implementation.h \
|
||||
LM/MultiFactor.h \
|
||||
LM/Remote.h \
|
||||
LM/SingleFactor.h \
|
||||
LM/Ken.h \
|
||||
LexicalReordering.h \
|
||||
LexicalReorderingState.h \
|
||||
LexicalReorderingTable.h \
|
||||
Manager.h \
|
||||
NonTerminal.h \
|
||||
ObjectPool.h \
|
||||
PCNTools.h \
|
||||
PDTAimp.h \
|
||||
Parameter.h \
|
||||
PartialTranslOptColl.h \
|
||||
Phrase.h \
|
||||
PhraseDictionary.h \
|
||||
PhraseDictionaryALSuffixArray.h \
|
||||
PhraseDictionaryDynSuffixArray.h \
|
||||
PhraseDictionaryMemory.h \
|
||||
PhraseDictionarySCFG.h \
|
||||
PhraseDictionaryNode.h \
|
||||
PhraseDictionaryNodeSCFG.h \
|
||||
PhraseDictionaryOnDisk.h \
|
||||
PhraseDictionaryTree.h \
|
||||
PhraseDictionaryTreeAdaptor.h \
|
||||
PrefixTree.h \
|
||||
PrefixTreeMap.h \
|
||||
ReorderingConstraint.h \
|
||||
ReorderingStack.h \
|
||||
RuleCube.h \
|
||||
RuleCubeItem.h \
|
||||
RuleCubeQueue.h \
|
||||
RuleTableLoader.h \
|
||||
RuleTableLoaderCompact.h \
|
||||
RuleTableLoaderFactory.h \
|
||||
RuleTableLoaderHiero.h \
|
||||
RuleTableLoaderStandard.h \
|
||||
ScoreComponentCollection.h \
|
||||
ScoreIndexManager.h \
|
||||
ScoreProducer.h \
|
||||
Search.h \
|
||||
SearchCubePruning.h \
|
||||
SearchNormal.h \
|
||||
Sentence.h \
|
||||
SentenceStats.h \
|
||||
SquareMatrix.h \
|
||||
StaticData.h \
|
||||
TargetPhrase.h \
|
||||
TargetPhraseCollection.h \
|
||||
ThreadPool.h \
|
||||
Timer.h \
|
||||
TranslationOption.h \
|
||||
TranslationOptionCollection.h \
|
||||
TranslationOptionCollectionConfusionNet.h \
|
||||
TranslationOptionCollectionText.h \
|
||||
TranslationOptionList.h \
|
||||
TranslationSystem.h \
|
||||
TreeInput.h \
|
||||
TrellisPath.h \
|
||||
TrellisPathCollection.h \
|
||||
TrellisPathList.h \
|
||||
TypeDef.h \
|
||||
UniqueObject.h \
|
||||
UserMessage.h \
|
||||
Util.h \
|
||||
Word.h \
|
||||
WordLattice.h \
|
||||
WordsBitmap.h \
|
||||
WordsRange.h \
|
||||
XmlOption.h
|
||||
|
||||
if PROTOBUF
|
||||
libmoses_la_HEADERS += rule.pb.h hypergraph.pb.h
|
||||
endif
|
||||
|
||||
if SRI_LM
|
||||
libmoses_la_HEADERS += LM/SRI.h \
|
||||
LM/ParallelBackoff.h
|
||||
endif
|
||||
|
||||
if IRST_LM
|
||||
libmoses_la_HEADERS += LM/IRST.h
|
||||
endif
|
||||
|
||||
if RAND_LM
|
||||
libmoses_la_HEADERS += LM/Rand.h
|
||||
endif
|
||||
|
||||
if ORLM_LM
|
||||
libmoses_la_HEADERS += LM/ORLM.h \
|
||||
DynSAInclude/params.h \
|
||||
DynSAInclude/hash.h \
|
||||
DynSAInclude/quantizer.h \
|
||||
DynSAInclude/RandLMFilter.h \
|
||||
DynSAInclude/RandLMCache.h
|
||||
endif
|
||||
|
||||
if SYN_LM
|
||||
libmoses_la_HEADERS += SyntacticLanguageModel.h
|
||||
endif
|
||||
|
||||
libmoses_la_SOURCES = \
|
||||
AlignmentInfo.cpp \
|
||||
AlignmentInfoCollection.cpp \
|
||||
BilingualDynSuffixArray.cpp \
|
||||
BitmapContainer.cpp \
|
||||
ChartCell.cpp \
|
||||
ChartCellCollection.cpp \
|
||||
ChartHypothesis.cpp \
|
||||
ChartHypothesisCollection.cpp \
|
||||
ChartManager.cpp \
|
||||
ChartRuleLookupManager.cpp \
|
||||
ChartRuleLookupManagerMemory.cpp \
|
||||
ChartRuleLookupManagerOnDisk.cpp \
|
||||
ChartTranslationOption.cpp \
|
||||
ChartTranslationOptionCollection.cpp \
|
||||
ChartTranslationOptionList.cpp \
|
||||
ChartTrellisDetour.cpp \
|
||||
ChartTrellisDetourQueue.cpp \
|
||||
ChartTrellisNode.cpp \
|
||||
ChartTrellisPath.cpp \
|
||||
ConfusionNet.cpp \
|
||||
DecodeFeature.cpp \
|
||||
DecodeGraph.cpp \
|
||||
DecodeStep.cpp \
|
||||
DecodeStepGeneration.cpp \
|
||||
DecodeStepTranslation.cpp \
|
||||
Dictionary.cpp \
|
||||
DotChart.cpp \
|
||||
DotChartInMemory.cpp \
|
||||
DotChartOnDisk.cpp \
|
||||
DummyScoreProducers.cpp \
|
||||
DynSAInclude/file.cpp \
|
||||
DynSAInclude/vocab.cpp \
|
||||
DynSuffixArray.cpp \
|
||||
FFState.cpp \
|
||||
Factor.cpp \
|
||||
FactorCollection.cpp \
|
||||
FactorTypeSet.cpp \
|
||||
FeatureFunction.cpp \
|
||||
FloydWarshall.cpp \
|
||||
GenerationDictionary.cpp \
|
||||
GlobalLexicalModel.cpp \
|
||||
hash.cpp \
|
||||
Hypothesis.cpp \
|
||||
HypothesisStack.cpp \
|
||||
HypothesisStackCubePruning.cpp \
|
||||
HypothesisStackNormal.cpp \
|
||||
InputFileStream.cpp \
|
||||
InputType.cpp \
|
||||
LMList.cpp \
|
||||
LVoc.cpp \
|
||||
LM/Base.cpp \
|
||||
LM/Factory.cpp \
|
||||
LM/Implementation.cpp \
|
||||
LM/Joint.cpp \
|
||||
LM/Ken.cpp \
|
||||
LM/MultiFactor.cpp \
|
||||
LM/Remote.cpp \
|
||||
LM/SingleFactor.cpp \
|
||||
LexicalReordering.cpp \
|
||||
LexicalReorderingState.cpp \
|
||||
LexicalReorderingTable.cpp \
|
||||
Manager.cpp \
|
||||
PCNTools.cpp \
|
||||
Parameter.cpp \
|
||||
PartialTranslOptColl.cpp \
|
||||
Phrase.cpp \
|
||||
PhraseDictionary.cpp \
|
||||
PhraseDictionaryALSuffixArray.cpp \
|
||||
PhraseDictionaryDynSuffixArray.cpp \
|
||||
PhraseDictionaryHiero.cpp \
|
||||
PhraseDictionaryMemory.cpp \
|
||||
PhraseDictionarySCFG.cpp \
|
||||
PhraseDictionaryNode.cpp \
|
||||
PhraseDictionaryNodeSCFG.cpp \
|
||||
PhraseDictionaryOnDisk.cpp \
|
||||
PhraseDictionaryTree.cpp \
|
||||
PhraseDictionaryTreeAdaptor.cpp \
|
||||
PrefixTreeMap.cpp \
|
||||
ReorderingConstraint.cpp \
|
||||
ReorderingStack.cpp \
|
||||
RuleCube.cpp \
|
||||
RuleCubeItem.cpp \
|
||||
RuleCubeQueue.cpp \
|
||||
RuleTableLoaderCompact.cpp \
|
||||
RuleTableLoaderFactory.cpp \
|
||||
RuleTableLoaderHiero.cpp \
|
||||
RuleTableLoaderStandard.cpp \
|
||||
ScoreComponentCollection.cpp \
|
||||
ScoreIndexManager.cpp \
|
||||
ScoreProducer.cpp \
|
||||
Search.cpp \
|
||||
SearchCubePruning.cpp \
|
||||
SearchNormal.cpp \
|
||||
Sentence.cpp \
|
||||
SentenceStats.cpp \
|
||||
SquareMatrix.cpp \
|
||||
StaticData.cpp \
|
||||
TargetPhrase.cpp \
|
||||
TargetPhraseCollection.cpp \
|
||||
ThreadPool.cpp \
|
||||
Timer.cpp \
|
||||
TranslationOption.cpp \
|
||||
TranslationOptionCollection.cpp \
|
||||
TranslationOptionCollectionConfusionNet.cpp \
|
||||
TranslationOptionCollectionText.cpp \
|
||||
TranslationOptionList.cpp \
|
||||
TranslationSystem.cpp \
|
||||
TreeInput.cpp \
|
||||
TrellisPath.cpp \
|
||||
TrellisPathCollection.cpp \
|
||||
UserMessage.cpp \
|
||||
Util.cpp \
|
||||
Word.cpp \
|
||||
WordLattice.cpp \
|
||||
WordsBitmap.cpp \
|
||||
WordsRange.cpp \
|
||||
XmlOption.cpp
|
||||
|
||||
if PROTOBUF
|
||||
BUILT_SOURCES = \
|
||||
rule.pb.h \
|
||||
rule.pb.cc \
|
||||
hypergraph.pb.h \
|
||||
hypergraph.pb.cc
|
||||
|
||||
CLEANFILES = $(BUILT_SOURCES)
|
||||
SUFFIXES = .proto
|
||||
|
||||
rule.pb.cc: rule.proto
|
||||
@PROTOC@ --cpp_out=. $<
|
||||
rule.pb.h: rule.proto
|
||||
@PROTOC@ --cpp_out=. $<
|
||||
|
||||
hypergraph.pb.cc: hypergraph.proto
|
||||
@PROTOC@ --cpp_out=. $<
|
||||
hypergraph.pb.h: hypergraph.proto
|
||||
@PROTOC@ --cpp_out=. $<
|
||||
|
||||
libmoses_la_SOURCES += rule.pb.cc hypergraph.pb.cc
|
||||
|
||||
endif
|
||||
|
||||
if SRI_LM
|
||||
libmoses_la_SOURCES += LM/SRI.cpp \
|
||||
LM/ParallelBackoff.cpp
|
||||
|
||||
endif
|
||||
|
||||
if IRST_LM
|
||||
libmoses_la_SOURCES += LM/IRST.cpp
|
||||
endif
|
||||
|
||||
if RAND_LM
|
||||
libmoses_la_SOURCES += LM/Rand.cpp
|
||||
endif
|
||||
|
||||
if ORLM_LM
|
||||
libmoses_la_SOURCES += LM/ORLM.cpp \
|
||||
DynSAInclude/onlineRLM.h \
|
||||
DynSAInclude/perfecthash.h \
|
||||
DynSAInclude/params.cpp
|
||||
endif
|
||||
|
||||
if SYN_LM
|
||||
libmoses_la_SOURCES += SyntacticLanguageModel.cpp
|
||||
endif
|
||||
|
||||
libmoses_la_LIBADD = $(top_srcdir)/util/libkenutil.la $(top_srcdir)/lm/libkenlm.la $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
@ -1,109 +0,0 @@
|
||||
#!/bin/bash
|
||||
cat <<EOF
|
||||
Moses is moving to Boost Jam. To build Moses, run one command:
|
||||
./bjam [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] -j4
|
||||
|
||||
If that's not working for you, complain to moses-support then run
|
||||
./regenerate-makefiles.sh --force to continue using autotools.
|
||||
EOF
|
||||
if [ z"$1" != z--force ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# NOTE:
|
||||
# Versions 1.9 (or higher) of aclocal and automake are required.
|
||||
# And version >=2.60 of autoconf
|
||||
# And version >=1.4.7 of m4
|
||||
|
||||
# For Mac OSX users:
|
||||
# Standard distribution usually includes versions 1.6.
|
||||
# Get versions 1.9 or higher
|
||||
# Set the following variable to the correct paths
|
||||
#ACLOCAL="/path/to/aclocal-1.9"
|
||||
#AUTOMAKE="/path/to/automake-1.9"
|
||||
|
||||
# die MESSAGE...
# Print the given message to stderr, add platform-specific hints for Ubuntu
# and Mac OS X when those systems are detected, then exit with status 1.
function die () {
|
||||
echo "$@" >&2
|
||||
|
||||
# Try to be as helpful as possible by detecting OS and making recommendations
|
||||
# Count case-insensitive "ubuntu" matches in the lsb_release output.
if (( $(lsb_release -a | fgrep -ci "ubuntu") > 0 )); then
|
||||
echo >&2
|
||||
echo >&2 "Need to install build autotools on Ubuntu? Use:"
|
||||
echo >&2 "sudo aptitude install autoconf automake libtool build-essential"
|
||||
fi
|
||||
# Same detection approach for Mac OS X, using the uname output.
if (( $(uname -a | fgrep -ci "darwin") > 0 )); then
|
||||
echo >&2
|
||||
echo >&2 "Having problems on Mac OSX?"
|
||||
echo >&2 "You might have an old version of aclocal/automake. You'll need to upgrade these."
|
||||
fi
|
||||
# Always terminate the script with a failure status.
exit 1
|
||||
}
|
||||
|
||||
if [ -z "$ACLOCAL" ]; then
|
||||
ACLOCAL=`which aclocal`
|
||||
[ -n "$ACLOCAL" ] || die "aclocal not found on your system. Please install it or set $ACLOCAL"
|
||||
fi
|
||||
|
||||
if [ -z "$AUTOMAKE" ]; then
|
||||
AUTOMAKE=`which automake`
|
||||
[ -n "$AUTOMAKE" ] || die "automake not found on your system. Please install it or set $AUTOMAKE"
|
||||
fi
|
||||
|
||||
if [ -z "$AUTOCONF" ]; then
|
||||
AUTOCONF=`which autoconf`
|
||||
[ -n "$AUTOCONF" ] || die "autoconf not found on your system. Please install it or set $AUTOCONF"
|
||||
fi
|
||||
|
||||
if [ -z "$LIBTOOLIZE" ]; then
|
||||
LIBTOOLIZE=`which libtoolize`
|
||||
|
||||
if [ -z "$LIBTOOLIZE" ]; then
|
||||
LIBTOOLIZE=`which glibtoolize`
|
||||
fi
|
||||
|
||||
[ -n "$LIBTOOLIZE" ] || die "libtoolize/glibtoolize not found on your system. Please install it or set $LIBTOOLIZE"
|
||||
fi
|
||||
|
||||
echo >&2 "Detected aclocal: $($ACLOCAL --version | head -n1)"
|
||||
echo >&2 "Detected autoconf: $($AUTOCONF --version | head -n1)"
|
||||
echo >&2 "Detected automake: $($AUTOMAKE --version | head -n1)"
|
||||
echo >&2 "Detected libtoolize: $($LIBTOOLIZE --version | head -n1)"
|
||||
|
||||
echo "Calling $ACLOCAL -I m4..."
|
||||
$ACLOCAL -I m4 || die "aclocal failed"
|
||||
|
||||
echo "Calling $AUTOCONF..."
|
||||
$AUTOCONF || die "autoconf failed"
|
||||
|
||||
echo "Calling $LIBTOOLIZE"
|
||||
$LIBTOOLIZE || die "libtoolize failed"
|
||||
|
||||
echo "Calling $AUTOMAKE --add-missing..."
|
||||
$AUTOMAKE --add-missing || die "automake failed"
|
||||
|
||||
case `uname -s` in
|
||||
Darwin)
|
||||
cores=$(sysctl -n hw.ncpu)
|
||||
;;
|
||||
Linux)
|
||||
cores=$(cat /proc/cpuinfo | fgrep -c processor)
|
||||
;;
|
||||
*)
|
||||
echo "Unknown platform."
|
||||
cores=
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -z "$cores" ]; then
|
||||
cores=2 # assume 2 cores if we can't figure it out
|
||||
echo >&2 "Assuming 2 cores"
|
||||
else
|
||||
echo >&2 "Detected $cores cores"
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "You should now be able to configure and build:"
|
||||
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--with-synlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
|
||||
echo " make -j ${cores}"
|
||||
echo
|
||||
|
@ -1 +0,0 @@
|
||||
*.pyc
|
@ -1,6 +1,7 @@
|
||||
#See ../Jamroot for options.
|
||||
import option ;
|
||||
|
||||
build-project ems/biconcor ;
|
||||
build-project training ;
|
||||
|
||||
with-giza = [ option.get "with-giza" ] ;
|
||||
@ -37,91 +38,21 @@ if $(location) {
|
||||
install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : <location>$(location)/training/phrase-extract/extract-ghkm/tools ;
|
||||
install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
|
||||
|
||||
install phrase-extract : training/phrase-extract//released-programs : <location>$(location)/training/phrase-extract ;
|
||||
install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
|
||||
install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
|
||||
install symal : training/symal//symal : <location>$(location)/symal ;
|
||||
install symal : training/symal//symal : <location>$(location)/training/symal ;
|
||||
|
||||
install biconcor : ems/biconcor//biconcor : <location>$(location)/ems/biconcor ;
|
||||
|
||||
if $(WITH-GIZA) != no {
|
||||
install train-model : training//train-model.perl : <location>$(location)/training ;
|
||||
} else {
|
||||
alias train-model ;
|
||||
}
|
||||
|
||||
install scripts :
|
||||
analysis/README
|
||||
analysis/sentence-by-sentence.pl
|
||||
[ glob-tree README *.js *.pl *.perl *.pm *.py *.sh *.php : tests regression-testing other bin train_model.perl ]
|
||||
[ glob tokenizer/nonbreaking_prefixes/* ems/example/*.* ems/example/data/* ems/web/* analysis/smtgui/* : ems/web/javascripts ]
|
||||
generic/fsa-sample.fsa
|
||||
ems/experiment.machines
|
||||
ems/experiment.meta
|
||||
ems/experiment.perl
|
||||
ems/example/config.basic
|
||||
ems/example/config.factored
|
||||
ems/example/config.hierarchical
|
||||
ems/example/config.syntax
|
||||
ems/example/config.toy
|
||||
ems/example/data/nc-5k.en
|
||||
ems/example/data/nc-5k.fr
|
||||
ems/example/data/test-ref.en.sgm
|
||||
ems/example/data/test-src.fr.sgm
|
||||
ems/support/analysis.perl
|
||||
ems/support/berkeley-process.sh
|
||||
ems/support/berkeley-train.sh
|
||||
ems/support/consolidate-training-data.perl
|
||||
ems/support/generic-multicore-parallelizer.perl
|
||||
ems/support/generic-parallelizer.perl
|
||||
ems/support/input-from-sgm.perl
|
||||
ems/support/interpolate-lm.perl
|
||||
ems/support/reference-from-sgm.perl
|
||||
ems/support/remove-segmenation-markup.perl
|
||||
ems/support/report-experiment-scores.perl
|
||||
ems/support/reuse-weights.perl
|
||||
ems/support/run-command-on-multiple-refsets.perl
|
||||
ems/support/wrap-xml.perl
|
||||
ems/web/analysis.php
|
||||
ems/web/analysis_diff.php
|
||||
ems/web/comment.php
|
||||
ems/web/diff.php
|
||||
ems/web/index.php
|
||||
ems/web/lib.php
|
||||
ems/web/overview.php
|
||||
ems/web/setup
|
||||
ems/web/javascripts/builder.js
|
||||
ems/web/javascripts/controls.js
|
||||
ems/web/javascripts/dragdrop.js
|
||||
ems/web/javascripts/effects.js
|
||||
ems/web/javascripts/prototype.js
|
||||
ems/web/javascripts/scriptaculous.js
|
||||
ems/web/javascripts/slider.js
|
||||
ems/web/javascripts/sound.js
|
||||
ems/web/javascripts/unittest.js
|
||||
generic/compound-splitter.perl
|
||||
generic/extract-factors.pl
|
||||
generic/lopar2pos.pl
|
||||
generic/moses-parallel.pl
|
||||
generic/mteval-v12.pl
|
||||
generic/multi-bleu.perl
|
||||
generic/qsub-wrapper.pl
|
||||
README
|
||||
[ glob tokenizer/*.perl tokenizer/nonbreaking_prefixes/* ]
|
||||
training/absolutize_moses_model.pl
|
||||
training/build-generation-table.perl
|
||||
training/clean-corpus-n.perl
|
||||
training/clone_moses_model.pl
|
||||
training/filter-model-given-input.pl
|
||||
training/filter-rule-table.py
|
||||
training/zmert-moses.pl
|
||||
training/mert-moses.pl
|
||||
training/mert-moses-multi.pl
|
||||
training/postprocess-lopar.perl
|
||||
training/reduce_combine.pl
|
||||
training/combine_factors.pl
|
||||
training/symal/giza2bal.pl
|
||||
training/wrappers/parse-de-bitpar.perl
|
||||
training/wrappers/parse-en-collins.perl
|
||||
training/wrappers/make-factor-en-pos.mxpost.perl
|
||||
training/wrappers/make-factor-pos.tree-tagger.perl
|
||||
training/wrappers/make-factor-stem.perl
|
||||
[ glob recaser/*.perl ]
|
||||
: <install-source-root>. <location>$(location) ;
|
||||
|
||||
alias install : ghkm compactify phrase-extract lexical-reordering symal scripts train-model ;
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
exe biconcur : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
|
||||
exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
|
||||
|
||||
install legacy : biconcur : <location>. ;
|
||||
install legacy : biconcor : <location>. ;
|
||||
|
@ -8,7 +8,7 @@ binmode(STDIN, ":utf8");
|
||||
binmode(STDOUT, ":utf8");
|
||||
|
||||
# apply switches
|
||||
my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG);
|
||||
my ($DIR,$CORPUS,$SCRIPTS_ROOT_DIR,$CONFIG,$HELP,$ERROR);
|
||||
my $LM = "SRILM"; # SRILM is default.
|
||||
my $BUILD_LM = "build-lm.sh";
|
||||
my $NGRAM_COUNT = "ngram-count";
|
||||
@ -16,24 +16,66 @@ my $TRAIN_SCRIPT = "train-factored-phrase-model.perl";
|
||||
my $MAX_LEN = 1;
|
||||
my $FIRST_STEP = 1;
|
||||
my $LAST_STEP = 11;
|
||||
die("train-recaser.perl --dir recaser --corpus cased")
|
||||
$ERROR = "training Aborted."
|
||||
unless &GetOptions('first-step=i' => \$FIRST_STEP,
|
||||
'last-step=i' => \$LAST_STEP,
|
||||
'corpus=s' => \$CORPUS,
|
||||
'config=s' => \$CONFIG,
|
||||
'dir=s' => \$DIR,
|
||||
'ngram-count=s' => \$NGRAM_COUNT,
|
||||
'build-lm=s' => \$BUILD_LM,
|
||||
'lm=s' => \$LM,
|
||||
'train-script=s' => \$TRAIN_SCRIPT,
|
||||
'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR,
|
||||
'max-len=i' => \$MAX_LEN);
|
||||
'dir=s' => \$DIR,
|
||||
'ngram-count=s' => \$NGRAM_COUNT,
|
||||
'build-lm=s' => \$BUILD_LM,
|
||||
'lm=s' => \$LM,
|
||||
'train-script=s' => \$TRAIN_SCRIPT,
|
||||
'scripts-root-dir=s' => \$SCRIPTS_ROOT_DIR,
|
||||
'max-len=i' => \$MAX_LEN,
|
||||
'help' => \$HELP);
|
||||
|
||||
# check and set default to unset parameters
|
||||
die("please specify working dir --dir") unless defined($DIR);
|
||||
die("please specify --corpus") if !defined($CORPUS)
|
||||
$ERROR = "please specify working dir --dir" unless defined($DIR) || defined($HELP);
|
||||
$ERROR = "please specify --corpus" if !defined($CORPUS) && !defined($HELP)
|
||||
&& $FIRST_STEP <= 2 && $LAST_STEP >= 1;
|
||||
|
||||
if ($HELP || $ERROR) {
|
||||
if ($ERROR) {
|
||||
print STDERR "ERROR: " . $ERROR . "\n";
|
||||
}
|
||||
print STDERR "Usage: $0 --dir /output/recaser --corpus /Cased/corpus/files [options ...]";
|
||||
|
||||
print STDERR "\n\nOptions:
|
||||
== MANDATORY ==
|
||||
--dir=dir ... outputted recaser directory.
|
||||
--corpus=file ... inputted cased corpus.
|
||||
|
||||
== OPTIONAL ==
|
||||
= Recaser Training configuration =
|
||||
--train-script=file ... path to the train script (default: train-factored-phrase-model.perl in \$PATH).
|
||||
--config=config ... training script configuration.
|
||||
--scripts-root-dir=dir ... scripts directory.
|
||||
--max-len=int ... max phrase length (default: 1).
|
||||
|
||||
= Language Model Training configuration =
|
||||
--lm=[IRSTLM,SRILM] ... language model (default: SRILM).
|
||||
--build-lm=file ... path to build-lm.sh if not in \$PATH (used only with --lm=IRSTLM).
|
||||
--ngram-count=file ... path to ngram-count.sh if not in \$PATH (used only with --lm=SRILM).
|
||||
|
||||
= Steps this script will perform =
|
||||
(1) Truecasing (disabled);
|
||||
(2) Language Model Training;
|
||||
(3) Data Preparation
|
||||
(4-10) Recaser Model Training;
|
||||
(11) Cleanup.
|
||||
--first-step=[1-11] ... step where script starts (default: 1).
|
||||
--last-step=[1-11] ... step where script ends (default: 11).
|
||||
|
||||
--help ... this usage output.\n";
|
||||
if ($ERROR) {
|
||||
exit(1);
|
||||
}
|
||||
else {
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
# main loop
|
||||
`mkdir -p $DIR`;
|
||||
&truecase() if 0 && $FIRST_STEP == 1;
|
||||
@ -60,7 +102,7 @@ sub train_lm {
|
||||
}
|
||||
print STDERR "** Using $LM **" . "\n";
|
||||
print STDERR $cmd."\n";
|
||||
print STDERR `$cmd`;
|
||||
system($cmd) == 0 || die("Language model training failed with error " . ($? >> 8) . "\n");
|
||||
}
|
||||
|
||||
sub prepare_data {
|
||||
@ -110,12 +152,18 @@ sub train_recase_model {
|
||||
$cmd .= " -scripts-root-dir $SCRIPTS_ROOT_DIR" if $SCRIPTS_ROOT_DIR;
|
||||
$cmd .= " -config $CONFIG" if $CONFIG;
|
||||
print STDERR $cmd."\n";
|
||||
print STDERR `$cmd`;
|
||||
system($cmd) == 0 || die("Recaser model training failed with error " . ($? >> 8) . "\n");
|
||||
}
|
||||
|
||||
sub cleanup {
|
||||
print STDERR "\n(11) Cleaning up @ ".`date`;
|
||||
`rm -f $DIR/extract*`;
|
||||
my $clean_1 = $?;
|
||||
`rm -f $DIR/aligned*`;
|
||||
my $clean_2 = $?;
|
||||
`rm -f $DIR/lex*`;
|
||||
my $clean_3 = $?;
|
||||
if ($clean_1 + $clean_2 + $clean_3 != 0) {
|
||||
print STDERR "Training successful but some files could not be cleaned.\n";
|
||||
}
|
||||
}
|
||||
|
5
scripts/tokenizer/nonbreaking_prefixes/README.txt
Normal file
5
scripts/tokenizer/nonbreaking_prefixes/README.txt
Normal file
@ -0,0 +1,5 @@
|
||||
The language suffix can be found here:
|
||||
|
||||
http://www.loc.gov/standards/iso639-2/php/code_list.php
|
||||
|
||||
|
@ -19,9 +19,7 @@ exe relax-parse : tables-core.cpp SyntaxTree.cpp XmlTree.cpp relax-parse.cpp ;
|
||||
|
||||
exe statistics : tables-core.cpp AlignmentPhrase.cpp statistics.cpp InputFileStream ;
|
||||
|
||||
alias released-programs : extract extract-rules score consolidate ;
|
||||
|
||||
alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-direct consolidate-reverse relax-parse statistics ;
|
||||
alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-reverse relax-parse statistics ;
|
||||
|
||||
install legacy : programs : <location>. <install-type>EXE ;
|
||||
|
||||
|
@ -1,12 +0,0 @@
|
||||
lib_LTLIBRARIES = libkenutil.la
|
||||
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
|
||||
|
||||
libkenutil_la_SOURCES = \
|
||||
bit_packing.cc \
|
||||
ersatz_progress.cc \
|
||||
exception.cc \
|
||||
file.cc \
|
||||
file_piece.cc \
|
||||
murmur_hash.cc \
|
||||
mmap.cc
|
Loading…
Reference in New Issue
Block a user