mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Merge moses-server branch (includes mt moses) into trunk.
Plain (single-threaded) moses should configure and build as before. The multi-threaded and server builds are only available if the appropriate options are selected at configure/compile time. git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
f75e3993ac
commit
c5d39f039f
@ -6,4 +6,7 @@ AUTOMAKE_OPTIONS = foreign
|
|||||||
if WITH_MERT
|
if WITH_MERT
|
||||||
MERT = mert
|
MERT = mert
|
||||||
endif
|
endif
|
||||||
SUBDIRS = moses/src moses-cmd/src misc $(MERT)
|
if WITH_SERVER
|
||||||
|
SERVER = server
|
||||||
|
endif
|
||||||
|
SUBDIRS = moses/src moses-cmd/src misc $(MERT) $(SERVER)
|
||||||
|
1463
config.guess
vendored
Executable file
1463
config.guess
vendored
Executable file
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,11 @@
|
|||||||
/* config.h.in. Generated from configure.in by autoheader. */
|
/* config.h.in. Generated from configure.in by autoheader. */
|
||||||
|
|
||||||
|
/* define if the Boost library is available */
|
||||||
|
#undef HAVE_BOOST
|
||||||
|
|
||||||
|
/* define if the Boost::Thread library is available */
|
||||||
|
#undef HAVE_BOOST_THREAD
|
||||||
|
|
||||||
/* Define to 1 if you have the <getopt.h> header file. */
|
/* Define to 1 if you have the <getopt.h> header file. */
|
||||||
#undef HAVE_GETOPT_H
|
#undef HAVE_GETOPT_H
|
||||||
|
|
||||||
|
1579
config.sub
vendored
Executable file
1579
config.sub
vendored
Executable file
File diff suppressed because it is too large
Load Diff
29
configure.in
29
configure.in
@ -8,6 +8,8 @@ AC_LANG_CPLUSPLUS
|
|||||||
AC_PROG_RANLIB
|
AC_PROG_RANLIB
|
||||||
#AM_PROG_LIBTOOL
|
#AM_PROG_LIBTOOL
|
||||||
|
|
||||||
|
AX_XMLRPC_C
|
||||||
|
|
||||||
AC_ARG_WITH(protobuf,
|
AC_ARG_WITH(protobuf,
|
||||||
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
|
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
|
||||||
[with_protobuf=$withval],
|
[with_protobuf=$withval],
|
||||||
@ -42,11 +44,29 @@ AC_ARG_ENABLE(optimization,
|
|||||||
[CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
|
[CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
AC_ARG_ENABLE(threads,
|
||||||
|
[AC_HELP_STRING([--enable-threads], [compile threadsafe library and multi-threaded moses (mosesmt)])],
|
||||||
|
[with_threads=yes]
|
||||||
|
)
|
||||||
|
|
||||||
AM_CONDITIONAL([INTERNAL_LM], false)
|
AM_CONDITIONAL([INTERNAL_LM], false)
|
||||||
AM_CONDITIONAL([SRI_LM], false)
|
AM_CONDITIONAL([SRI_LM], false)
|
||||||
AM_CONDITIONAL([IRST_LM], false)
|
AM_CONDITIONAL([IRST_LM], false)
|
||||||
AM_CONDITIONAL([RAND_LM], false)
|
AM_CONDITIONAL([RAND_LM], false)
|
||||||
AM_CONDITIONAL([PROTOBUF], false)
|
AM_CONDITIONAL([PROTOBUF], false)
|
||||||
|
AM_CONDITIONAL([am__fastdepCC], false)
|
||||||
|
AM_CONDITIONAL([WITH_THREADS],false)
|
||||||
|
|
||||||
|
if test "x$with_threads" = 'xyes'
|
||||||
|
then
|
||||||
|
AC_MSG_NOTICE([Building threaded moses])
|
||||||
|
AX_BOOST_BASE([1.35.0])
|
||||||
|
AX_BOOST_THREAD
|
||||||
|
CPPFLAGS="$CPPFLAGS -DWITH_THREADS"
|
||||||
|
AM_CONDITIONAL([WITH_THREADS],true)
|
||||||
|
else
|
||||||
|
AC_MSG_NOTICE([Building non-threaded moses. This will disable the moses server])
|
||||||
|
fi
|
||||||
|
|
||||||
if test "x$with_protobuf" != 'xno'
|
if test "x$with_protobuf" != 'xno'
|
||||||
then
|
then
|
||||||
@ -136,9 +156,16 @@ AC_CHECK_HEADERS([getopt.h],
|
|||||||
[AM_CONDITIONAL([WITH_MERT],true)],
|
[AM_CONDITIONAL([WITH_MERT],true)],
|
||||||
[AC_MSG_WARN([Cannot find getopt.h - disabling new mert])])
|
[AC_MSG_WARN([Cannot find getopt.h - disabling new mert])])
|
||||||
|
|
||||||
|
AM_CONDITIONAL([WITH_SERVER],false)
|
||||||
|
if test "x$have_xmlrpc_c" = "xyes" && test "x$with_threads" = "xyes"; then
|
||||||
|
AM_CONDITIONAL([WITH_SERVER],true)
|
||||||
|
else
|
||||||
|
AC_MSG_NOTICE([Disabling server])
|
||||||
|
fi
|
||||||
|
|
||||||
LIBS="$LIBS -lz"
|
LIBS="$LIBS -lz"
|
||||||
|
|
||||||
|
|
||||||
AC_CONFIG_FILES(Makefile moses/src/Makefile moses-cmd/src/Makefile misc/Makefile mert/Makefile)
|
AC_CONFIG_FILES(Makefile moses/src/Makefile moses-cmd/src/Makefile misc/Makefile mert/Makefile server/Makefile)
|
||||||
|
|
||||||
AC_OUTPUT()
|
AC_OUTPUT()
|
||||||
|
223
m4/ax_boost_base.m4
Normal file
223
m4/ax_boost_base.m4
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://autoconf-archive.cryp.to/ax_boost_base.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_BOOST_BASE([MINIMUM-VERSION])
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Test for the Boost C++ libraries of a particular version (or newer)
|
||||||
|
#
|
||||||
|
# If no path to the installed boost library is given, the macro searches
|
||||||
|
# under /usr, /usr/local, /opt and /opt/local and evaluates the
|
||||||
|
# $BOOST_ROOT environment variable. Further documentation is available at
|
||||||
|
# <http://randspringer.de/boost/index.html>.
|
||||||
|
#
|
||||||
|
# This macro calls:
|
||||||
|
#
|
||||||
|
# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
|
||||||
|
#
|
||||||
|
# And sets:
|
||||||
|
#
|
||||||
|
# HAVE_BOOST
|
||||||
|
#
|
||||||
|
# LAST MODIFICATION
|
||||||
|
#
|
||||||
|
# 2008-04-12
|
||||||
|
#
|
||||||
|
# COPYLEFT
|
||||||
|
#
|
||||||
|
# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved.
|
||||||
|
|
||||||
|
AC_DEFUN([AX_BOOST_BASE],
|
||||||
|
[
|
||||||
|
AC_ARG_WITH([boost],
|
||||||
|
AS_HELP_STRING([--with-boost@<:@=DIR@:>@], [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]),
|
||||||
|
[
|
||||||
|
if test "$withval" = "no"; then
|
||||||
|
want_boost="no"
|
||||||
|
elif test "$withval" = "yes"; then
|
||||||
|
want_boost="yes"
|
||||||
|
ac_boost_path=""
|
||||||
|
else
|
||||||
|
want_boost="yes"
|
||||||
|
ac_boost_path="$withval"
|
||||||
|
fi
|
||||||
|
],
|
||||||
|
[want_boost="yes"])
|
||||||
|
|
||||||
|
|
||||||
|
AC_ARG_WITH([boost-libdir],
|
||||||
|
AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
|
||||||
|
[Force given directory for boost libraries. Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]),
|
||||||
|
[
|
||||||
|
if test -d $withval
|
||||||
|
then
|
||||||
|
ac_boost_lib_path="$withval"
|
||||||
|
else
|
||||||
|
AC_MSG_ERROR(--with-boost-libdir expected directory name)
|
||||||
|
fi
|
||||||
|
],
|
||||||
|
[ac_boost_lib_path=""]
|
||||||
|
)
|
||||||
|
|
||||||
|
if test "x$want_boost" = "xyes"; then
|
||||||
|
boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
|
||||||
|
boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
|
||||||
|
boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
|
||||||
|
boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
|
||||||
|
boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
|
||||||
|
if test "x$boost_lib_version_req_sub_minor" = "x" ; then
|
||||||
|
boost_lib_version_req_sub_minor="0"
|
||||||
|
fi
|
||||||
|
WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
|
||||||
|
AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
|
||||||
|
succeeded=no
|
||||||
|
|
||||||
|
dnl first we check the system location for boost libraries
|
||||||
|
dnl this location is chosen if boost libraries are installed with the --layout=system option
|
||||||
|
dnl or if you install boost with RPM
|
||||||
|
if test "$ac_boost_path" != ""; then
|
||||||
|
BOOST_LDFLAGS="-L$ac_boost_path/lib"
|
||||||
|
BOOST_CPPFLAGS="-I$ac_boost_path/include"
|
||||||
|
else
|
||||||
|
for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
|
||||||
|
if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
|
||||||
|
BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib"
|
||||||
|
BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
|
||||||
|
break;
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
dnl overwrite ld flags if we have required special directory with
|
||||||
|
dnl --with-boost-libdir parameter
|
||||||
|
if test "$ac_boost_lib_path" != ""; then
|
||||||
|
BOOST_LDFLAGS="-L$ac_boost_lib_path"
|
||||||
|
fi
|
||||||
|
|
||||||
|
CPPFLAGS_SAVED="$CPPFLAGS"
|
||||||
|
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
|
||||||
|
export CPPFLAGS
|
||||||
|
|
||||||
|
LDFLAGS_SAVED="$LDFLAGS"
|
||||||
|
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
|
||||||
|
export LDFLAGS
|
||||||
|
|
||||||
|
AC_LANG_PUSH(C++)
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||||
|
@%:@include <boost/version.hpp>
|
||||||
|
]], [[
|
||||||
|
#if BOOST_VERSION >= $WANT_BOOST_VERSION
|
||||||
|
// Everything is okay
|
||||||
|
#else
|
||||||
|
# error Boost version is too old
|
||||||
|
#endif
|
||||||
|
]])],[
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
succeeded=yes
|
||||||
|
found_system=yes
|
||||||
|
],[
|
||||||
|
])
|
||||||
|
AC_LANG_POP([C++])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
dnl if we found no boost with system layout we search for boost libraries
|
||||||
|
dnl built and installed without the --layout=system option or for a staged (not installed) version
|
||||||
|
if test "x$succeeded" != "xyes"; then
|
||||||
|
_version=0
|
||||||
|
if test "$ac_boost_path" != ""; then
|
||||||
|
if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
|
||||||
|
for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
|
||||||
|
_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
|
||||||
|
V_CHECK=`expr $_version_tmp \> $_version`
|
||||||
|
if test "$V_CHECK" = "1" ; then
|
||||||
|
_version=$_version_tmp
|
||||||
|
fi
|
||||||
|
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
|
||||||
|
BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
for ac_boost_path in /usr /usr/local /opt /opt/local ; do
|
||||||
|
if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
|
||||||
|
for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
|
||||||
|
_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
|
||||||
|
V_CHECK=`expr $_version_tmp \> $_version`
|
||||||
|
if test "$V_CHECK" = "1" ; then
|
||||||
|
_version=$_version_tmp
|
||||||
|
best_path=$ac_boost_path
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
|
||||||
|
BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
|
||||||
|
if test "$ac_boost_lib_path" = ""
|
||||||
|
then
|
||||||
|
BOOST_LDFLAGS="-L$best_path/lib"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "x$BOOST_ROOT" != "x"; then
|
||||||
|
if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then
|
||||||
|
version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
|
||||||
|
stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
|
||||||
|
stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
|
||||||
|
V_CHECK=`expr $stage_version_shorten \>\= $_version`
|
||||||
|
if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
|
||||||
|
AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
|
||||||
|
BOOST_CPPFLAGS="-I$BOOST_ROOT"
|
||||||
|
BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
|
||||||
|
export CPPFLAGS
|
||||||
|
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
|
||||||
|
export LDFLAGS
|
||||||
|
|
||||||
|
AC_LANG_PUSH(C++)
|
||||||
|
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
|
||||||
|
@%:@include <boost/version.hpp>
|
||||||
|
]], [[
|
||||||
|
#if BOOST_VERSION >= $WANT_BOOST_VERSION
|
||||||
|
// Everything is okay
|
||||||
|
#else
|
||||||
|
# error Boost version is too old
|
||||||
|
#endif
|
||||||
|
]])],[
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
succeeded=yes
|
||||||
|
found_system=yes
|
||||||
|
],[
|
||||||
|
])
|
||||||
|
AC_LANG_POP([C++])
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test "$succeeded" != "yes" ; then
|
||||||
|
if test "$_version" = "0" ; then
|
||||||
|
AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
|
||||||
|
else
|
||||||
|
AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).])
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
AC_SUBST(BOOST_CPPFLAGS)
|
||||||
|
AC_SUBST(BOOST_LDFLAGS)
|
||||||
|
AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
|
||||||
|
fi
|
||||||
|
|
||||||
|
CPPFLAGS="$CPPFLAGS_SAVED"
|
||||||
|
LDFLAGS="$LDFLAGS_SAVED"
|
||||||
|
fi
|
||||||
|
|
||||||
|
])
|
143
m4/ax_boost_thread.m4
Normal file
143
m4/ax_boost_thread.m4
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
# ===========================================================================
|
||||||
|
# http://www.nongnu.org/autoconf-archive/ax_boost_thread.html
|
||||||
|
# ===========================================================================
|
||||||
|
#
|
||||||
|
# SYNOPSIS
|
||||||
|
#
|
||||||
|
# AX_BOOST_THREAD
|
||||||
|
#
|
||||||
|
# DESCRIPTION
|
||||||
|
#
|
||||||
|
# Test for Thread library from the Boost C++ libraries. The macro requires
|
||||||
|
# a preceding call to AX_BOOST_BASE. Further documentation is available at
|
||||||
|
# <http://randspringer.de/boost/index.html>.
|
||||||
|
#
|
||||||
|
# This macro calls:
|
||||||
|
#
|
||||||
|
# AC_SUBST(BOOST_THREAD_LIB)
|
||||||
|
#
|
||||||
|
# And sets:
|
||||||
|
#
|
||||||
|
# HAVE_BOOST_THREAD
|
||||||
|
#
|
||||||
|
# LICENSE
|
||||||
|
#
|
||||||
|
# Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
|
||||||
|
# Copyright (c) 2009 Michael Tindal
|
||||||
|
#
|
||||||
|
# Copying and distribution of this file, with or without modification, are
|
||||||
|
# permitted in any medium without royalty provided the copyright notice
|
||||||
|
# and this notice are preserved.
|
||||||
|
|
||||||
|
AC_DEFUN([AX_BOOST_THREAD],
|
||||||
|
[
|
||||||
|
AC_ARG_WITH([boost-thread],
|
||||||
|
AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@],
|
||||||
|
[use the Thread library from boost - it is possible to specify a certain library for the linker
|
||||||
|
e.g. --with-boost-thread=boost_thread-gcc-mt ]),
|
||||||
|
[
|
||||||
|
if test "$withval" = "no"; then
|
||||||
|
want_boost="no"
|
||||||
|
elif test "$withval" = "yes"; then
|
||||||
|
want_boost="yes"
|
||||||
|
ax_boost_user_thread_lib=""
|
||||||
|
else
|
||||||
|
want_boost="yes"
|
||||||
|
ax_boost_user_thread_lib="$withval"
|
||||||
|
fi
|
||||||
|
],
|
||||||
|
[want_boost="yes"]
|
||||||
|
)
|
||||||
|
|
||||||
|
if test "x$want_boost" = "xyes"; then
|
||||||
|
AC_REQUIRE([AC_PROG_CC])
|
||||||
|
AC_REQUIRE([AC_CANONICAL_BUILD])
|
||||||
|
CPPFLAGS_SAVED="$CPPFLAGS"
|
||||||
|
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
|
||||||
|
export CPPFLAGS
|
||||||
|
|
||||||
|
LDFLAGS_SAVED="$LDFLAGS"
|
||||||
|
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
|
||||||
|
export LDFLAGS
|
||||||
|
|
||||||
|
AC_CACHE_CHECK(whether the Boost::Thread library is available,
|
||||||
|
ax_cv_boost_thread,
|
||||||
|
[AC_LANG_PUSH([C++])
|
||||||
|
CXXFLAGS_SAVE=$CXXFLAGS
|
||||||
|
|
||||||
|
if test "x$build_os" = "xsolaris" ; then
|
||||||
|
CXXFLAGS="-pthreads $CXXFLAGS"
|
||||||
|
elif test "x$build_os" = "xming32" ; then
|
||||||
|
CXXFLAGS="-mthreads $CXXFLAGS"
|
||||||
|
else
|
||||||
|
CXXFLAGS="-pthread $CXXFLAGS"
|
||||||
|
fi
|
||||||
|
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/thread/thread.hpp>]],
|
||||||
|
[[boost::thread_group thrds;
|
||||||
|
return 0;]]),
|
||||||
|
ax_cv_boost_thread=yes, ax_cv_boost_thread=no)
|
||||||
|
CXXFLAGS=$CXXFLAGS_SAVE
|
||||||
|
AC_LANG_POP([C++])
|
||||||
|
])
|
||||||
|
if test "x$ax_cv_boost_thread" = "xyes"; then
|
||||||
|
if test "x$build_os" = "xsolaris" ; then
|
||||||
|
BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS"
|
||||||
|
elif test "x$build_os" = "xming32" ; then
|
||||||
|
BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS"
|
||||||
|
else
|
||||||
|
BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS"
|
||||||
|
fi
|
||||||
|
|
||||||
|
AC_SUBST(BOOST_CPPFLAGS)
|
||||||
|
|
||||||
|
AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available])
|
||||||
|
BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
|
||||||
|
|
||||||
|
LDFLAGS_SAVE=$LDFLAGS
|
||||||
|
case "x$build_os" in
|
||||||
|
*bsd* )
|
||||||
|
LDFLAGS="-pthread $LDFLAGS"
|
||||||
|
break;
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
if test "x$ax_boost_user_thread_lib" = "x"; then
|
||||||
|
for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do
|
||||||
|
ax_lib=${libextension}
|
||||||
|
AC_CHECK_LIB($ax_lib, exit,
|
||||||
|
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
|
||||||
|
[link_thread="no"])
|
||||||
|
done
|
||||||
|
if test "x$link_thread" != "xyes"; then
|
||||||
|
for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do
|
||||||
|
ax_lib=${libextension}
|
||||||
|
AC_CHECK_LIB($ax_lib, exit,
|
||||||
|
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
|
||||||
|
[link_thread="no"])
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
else
|
||||||
|
for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do
|
||||||
|
AC_CHECK_LIB($ax_lib, exit,
|
||||||
|
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
|
||||||
|
[link_thread="no"])
|
||||||
|
done
|
||||||
|
|
||||||
|
fi
|
||||||
|
if test "x$link_thread" = "xno"; then
|
||||||
|
AC_MSG_ERROR(Could not link against $ax_lib !)
|
||||||
|
else
|
||||||
|
case "x$build_os" in
|
||||||
|
*bsd* )
|
||||||
|
BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS"
|
||||||
|
break;
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
CPPFLAGS="$CPPFLAGS_SAVED"
|
||||||
|
LDFLAGS="$LDFLAGS_SAVED"
|
||||||
|
fi
|
||||||
|
])
|
52
m4/ax_xmlrpc_c.m4
Normal file
52
m4/ax_xmlrpc_c.m4
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
AC_DEFUN([AX_XMLRPC_C], [
|
||||||
|
AC_MSG_CHECKING(for XMLRPC-C)
|
||||||
|
|
||||||
|
AC_ARG_WITH(xmlrpc-c,
|
||||||
|
[ --with-xmlrpc-c=PATH Enable XMLRPC-C support.],
|
||||||
|
[
|
||||||
|
if test "$withval" = "no"; then
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
|
||||||
|
else
|
||||||
|
if test "$withval" = "yes"; then
|
||||||
|
xmlrpc_cc_prg="xmlrpc-c-config"
|
||||||
|
else
|
||||||
|
xmlrpc_cc_prg="$withval"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if eval $xmlrpc_cc_prg --version 2>/dev/null >/dev/null; then
|
||||||
|
XMLRPC_C_CPPFLAGS=`$xmlrpc_cc_prg --cflags c++2 abyss-server`
|
||||||
|
XMLRPC_C_LIBS=`$xmlrpc_cc_prg c++2 abyss-server --libs`
|
||||||
|
CXXFLAGS_SAVED=$CXXFLAGS
|
||||||
|
CXXFLAGS="$CXXFLAGS $XMLRPC_C_CPPFLAGS"
|
||||||
|
LIBS_SAVED=$LIBS
|
||||||
|
LIBS="$LIBS $XMLRPC_C_LIBS"
|
||||||
|
|
||||||
|
AC_TRY_LINK(
|
||||||
|
[ #include <xmlrpc-c/server.h>
|
||||||
|
],[ xmlrpc_registry_new(NULL); ],
|
||||||
|
[
|
||||||
|
AC_MSG_RESULT(ok)
|
||||||
|
], [
|
||||||
|
AC_MSG_RESULT(failed)
|
||||||
|
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
|
||||||
|
])
|
||||||
|
|
||||||
|
dnl AC_DEFINE(HAVE_XMLRPC_C, 1, Support for XMLRPC-C.)
|
||||||
|
have_xmlrpc_c=yes
|
||||||
|
AC_SUBST(XMLRPC_C_LIBS)
|
||||||
|
AC_SUBST(XMLRPC_C_CPPFLAGS)
|
||||||
|
|
||||||
|
LIBS=$LIBS_SAVED
|
||||||
|
CXXFLAGS=$CXXFLAGS_SAVED
|
||||||
|
|
||||||
|
else
|
||||||
|
AC_MSG_RESULT(failed)
|
||||||
|
AC_MSG_ERROR(Could not compile XMLRPC-C test.)
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(ignored)
|
||||||
|
])
|
||||||
|
])
|
@ -4,13 +4,13 @@ processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp
|
|||||||
processLexicalTable_SOURCES = processLexicalTable.cpp
|
processLexicalTable_SOURCES = processLexicalTable.cpp
|
||||||
queryLexicalTable_SOURCES = queryLexicalTable.cpp
|
queryLexicalTable_SOURCES = queryLexicalTable.cpp
|
||||||
|
|
||||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src
|
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||||
|
|
||||||
processPhraseTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
processPhraseTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
|
||||||
processPhraseTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
processPhraseTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||||
|
|
||||||
processLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
processLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
|
||||||
processLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
processLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||||
|
|
||||||
queryLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
queryLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
|
||||||
queryLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
queryLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||||
|
@ -210,47 +210,9 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void OutputWordAlignment(std::ostream &out, const TargetPhrase &phrase, size_t srcoffset, size_t trgoffset, FactorDirection direction)
|
|
||||||
{
|
|
||||||
size_t size = phrase.GetSize();
|
|
||||||
if (size){
|
|
||||||
out << " ";
|
|
||||||
/* out << phrase;
|
|
||||||
out << " ===> offset: (" << srcoffset << "," << trgoffset << ")";
|
|
||||||
out << " ===> size: (" << phrase.GetAlignmentPair().GetAlignmentPhrase(Input).GetSize() << ","
|
|
||||||
<< phrase.GetAlignmentPair().GetAlignmentPhrase(Output).GetSize() << ") ===> ";
|
|
||||||
*/
|
|
||||||
AlignmentPhrase alignphrase=phrase.GetAlignmentPair().GetAlignmentPhrase(direction);
|
|
||||||
/* alignphrase.print(out,0);
|
|
||||||
out << " ===> ";
|
|
||||||
// out << alignphrase << " ===> ";
|
|
||||||
*/
|
|
||||||
if (direction == Input){
|
|
||||||
alignphrase.Shift(trgoffset);
|
|
||||||
alignphrase.print(out,srcoffset);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
alignphrase.Shift(srcoffset);
|
|
||||||
alignphrase.print(out,trgoffset);
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
// out << alignphrase << " ===> ";
|
|
||||||
out << "\n";
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void OutputWordAlignment(std::ostream &out, const Hypothesis *hypo, FactorDirection direction)
|
|
||||||
{
|
|
||||||
size_t srcoffset, trgoffset;
|
|
||||||
if ( hypo != NULL)
|
|
||||||
{
|
|
||||||
srcoffset=hypo->GetCurrSourceWordsRange().GetStartPos();
|
|
||||||
trgoffset=hypo->GetCurrTargetWordsRange().GetStartPos();
|
|
||||||
OutputWordAlignment(out, hypo->GetPrevHypo(),direction);
|
|
||||||
OutputWordAlignment(out, hypo->GetCurrTargetPhrase(), srcoffset, trgoffset, direction);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void IOWrapper::Backtrack(const Hypothesis *hypo){
|
void IOWrapper::Backtrack(const Hypothesis *hypo){
|
||||||
|
|
||||||
@ -282,7 +244,7 @@ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
|
|||||||
|
|
||||||
void OutputInput(std::ostream& os, const Hypothesis* hypo)
|
void OutputInput(std::ostream& os, const Hypothesis* hypo)
|
||||||
{
|
{
|
||||||
size_t len = StaticData::Instance().GetInput()->GetSize();
|
size_t len = hypo->GetInput().GetSize();
|
||||||
std::vector<const Phrase*> inp_phrases(len, 0);
|
std::vector<const Phrase*> inp_phrases(len, 0);
|
||||||
OutputInput(inp_phrases, hypo);
|
OutputInput(inp_phrases, hypo);
|
||||||
for (size_t i=0; i<len; ++i)
|
for (size_t i=0; i<len; ++i)
|
||||||
@ -411,11 +373,11 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
|
|||||||
// translation components
|
// translation components
|
||||||
if (StaticData::Instance().GetInputType()==SentenceInput){
|
if (StaticData::Instance().GetInputType()==SentenceInput){
|
||||||
// translation components for text input
|
// translation components for text input
|
||||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||||
if (pds.size() > 0) {
|
if (pds.size() > 0) {
|
||||||
if (labeledOutput)
|
if (labeledOutput)
|
||||||
*m_nBestStream << " tm:";
|
*m_nBestStream << " tm:";
|
||||||
vector<PhraseDictionary*>::iterator iter;
|
vector<PhraseDictionaryFeature*>::iterator iter;
|
||||||
for (iter = pds.begin(); iter != pds.end(); ++iter) {
|
for (iter = pds.begin(); iter != pds.end(); ++iter) {
|
||||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||||
for (size_t j = 0; j<scores.size(); ++j)
|
for (size_t j = 0; j<scores.size(); ++j)
|
||||||
@ -427,9 +389,9 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
|
|||||||
// translation components for Confusion Network input
|
// translation components for Confusion Network input
|
||||||
// first translation component has GetNumInputScores() scores from the input Confusion Network
|
// first translation component has GetNumInputScores() scores from the input Confusion Network
|
||||||
// at the beginning of the vector
|
// at the beginning of the vector
|
||||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||||
if (pds.size() > 0) {
|
if (pds.size() > 0) {
|
||||||
vector<PhraseDictionary*>::iterator iter;
|
vector<PhraseDictionaryFeature*>::iterator iter;
|
||||||
|
|
||||||
iter = pds.begin();
|
iter = pds.begin();
|
||||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||||
@ -496,25 +458,7 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (includeWordAlignment){
|
|
||||||
//word-to-word alignment (source-to-target)
|
|
||||||
*m_nBestStream << " |||";
|
|
||||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
|
|
||||||
{
|
|
||||||
const Hypothesis &edge = *edges[currEdge];
|
|
||||||
WordsRange targetRange = path.GetTargetWordsRange(edge);
|
|
||||||
OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Input);
|
|
||||||
}
|
|
||||||
|
|
||||||
//word-to-word alignment (target-to-source)
|
|
||||||
*m_nBestStream << " |||";
|
|
||||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
|
|
||||||
{
|
|
||||||
const Hypothesis &edge = *edges[currEdge];
|
|
||||||
WordsRange targetRange = path.GetTargetWordsRange(edge);
|
|
||||||
OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*m_nBestStream << endl;
|
*m_nBestStream << endl;
|
||||||
}
|
}
|
||||||
@ -522,3 +466,51 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
|
|||||||
|
|
||||||
*m_nBestStream<<std::flush;
|
*m_nBestStream<<std::flush;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
|
||||||
|
{
|
||||||
|
delete source;
|
||||||
|
switch(inputType)
|
||||||
|
{
|
||||||
|
case SentenceInput: source = ioWrapper.GetInput(new Sentence(Input)); break;
|
||||||
|
case ConfusionNetworkInput: source = ioWrapper.GetInput(new ConfusionNet); break;
|
||||||
|
case WordLatticeInput: source = ioWrapper.GetInput(new WordLattice); break;
|
||||||
|
default: TRACE_ERR("Unknown input type: " << inputType << "\n");
|
||||||
|
}
|
||||||
|
return (source ? true : false);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
IOWrapper *GetIODevice(const StaticData &staticData)
|
||||||
|
{
|
||||||
|
IOWrapper *ioWrapper;
|
||||||
|
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
|
||||||
|
,&outputFactorOrder = staticData.GetOutputFactorOrder();
|
||||||
|
FactorMask inputFactorUsed(inputFactorOrder);
|
||||||
|
|
||||||
|
// io
|
||||||
|
if (staticData.GetParam("input-file").size() == 1)
|
||||||
|
{
|
||||||
|
VERBOSE(2,"IO from File" << endl);
|
||||||
|
string filePath = staticData.GetParam("input-file")[0];
|
||||||
|
|
||||||
|
ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
|
||||||
|
, staticData.GetNBestSize()
|
||||||
|
, staticData.GetNBestFilePath()
|
||||||
|
, filePath);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
VERBOSE(1,"IO from STDOUT/STDIN" << endl);
|
||||||
|
ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
|
||||||
|
, staticData.GetNBestSize()
|
||||||
|
, staticData.GetNBestFilePath());
|
||||||
|
}
|
||||||
|
ioWrapper->ResetTranslationId();
|
||||||
|
|
||||||
|
IFVERBOSE(1)
|
||||||
|
PrintUserTime("Created input-output object");
|
||||||
|
|
||||||
|
return ioWrapper;
|
||||||
|
}
|
||||||
|
@ -44,6 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "TrellisPathList.h"
|
#include "TrellisPathList.h"
|
||||||
#include "InputFileStream.h"
|
#include "InputFileStream.h"
|
||||||
#include "InputType.h"
|
#include "InputType.h"
|
||||||
|
#include "WordLattice.h"
|
||||||
|
|
||||||
class IOWrapper
|
class IOWrapper
|
||||||
{
|
{
|
||||||
@ -98,3 +99,7 @@ public:
|
|||||||
return *m_outputSearchGraphStream;
|
return *m_outputSearchGraphStream;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
|
||||||
|
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
|
||||||
|
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);
|
||||||
|
@ -61,25 +61,14 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#include "hypergraph.pb.h"
|
#include "hypergraph.pb.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace Moses;
|
using namespace Moses;
|
||||||
|
|
||||||
/**
 * Read the next input item of the requested type from ioWrapper.
 *
 * Whatever source pointed to on entry is deleted. On return source holds the
 * pointer handed back by IOWrapper::GetInput(), or NULL when the input type
 * is not recognised.
 *
 * @param ioWrapper  wrapper the input is read from
 * @param inputType  selects which kind of input object is allocated
 * @param source     in: previous input (may be NULL); out: newly read input
 * @return true iff source is non-NULL after the call
 */
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
  delete source;
  // Reset straight away: without this an unknown input type would leave the
  // caller holding (and this function reporting success on) a freed pointer.
  source = NULL;

  switch(inputType)
  {
    case SentenceInput:         source = ioWrapper.GetInput(new Sentence(Input)); break;
    case ConfusionNetworkInput: source = ioWrapper.GetInput(new ConfusionNet); break;
    case WordLatticeInput:      source = ioWrapper.GetInput(new WordLattice); break;
    default: TRACE_ERR("Unknown input type: " << inputType << "\n");
  }
  return (source ? true : false);
}
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char* argv[])
|
int main(int argc, char* argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
#ifdef HAVE_PROTOBUF
|
#ifdef HAVE_PROTOBUF
|
||||||
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
GOOGLE_PROTOBUF_VERIFY_VERSION;
|
||||||
#endif
|
#endif
|
||||||
@ -223,35 +212,4 @@ int main(int argc, char* argv[])
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Build the IOWrapper described by the decoder configuration.
 *
 * When exactly one "input-file" parameter is present the wrapper reads from
 * that file; otherwise the IOWrapper constructor without a file path is used.
 * The translation id of the new wrapper is reset before it is returned.
 * The caller receives ownership of the heap-allocated object.
 */
IOWrapper *GetIODevice(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  IOWrapper *ioWrapper;
  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
  if (!readFromFile)
  {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
                              , staticData.GetNBestSize()
                              , staticData.GetNBestFilePath());
  }
  else
  {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];

    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
                              , staticData.GetNBestSize()
                              , staticData.GetNBestFilePath()
                              , filePath);
  }
  ioWrapper->ResetTranslationId();

  IFVERBOSE(1)
  {
    PrintUserTime("Created input-output object");
  }

  return ioWrapper;
}
|
|
||||||
|
@ -39,4 +39,3 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||||||
class IOWrapper;
|
class IOWrapper;
|
||||||
|
|
||||||
int main(int argc, char* argv[]);
|
int main(int argc, char* argv[]);
|
||||||
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
|
|
||||||
|
216
moses-cmd/src/MainMT.cpp
Normal file
216
moses-cmd/src/MainMT.cpp
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
// $Id: $
|
||||||
|
|
||||||
|
/***********************************************************************
|
||||||
|
Moses - factored phrase-based language decoder
|
||||||
|
Copyright (C) 2009 University of Edinburgh
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
***********************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main for multithreaded moses.
|
||||||
|
**/
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <boost/thread/mutex.hpp>
|
||||||
|
|
||||||
|
#if defined(BOOST_HAS_PTHREADS)
|
||||||
|
#include <pthread.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#include "Hypothesis.h"
|
||||||
|
#include "IOWrapper.h"
|
||||||
|
#include "Manager.h"
|
||||||
|
#include "StaticData.h"
|
||||||
|
#include "ThreadPool.h"
|
||||||
|
#include "Util.h"
|
||||||
|
#include "mbr.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Moses;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes sure output goes in the correct order.
|
||||||
|
**/
|
||||||
|
class OutputCollector {
|
||||||
|
public:
|
||||||
|
OutputCollector() :
|
||||||
|
m_nextOutput(0) {}
|
||||||
|
|
||||||
|
void Write(int sourceId, Manager& manager) {
|
||||||
|
//create the output string
|
||||||
|
//Note that this is copied from Main.cpp. Some refactoring
|
||||||
|
//could remove the duplicate code.
|
||||||
|
const StaticData& staticData = StaticData::Instance();
|
||||||
|
ostringstream out;
|
||||||
|
if (!staticData.UseMBR()) {
|
||||||
|
const Hypothesis* hypo = manager.GetBestHypothesis();
|
||||||
|
if (hypo) {
|
||||||
|
OutputSurface(
|
||||||
|
out,
|
||||||
|
hypo,
|
||||||
|
staticData.GetOutputFactorOrder(),
|
||||||
|
staticData.GetReportSegmentation(),
|
||||||
|
staticData.GetReportAllFactors());
|
||||||
|
}
|
||||||
|
out << endl;
|
||||||
|
} else {
|
||||||
|
//MBR decoding
|
||||||
|
size_t nBestSize = staticData.GetMBRSize();
|
||||||
|
if (nBestSize <= 0) {
|
||||||
|
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
TrellisPathList nBestList;
|
||||||
|
manager.CalcNBest(nBestSize, nBestList,true);
|
||||||
|
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
|
||||||
|
IFVERBOSE(2) { PrintUserTime("calculated n-best list for MBR decoding"); }
|
||||||
|
vector<const Factor*> mbrBestHypo = doMBR(nBestList);
|
||||||
|
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
|
||||||
|
const Factor *factor = mbrBestHypo[i];
|
||||||
|
if (i>0) out << " ";
|
||||||
|
out << factor->GetString();
|
||||||
|
}
|
||||||
|
out << endl;
|
||||||
|
IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Write(sourceId,out.str());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* Write or cache the output, as appropriate.
|
||||||
|
**/
|
||||||
|
void Write(int sourceId,const string& output) {
|
||||||
|
boost::mutex::scoped_lock lock(m_mutex);
|
||||||
|
if (sourceId == m_nextOutput) {
|
||||||
|
//This is the one we were expecting
|
||||||
|
cout << output;
|
||||||
|
++m_nextOutput;
|
||||||
|
//see if there's any more
|
||||||
|
map<int,string>::iterator iter;
|
||||||
|
while ((iter = m_outputs.find(m_nextOutput)) != m_outputs.end()) {
|
||||||
|
cout << iter->second;
|
||||||
|
m_outputs.erase(iter);
|
||||||
|
++m_nextOutput;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//save for later
|
||||||
|
m_outputs[sourceId] = output;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
map<int,string> m_outputs;
|
||||||
|
int m_nextOutput;
|
||||||
|
boost::mutex m_mutex;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Translates a sentence.
|
||||||
|
**/
|
||||||
|
class TranslationTask : public Task {
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
TranslationTask(size_t lineNumber,
|
||||||
|
InputType* source, OutputCollector& outputCollector) :
|
||||||
|
m_source(source), m_lineNumber(lineNumber),
|
||||||
|
m_outputCollector(outputCollector) {}
|
||||||
|
|
||||||
|
void Run() {
|
||||||
|
#if defined(BOOST_HAS_PTHREADS)
|
||||||
|
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << (int)pthread_self() << std::endl);
|
||||||
|
#endif
|
||||||
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
Sentence sentence(Input);
|
||||||
|
const vector<FactorType> &inputFactorOrder =
|
||||||
|
staticData.GetInputFactorOrder();
|
||||||
|
Manager manager(*m_source,staticData.GetSearchAlgorithm());
|
||||||
|
manager.ProcessSentence();
|
||||||
|
m_outputCollector.Write(m_lineNumber,manager);
|
||||||
|
}
|
||||||
|
|
||||||
|
~TranslationTask() {delete m_source;}
|
||||||
|
|
||||||
|
private:
|
||||||
|
InputType* m_source;
|
||||||
|
size_t m_lineNumber;
|
||||||
|
OutputCollector& m_outputCollector;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
//extract pool-size args, send others to moses
|
||||||
|
char** mosesargv = new char*[argc+2];
|
||||||
|
int mosesargc = 0;
|
||||||
|
int threadcount = 10;
|
||||||
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
if (!strcmp(argv[i], "-threads")) {
|
||||||
|
++i;
|
||||||
|
if (i >= argc) {
|
||||||
|
cerr << "Error: Missing argument to -threads" << endl;
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
threadcount = atoi(argv[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
mosesargv[mosesargc] = new char[strlen(argv[i])+1];
|
||||||
|
strcpy(mosesargv[mosesargc],argv[i]);
|
||||||
|
++mosesargc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (threadcount <= 0) {
|
||||||
|
cerr << "Error: Must specify a positive number of threads" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Parameter* params = new Parameter();
|
||||||
|
if (!params->LoadParam(mosesargc,mosesargv)) {
|
||||||
|
params->Explain();
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (!StaticData::LoadDataStatic(params)) {
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
const StaticData& staticData = StaticData::Instance();
|
||||||
|
IOWrapper* ioWrapper = GetIODevice(staticData);
|
||||||
|
|
||||||
|
if (!ioWrapper) {
|
||||||
|
cerr << "Error; Failed to create IO object" << endl;
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
ThreadPool pool(threadcount);
|
||||||
|
InputType* source = NULL;
|
||||||
|
size_t lineCount = 0;
|
||||||
|
OutputCollector outputCollector;
|
||||||
|
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
|
||||||
|
TranslationTask* task =
|
||||||
|
new TranslationTask(lineCount,source, outputCollector);
|
||||||
|
pool.Submit(task);
|
||||||
|
source = NULL; //make sure it doesn't get deleted
|
||||||
|
++lineCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
pool.Stop(true); //flush remaining jobs
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,7 +1,16 @@
|
|||||||
bin_PROGRAMS = moses
|
if WITH_THREADS
|
||||||
|
bin_PROGRAMS = moses mosesmt
|
||||||
|
else
|
||||||
|
bin_PROGRAMS = moses
|
||||||
|
endif
|
||||||
|
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
|
||||||
moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp
|
moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp
|
||||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
|
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
|
||||||
|
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||||
|
|
||||||
|
mosesmt_SOURCES = MainMT.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ThreadPool.cpp
|
||||||
|
mosesmt_LDADD = -L$(top_srcdir)/moses/src $(BOOST_LDFLAGS) -lmoses $(BOOST_THREAD_LIB)
|
||||||
|
mosesmt_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||||
|
|
||||||
|
|
||||||
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
|
||||||
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
|
||||||
|
|
||||||
|
95
moses-cmd/src/ThreadPool.cpp
Normal file
95
moses-cmd/src/ThreadPool.cpp
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
// $Id: $
|
||||||
|
|
||||||
|
/***********************************************************************
|
||||||
|
Moses - factored phrase-based language decoder
|
||||||
|
Copyright (C) 2009 University of Edinburgh
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
***********************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include "ThreadPool.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Moses;
|
||||||
|
|
||||||
|
// Launch numThreads workers; every one of them runs Execute() on this pool.
Moses::ThreadPool::ThreadPool( size_t numThreads )
  : m_stopped(false), m_stopping(false)
{
  size_t started = 0;
  while (started != numThreads) {
    m_threads.create_thread(boost::bind(&ThreadPool::Execute,this));
    ++started;
  }
}
|
||||||
|
|
||||||
|
void Moses::ThreadPool::Execute()
|
||||||
|
{
|
||||||
|
do {
|
||||||
|
Task* task = NULL;
|
||||||
|
{ // Find a job to perform
|
||||||
|
boost::mutex::scoped_lock lock(m_mutex);
|
||||||
|
if (m_tasks.empty() && !m_stopped) {
|
||||||
|
m_threadNeeded.wait(lock);
|
||||||
|
}
|
||||||
|
if (!m_stopped && !m_tasks.empty()) {
|
||||||
|
task = m_tasks.front();
|
||||||
|
m_tasks.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Execute job
|
||||||
|
if (task) {
|
||||||
|
task->Run();
|
||||||
|
delete task;
|
||||||
|
}
|
||||||
|
m_threadAvailable.notify_all();
|
||||||
|
} while (!m_stopped);
|
||||||
|
TRACE_ERR("Thread " << (int)pthread_self() << " exiting" << endl);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queue a task and wake the workers; rejected once Stop() has begun.
void Moses::ThreadPool::Submit( Task* task )
{
  boost::mutex::scoped_lock guard(m_mutex);
  if (!m_stopping) {
    m_tasks.push(task);
    m_threadNeeded.notify_all();
    return;
  }
  throw runtime_error("ThreadPool stopping - unable to accept new jobs");
}
|
||||||
|
|
||||||
|
void Moses::ThreadPool::Stop(bool processRemainingJobs)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
//prevent more jobs from being added to the queue
|
||||||
|
boost::mutex::scoped_lock lock(m_mutex);
|
||||||
|
if (m_stopped) return;
|
||||||
|
m_stopping = true;
|
||||||
|
}
|
||||||
|
if (processRemainingJobs) {
|
||||||
|
boost::mutex::scoped_lock lock(m_mutex);
|
||||||
|
//wait for queue to drain.
|
||||||
|
while (!m_tasks.empty() && !m_stopped) {
|
||||||
|
m_threadAvailable.wait(lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//tell all threads to stop
|
||||||
|
{
|
||||||
|
boost::mutex::scoped_lock lock(m_mutex);
|
||||||
|
m_stopped = true;
|
||||||
|
}
|
||||||
|
m_threadNeeded.notify_all();
|
||||||
|
|
||||||
|
cerr << m_threads.size() << endl;
|
||||||
|
m_threads.join_all();
|
||||||
|
}
|
107
moses-cmd/src/ThreadPool.h
Normal file
107
moses-cmd/src/ThreadPool.h
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
// $Id: $
|
||||||
|
|
||||||
|
/***********************************************************************
|
||||||
|
Moses - factored phrase-based language decoder
|
||||||
|
Copyright (C) 2009 University of Edinburgh
|
||||||
|
|
||||||
|
This library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with this library; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
***********************************************************************/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <queue>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <boost/bind.hpp>
|
||||||
|
#include <boost/thread.hpp>
|
||||||
|
|
||||||
|
#include "Util.h"
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Classes to implement a ThreadPool.
|
||||||
|
**/
|
||||||
|
|
||||||
|
namespace Moses {
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Interface for a unit of work handed to the ThreadPool.
 * Implementations put the work to be done in Run().
 **/
class Task {
  public:
    virtual ~Task() {}
    virtual void Run() = 0;
};
|
||||||
|
|
||||||
|
class ThreadPool {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Construct a thread pool of a fixed size.
|
||||||
|
**/
|
||||||
|
ThreadPool(size_t numThreads);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a job to the threadpool.
|
||||||
|
**/
|
||||||
|
void Submit(Task* task);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait until all queued jobs have completed, and shut down
|
||||||
|
* the ThreadPool.
|
||||||
|
**/
|
||||||
|
void Stop(bool processRemainingJobs = false);
|
||||||
|
|
||||||
|
~ThreadPool() { Stop(); }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
/**
|
||||||
|
* The main loop executed by each thread.
|
||||||
|
**/
|
||||||
|
void Execute();
|
||||||
|
|
||||||
|
std::queue<Task*> m_tasks;
|
||||||
|
boost::thread_group m_threads;
|
||||||
|
boost::mutex m_mutex;
|
||||||
|
boost::condition_variable m_threadNeeded;
|
||||||
|
boost::condition_variable m_threadAvailable;
|
||||||
|
bool m_stopped;
|
||||||
|
bool m_stopping;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
class TestTask : public Task {
|
||||||
|
public:
|
||||||
|
TestTask(int id) : m_id(id) {}
|
||||||
|
virtual void Run() {
|
||||||
|
int tid = (int)pthread_self();
|
||||||
|
std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual ~TestTask() {}
|
||||||
|
|
||||||
|
private:
|
||||||
|
int m_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -1,102 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include "AlignmentElement.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
// Build the element from an already-collected set of alignment points.
AlignmentElement::AlignmentElement(const ContainerType &alignInfo)
{
  m_collection.insert(alignInfo.begin(), alignInfo.end());
}
|
|
||||||
|
|
||||||
// Build the element from a list of alignment points; duplicates collapse in the set.
AlignmentElement::AlignmentElement(const vector<AlignmentElementType> &alignInfo)
{
  m_collection.insert(alignInfo.begin(), alignInfo.end());
}
|
|
||||||
|
|
||||||
// Copy constructor: takes over every alignment point of alignInfo.
AlignmentElement::AlignmentElement(const AlignmentElement &alignInfo)
{
  for (const_iterator point = alignInfo.begin(); point != alignInfo.end(); ++point) {
    m_collection.insert(m_collection.end(), *point);
  }
}
|
|
||||||
|
|
||||||
/**
 * Assignment: afterwards this element holds exactly the alignment points of
 * alignInfo.
 *
 * The previous implementation inserted into the existing set without
 * clearing it, so assigning to a non-empty element produced the union of
 * both sets instead of a copy. Assigning the container replaces the old
 * contents and is also safe for self-assignment.
 */
AlignmentElement& AlignmentElement::operator=(const AlignmentElement& alignInfo)
{
  m_collection = alignInfo.GetCollection();

  return *this;
}
|
|
||||||
|
|
||||||
void AlignmentElement::Shift(int shift)
|
|
||||||
{
|
|
||||||
ContainerType newColl;
|
|
||||||
|
|
||||||
ContainerType::const_iterator iter;
|
|
||||||
for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter){
|
|
||||||
if (*iter!=-1) newColl.insert(*iter + shift);
|
|
||||||
else newColl.insert(*iter);
|
|
||||||
}
|
|
||||||
m_collection = newColl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement)
|
|
||||||
{
|
|
||||||
const AlignmentElement::ContainerType &elemSet = alignElement.GetCollection();
|
|
||||||
|
|
||||||
// out << "(";
|
|
||||||
if (elemSet.size() > 0)
|
|
||||||
{
|
|
||||||
AlignmentElement::ContainerType::const_iterator iter = elemSet.begin();
|
|
||||||
out << *iter;
|
|
||||||
for (++iter ; iter != elemSet.end() ; ++iter)
|
|
||||||
out << "," << *iter;
|
|
||||||
}
|
|
||||||
// out << ")";
|
|
||||||
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentElement::SetIntersect(const AlignmentElement &otherElement)
|
|
||||||
{
|
|
||||||
ContainerType newElement;
|
|
||||||
set_intersection(m_collection.begin() , m_collection.end()
|
|
||||||
,otherElement.begin() , otherElement.end()
|
|
||||||
,inserter(newElement , newElement.begin()) );
|
|
||||||
m_collection = newElement;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentElement::SetUniformAlignment(size_t otherPhraseSize)
|
|
||||||
{
|
|
||||||
for (size_t pos = 0 ; pos < otherPhraseSize ; ++pos)
|
|
||||||
m_collection.insert(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
TO_STRING_BODY(AlignmentElement);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <set>
|
|
||||||
#include <vector>
|
|
||||||
#include "Util.h"
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
|
|
||||||
typedef short int AlignmentElementType;
|
|
||||||
|
|
||||||
//! set of alignments of 1 word
|
|
||||||
class AlignmentElement
|
|
||||||
{
|
|
||||||
friend std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
typedef std::set<AlignmentElementType> ContainerType;
|
|
||||||
ContainerType m_collection;
|
|
||||||
public:
|
|
||||||
typedef ContainerType::iterator iterator;
|
|
||||||
typedef ContainerType::const_iterator const_iterator;
|
|
||||||
const_iterator begin() const { return m_collection.begin(); }
|
|
||||||
const_iterator end() const { return m_collection.end(); }
|
|
||||||
|
|
||||||
AlignmentElement(){};
|
|
||||||
~AlignmentElement(){};
|
|
||||||
|
|
||||||
//! inital constructor from parsed info from phrase table
|
|
||||||
AlignmentElement(const ContainerType &alignInfo);
|
|
||||||
AlignmentElement(const std::vector<AlignmentElementType> &alignInfo);
|
|
||||||
AlignmentElement(const AlignmentElement &alignInfo);
|
|
||||||
|
|
||||||
AlignmentElement& operator=(const AlignmentElement ©);
|
|
||||||
|
|
||||||
//! number of words this element aligns to
|
|
||||||
size_t GetSize() const
|
|
||||||
{
|
|
||||||
return m_collection.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool IsEmpty() const
|
|
||||||
{
|
|
||||||
return m_collection.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
//! return internal collection of elements
|
|
||||||
const ContainerType &GetCollection() const
|
|
||||||
{
|
|
||||||
return m_collection;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** compare all alignments for this word.
|
|
||||||
* Return true iff both words are aligned to the same words
|
|
||||||
*/
|
|
||||||
bool Equals(const AlignmentElement &compare) const
|
|
||||||
{
|
|
||||||
return m_collection == compare.GetCollection();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** used by the unknown word handler.
|
|
||||||
* Set alignment to 0
|
|
||||||
*/
|
|
||||||
void SetIdentityAlignment()
|
|
||||||
{
|
|
||||||
m_collection.insert(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** align to all elements on other side, where the size of the other
|
|
||||||
* phrase is otherPhraseSize. Used when element has no alignment info
|
|
||||||
*/
|
|
||||||
void SetUniformAlignment(size_t otherPhraseSize);
|
|
||||||
|
|
||||||
/** set intersect with other element. Used when applying trans opt to a hypo
|
|
||||||
*/
|
|
||||||
void SetIntersect(const AlignmentElement &otherElement);
|
|
||||||
|
|
||||||
void Add(size_t pos)
|
|
||||||
{
|
|
||||||
m_collection.insert(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// shift alignment so that it is comparitable to another alignment.
|
|
||||||
void Shift(int shift);
|
|
||||||
|
|
||||||
TO_STRING();
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
@ -1,102 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#include "AlignmentPair.h"
|
|
||||||
#include "AlignmentPhrase.h"
|
|
||||||
#include "WordsRange.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
// Back-inserter onto the source-side vector for Input, the target-side vector otherwise.
AlignmentPhraseInserter AlignmentPair::GetInserter(FactorDirection direction)
{
  if (direction == Input) {
    return back_insert_iterator<AlignmentPhrase::CollectionType>(m_sourceAlign.GetVector());
  }
  return back_insert_iterator<AlignmentPhrase::CollectionType>(m_targetAlign.GetVector());
}
|
|
||||||
|
|
||||||
void AlignmentPair::SetIdentityAlignment()
|
|
||||||
{
|
|
||||||
AlignmentElement alignment;
|
|
||||||
alignment.SetIdentityAlignment();
|
|
||||||
|
|
||||||
m_sourceAlign.Add(alignment);
|
|
||||||
m_targetAlign.Add(alignment);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AlignmentPair::IsCompatible(const AlignmentPair &compare
|
|
||||||
, size_t sourceStart
|
|
||||||
, size_t targetStart) const
|
|
||||||
{
|
|
||||||
// source
|
|
||||||
bool ret = GetAlignmentPhrase(Input).IsCompatible(
|
|
||||||
compare.GetAlignmentPhrase(Input)
|
|
||||||
, sourceStart
|
|
||||||
, targetStart);
|
|
||||||
|
|
||||||
if (!ret)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// target
|
|
||||||
return GetAlignmentPhrase(Output).IsCompatible(
|
|
||||||
compare.GetAlignmentPhrase(Output)
|
|
||||||
, targetStart
|
|
||||||
, sourceStart);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPair::Add(const AlignmentPair &newAlignment
|
|
||||||
, const WordsRange &sourceRange
|
|
||||||
, const WordsRange &targetRange)
|
|
||||||
{
|
|
||||||
m_sourceAlign.Add(newAlignment.m_sourceAlign
|
|
||||||
, targetRange.GetStartPos()
|
|
||||||
, sourceRange.GetStartPos());
|
|
||||||
m_targetAlign.Add(newAlignment.m_targetAlign
|
|
||||||
, sourceRange.GetStartPos()
|
|
||||||
, targetRange.GetStartPos());
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPair::Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange)
|
|
||||||
{
|
|
||||||
m_sourceAlign.Merge(newAlignment.m_sourceAlign
|
|
||||||
, targetRange.GetStartPos()
|
|
||||||
, sourceRange.GetStartPos());
|
|
||||||
m_targetAlign.Merge(newAlignment.m_targetAlign
|
|
||||||
, sourceRange.GetStartPos()
|
|
||||||
, targetRange.GetStartPos());
|
|
||||||
}
|
|
||||||
|
|
||||||
TO_STRING_BODY(AlignmentPair);
|
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream &out, const AlignmentPair &alignmentPair)
|
|
||||||
{
|
|
||||||
// out << "f2e: " << alignmentPair.m_sourceAlign << ""
|
|
||||||
// << " , e2f: " << alignmentPair.m_targetAlign << " ";
|
|
||||||
out << "f2e: ";
|
|
||||||
alignmentPair.m_sourceAlign.print(out);
|
|
||||||
out << " , e2f: ";
|
|
||||||
alignmentPair.m_targetAlign.print(out);
|
|
||||||
out << " ";
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,112 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include <iterator>
|
|
||||||
#include "TypeDef.h"
|
|
||||||
#include "Util.h"
|
|
||||||
#include "AlignmentPhrase.h"
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
|
|
||||||
typedef std::back_insert_iterator<AlignmentPhrase::CollectionType> AlignmentPhraseInserter;
|
|
||||||
|
|
||||||
/** represent the alignment info between source and target phrase */
|
|
||||||
class AlignmentPair
|
|
||||||
{
|
|
||||||
friend std::ostream& operator<<(std::ostream&, const AlignmentPair&);
|
|
||||||
|
|
||||||
protected:
|
|
||||||
AlignmentPhrase m_sourceAlign, m_targetAlign;
|
|
||||||
|
|
||||||
public:
|
|
||||||
// constructor
|
|
||||||
AlignmentPair()
|
|
||||||
{}
|
|
||||||
// constructor, init source size. used in hypo
|
|
||||||
AlignmentPair(size_t sourceSize)
|
|
||||||
:m_sourceAlign(sourceSize)
|
|
||||||
{}
|
|
||||||
|
|
||||||
// constructor, by copy
|
|
||||||
AlignmentPair(const AlignmentPair& a){
|
|
||||||
m_sourceAlign=a.GetAlignmentPhrase(Input);
|
|
||||||
m_targetAlign=a.GetAlignmentPhrase(Output);
|
|
||||||
};
|
|
||||||
|
|
||||||
// constructor, by copy
|
|
||||||
AlignmentPair(const AlignmentPhrase& a, const AlignmentPhrase& b){
|
|
||||||
SetAlignmentPhrase(a,b);
|
|
||||||
};
|
|
||||||
|
|
||||||
~AlignmentPair(){};
|
|
||||||
|
|
||||||
/** get the back_insert_iterator to the source or target alignment vector so that
|
|
||||||
* they could be populated
|
|
||||||
*/
|
|
||||||
AlignmentPhraseInserter GetInserter(FactorDirection direction);
|
|
||||||
|
|
||||||
const AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction) const
|
|
||||||
{
|
|
||||||
return (direction == Input) ? m_sourceAlign : m_targetAlign;
|
|
||||||
}
|
|
||||||
|
|
||||||
AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction)
|
|
||||||
{
|
|
||||||
return (direction == Input) ? m_sourceAlign : m_targetAlign;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetAlignmentPhrase(FactorDirection direction, const AlignmentPhrase& a)
|
|
||||||
{
|
|
||||||
if (direction == Input) m_sourceAlign=a;
|
|
||||||
else m_targetAlign=a;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SetAlignmentPhrase(const AlignmentPhrase& a, const AlignmentPhrase& b)
|
|
||||||
{
|
|
||||||
m_sourceAlign=a;
|
|
||||||
m_targetAlign=b;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** used by the unknown word handler.
|
|
||||||
* Set alignment to 0
|
|
||||||
*/
|
|
||||||
void SetIdentityAlignment();
|
|
||||||
|
|
||||||
//! call Merge() for source and and Add() target alignment phrase
|
|
||||||
void Add(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
|
|
||||||
|
|
||||||
//! call Merge for both source and target alignment phrase
|
|
||||||
void Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
|
|
||||||
|
|
||||||
bool IsCompatible(const AlignmentPair &compare
|
|
||||||
, size_t sourceStart
|
|
||||||
, size_t targetStart) const;
|
|
||||||
|
|
||||||
TO_STRING();
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,233 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#include "AlignmentPhrase.h"
|
|
||||||
#include "WordsRange.h"
|
|
||||||
#include "WordsBitmap.h"
|
|
||||||
#include "UserMessage.h"
|
|
||||||
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
|
|
||||||
/** Overwrite Align with the textual form of an alignment that has Size
 *  unaligned words: one leading space followed by Size copies of "() ".
 */
void EmptyAlignment(std::string &Align, size_t Size)
{
	Align.assign(" ");

	size_t remaining = Size;
	while (remaining > 0)
	{
		Align.append("() ");
		--remaining;
	}
}
|
|
||||||
|
|
||||||
/** Overwrite Align with a uniform alignment: for each of the fSize source
 *  words, emit "(0,1,...,eSize-1) ", i.e. every source word is linked to every
 *  target position.
 */
void UniformAlignment(std::string &Align, size_t fSize, size_t eSize)
{
	// The target list is the same for every source word, so render it once.
	std::stringstream targetList;
	targetList << "(";
	for (size_t target = 0 ; target < eSize ; ++target)
	{
		if (target != 0)
			targetList << ",";
		targetList << target;
	}
	targetList << ") ";
	const std::string perWord = targetList.str();

	std::string assembled;
	for (size_t source = 0 ; source < fSize ; ++source)
		assembled += perWord;

	Align = assembled;
}
|
|
||||||
|
|
||||||
/** Copy constructor: deep copy.
 *  The new phrase has the same number of slots as the source; every slot that
 *  exists in the source gets its own freshly allocated AlignmentElement, and
 *  every other slot stays NULL.
 */
AlignmentPhrase::AlignmentPhrase(const AlignmentPhrase &copy)
	: m_collection(copy.m_collection.size())	// slots are value-initialised to NULL
{
	for (size_t pos = 0 ; pos < copy.m_collection.size() ; ++pos)
	{
		if (copy.Exists(pos))
			m_collection[pos] = new AlignmentElement(copy.GetElement(pos));
	}
}
|
|
||||||
|
|
||||||
/** Copy assignment: deep copy.
 *  Elements currently owned by this phrase are deleted first, so assigning
 *  over a populated phrase no longer leaks them. Self-assignment is a no-op.
 *  @return *this
 */
AlignmentPhrase& AlignmentPhrase::operator=(const AlignmentPhrase &copy)
{
	if (this == &copy)
		return *this;

	// release what we own before the slots are overwritten or dropped
	for (size_t pos = 0 ; pos < m_collection.size() ; ++pos)
		delete m_collection[pos];

	// start from all-NULL slots so the phrase stays consistent even if a
	// later allocation throws
	m_collection.assign(copy.GetSize(), static_cast<AlignmentElement*>(NULL));

	for (size_t pos = 0 ; pos < copy.GetSize() ; ++pos)
	{
		if (copy.Exists(pos))
			m_collection[pos] = new AlignmentElement(copy.GetElement(pos));
	}
	return *this;
}
|
|
||||||
|
|
||||||
/** Construct a phrase with `size` slots, none of which holds an element yet
 *  (every slot is NULL).
 */
AlignmentPhrase::AlignmentPhrase(size_t size)
	:m_collection(size, static_cast<AlignmentElement*>(NULL))
{
}
|
|
||||||
|
|
||||||
/** Destructor: hands the element collection to RemoveAllInColl() for cleanup. */
AlignmentPhrase::~AlignmentPhrase()
{
	RemoveAllInColl(m_collection);
}
|
|
||||||
|
|
||||||
bool AlignmentPhrase::IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const
|
|
||||||
{
|
|
||||||
const size_t compareSize = min(GetSize() - mergePosStart , compare.GetSize());
|
|
||||||
|
|
||||||
size_t posThis = mergePosStart;
|
|
||||||
for (size_t posCompare = 0 ; posCompare < compareSize ; ++posCompare)
|
|
||||||
{
|
|
||||||
if (!Exists(posThis))
|
|
||||||
continue;
|
|
||||||
assert(posThis < GetSize());
|
|
||||||
|
|
||||||
const AlignmentElement &alignThis = GetElement(posThis);
|
|
||||||
AlignmentElement alignCompare = compare.GetElement(posCompare);
|
|
||||||
|
|
||||||
// shift alignment
|
|
||||||
alignCompare.Shift( (int)shiftPos);
|
|
||||||
|
|
||||||
if (!alignThis.Equals(alignCompare))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
posThis++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPhrase::Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
|
|
||||||
{
|
|
||||||
size_t insertPos = startPos;
|
|
||||||
for (size_t pos = 0 ; pos < newAlignment.GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
// shift alignment
|
|
||||||
AlignmentElement alignElement = newAlignment.GetElement(pos);
|
|
||||||
alignElement.Shift( (int)shift );
|
|
||||||
|
|
||||||
if (insertPos >= GetSize())
|
|
||||||
{ // probably doing target. append alignment to end
|
|
||||||
assert(insertPos == GetSize());
|
|
||||||
Add(alignElement);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (Exists(insertPos))
|
|
||||||
{ // add
|
|
||||||
m_collection[insertPos]->SetIntersect(alignElement);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
m_collection[insertPos] = new AlignmentElement(alignElement);
|
|
||||||
}
|
|
||||||
|
|
||||||
insertPos++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPhrase::Shift(size_t shift)
|
|
||||||
{
|
|
||||||
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
// shift alignment
|
|
||||||
GetElement(pos).Shift( (int)shift );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPhrase::Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
|
|
||||||
{
|
|
||||||
assert(startPos < GetSize());
|
|
||||||
|
|
||||||
size_t insertPos = startPos;
|
|
||||||
for (size_t pos = 0 ; pos < newAlignment.GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
// shift alignment
|
|
||||||
AlignmentElement alignElement = newAlignment.GetElement(pos);
|
|
||||||
alignElement.Shift( (int)shift );
|
|
||||||
|
|
||||||
// merge elements to only contain co-joined elements
|
|
||||||
GetElement(insertPos).SetIntersect(alignElement);
|
|
||||||
|
|
||||||
insertPos++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPhrase::AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget)
|
|
||||||
{
|
|
||||||
list<size_t>::iterator iter;
|
|
||||||
for (iter = uniformAlignmentTarget.begin() ; iter != uniformAlignmentTarget.end() ; ++iter)
|
|
||||||
{
|
|
||||||
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
AlignmentElement &alignElement = GetElement(pos);
|
|
||||||
alignElement.Add(*iter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase)
|
|
||||||
{
|
|
||||||
|
|
||||||
for (size_t pos = 0 ; pos < alignmentPhrase.GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
if (alignmentPhrase.Exists(pos))
|
|
||||||
{
|
|
||||||
if (pos) out << " ";
|
|
||||||
const AlignmentElement &alignElement = alignmentPhrase.GetElement(pos);
|
|
||||||
out << alignElement;
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
stringstream strme;
|
|
||||||
strme << "No alignment at position " << pos;
|
|
||||||
UserMessage::Add(strme.str());
|
|
||||||
abort();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AlignmentPhrase::print(std::ostream& out, size_t offset) const
|
|
||||||
{
|
|
||||||
|
|
||||||
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
|
|
||||||
{
|
|
||||||
if (Exists(pos))
|
|
||||||
{
|
|
||||||
if (pos) out << " ";
|
|
||||||
out << pos+offset << "=";
|
|
||||||
const AlignmentElement &alignElement = GetElement(pos);
|
|
||||||
out << alignElement;
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
stringstream strme;
|
|
||||||
strme << "No alignment at position " << pos;
|
|
||||||
UserMessage::Add(strme.str());
|
|
||||||
abort();
|
|
||||||
// out << pos+offset << "=";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TO_STRING_BODY(AlignmentPhrase);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,108 +0,0 @@
|
|||||||
// $Id$
|
|
||||||
/***********************************************************************
|
|
||||||
Moses - factored phrase-based language decoder
|
|
||||||
Copyright (C) 2006 University of Edinburgh
|
|
||||||
|
|
||||||
This library is free software; you can redistribute it and/or
|
|
||||||
modify it under the terms of the GNU Lesser General Public
|
|
||||||
License as published by the Free Software Foundation; either
|
|
||||||
version 2.1 of the License, or (at your option) any later version.
|
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
||||||
Lesser General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
|
||||||
License along with this library; if not, write to the Free Software
|
|
||||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
||||||
***********************************************************************/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
#include <vector>
|
|
||||||
#include "AlignmentElement.h"
|
|
||||||
#include "Util.h"
|
|
||||||
|
|
||||||
namespace Moses
|
|
||||||
{
|
|
||||||
|
|
||||||
void EmptyAlignment(std::string &Align, size_t Size);
|
|
||||||
void UniformAlignment(std::string &Align, size_t fSize, size_t eSize);
|
|
||||||
|
|
||||||
class WordsRange;
|
|
||||||
class WordsBitmap;
|
|
||||||
|
|
||||||
//! alignments of each word in a phrase
|
|
||||||
class AlignmentPhrase
|
|
||||||
{
|
|
||||||
friend std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase);
|
|
||||||
public:
|
|
||||||
typedef std::vector<AlignmentElement*> CollectionType;
|
|
||||||
protected:
|
|
||||||
CollectionType m_collection;
|
|
||||||
public:
|
|
||||||
AlignmentPhrase(){};
|
|
||||||
AlignmentPhrase(size_t size);
|
|
||||||
|
|
||||||
/** copy constructor */
|
|
||||||
AlignmentPhrase(const AlignmentPhrase ©);
|
|
||||||
AlignmentPhrase& operator=(const AlignmentPhrase&);
|
|
||||||
|
|
||||||
/** destructor */
|
|
||||||
~AlignmentPhrase();
|
|
||||||
|
|
||||||
/** compare with another alignment phrase, return true if the other alignment phrase is a
|
|
||||||
* subset of this. Used to see whether a trans opt can be used to expand a hypo
|
|
||||||
*/
|
|
||||||
bool IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const;
|
|
||||||
|
|
||||||
//! add newAlignment to end of this alignment phrase, offsetting by newAlignmentRange.GetStartPos()
|
|
||||||
void Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);
|
|
||||||
|
|
||||||
/*< merge newAlignment to this alignment phrase, offsetting by newAlignmentRange.GetStartPos().
|
|
||||||
Use intersection of each alignment element
|
|
||||||
*/
|
|
||||||
void Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);
|
|
||||||
|
|
||||||
void Shift(size_t shift);
|
|
||||||
|
|
||||||
size_t GetSize() const
|
|
||||||
{
|
|
||||||
return m_collection.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
CollectionType &GetVector()
|
|
||||||
{
|
|
||||||
return m_collection;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Add(const AlignmentElement &element)
|
|
||||||
{
|
|
||||||
m_collection.push_back(new AlignmentElement(element));
|
|
||||||
}
|
|
||||||
|
|
||||||
// add elements which didn't have alignments, so are set to uniform on the other side
|
|
||||||
void AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget);
|
|
||||||
|
|
||||||
AlignmentElement &GetElement(size_t pos)
|
|
||||||
{ return *m_collection[pos]; }
|
|
||||||
|
|
||||||
const AlignmentElement &GetElement(size_t pos) const
|
|
||||||
{ return *m_collection[pos]; }
|
|
||||||
|
|
||||||
bool Exists(size_t pos) const
|
|
||||||
{
|
|
||||||
return m_collection[pos] != NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void print(std::ostream& out, size_t offset=0) const;
|
|
||||||
|
|
||||||
TO_STRING();
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -57,22 +57,27 @@ class HypothesisScoreOrdererNoDistortion
|
|||||||
class HypothesisScoreOrdererWithDistortion
|
class HypothesisScoreOrdererWithDistortion
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static const WordsRange *transOptRange; // TODO. HACK!!
|
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
|
||||||
|
m_transOptRange(transOptRange) {}
|
||||||
|
|
||||||
|
const WordsRange* m_transOptRange;
|
||||||
|
|
||||||
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
|
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
|
||||||
{
|
{
|
||||||
assert (transOptRange != NULL);
|
assert (m_transOptRange != NULL);
|
||||||
|
|
||||||
const float weightDistortion = StaticData::Instance().GetWeightDistortion();
|
const float weightDistortion = StaticData::Instance().GetWeightDistortion();
|
||||||
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
|
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
|
||||||
const float distortionScoreA = dsp->CalculateDistortionScore(
|
const float distortionScoreA = dsp->CalculateDistortionScore(
|
||||||
|
*hypoA,
|
||||||
hypoA->GetCurrSourceWordsRange(),
|
hypoA->GetCurrSourceWordsRange(),
|
||||||
*transOptRange,
|
*m_transOptRange,
|
||||||
hypoA->GetWordsBitmap().GetFirstGapPos()
|
hypoA->GetWordsBitmap().GetFirstGapPos()
|
||||||
);
|
);
|
||||||
const float distortionScoreB = dsp->CalculateDistortionScore(
|
const float distortionScoreB = dsp->CalculateDistortionScore(
|
||||||
|
*hypoB,
|
||||||
hypoB->GetCurrSourceWordsRange(),
|
hypoB->GetCurrSourceWordsRange(),
|
||||||
*transOptRange,
|
*m_transOptRange,
|
||||||
hypoB->GetWordsBitmap().GetFirstGapPos()
|
hypoB->GetWordsBitmap().GetFirstGapPos()
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -95,8 +100,6 @@ class HypothesisScoreOrdererWithDistortion
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL;
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// BackwardsEdge Code
|
// BackwardsEdge Code
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -104,7 +107,8 @@ const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL;
|
|||||||
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
||||||
, BitmapContainer &parent
|
, BitmapContainer &parent
|
||||||
, const TranslationOptionList &translations
|
, const TranslationOptionList &translations
|
||||||
, const SquareMatrix &futureScore)
|
, const SquareMatrix &futureScore,
|
||||||
|
const InputType& itype)
|
||||||
: m_initialized(false)
|
: m_initialized(false)
|
||||||
, m_prevBitmapContainer(prevBitmapContainer)
|
, m_prevBitmapContainer(prevBitmapContainer)
|
||||||
, m_parent(parent)
|
, m_parent(parent)
|
||||||
@ -131,7 +135,6 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
|||||||
}
|
}
|
||||||
|
|
||||||
const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
|
const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
|
||||||
const InputType *itype = StaticData::Instance().GetInput();
|
|
||||||
|
|
||||||
HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
|
HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
|
||||||
HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
|
HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
|
||||||
@ -149,7 +152,7 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int distortionDistance = itype->ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
|
int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
|
||||||
, transOptRange);
|
, transOptRange);
|
||||||
|
|
||||||
if (distortionDistance <= maxDistortion)
|
if (distortionDistance <= maxDistortion)
|
||||||
@ -169,8 +172,8 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
|||||||
assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
|
assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
|
||||||
}
|
}
|
||||||
|
|
||||||
HypothesisScoreOrdererWithDistortion::transOptRange = &transOptRange;
|
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
|
||||||
std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererWithDistortion());
|
std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
|
||||||
|
|
||||||
// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
|
// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
|
||||||
}
|
}
|
||||||
|
@ -185,7 +185,8 @@ class BackwardsEdge
|
|||||||
BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
BackwardsEdge(const BitmapContainer &prevBitmapContainer
|
||||||
, BitmapContainer &parent
|
, BitmapContainer &parent
|
||||||
, const TranslationOptionList &translations
|
, const TranslationOptionList &translations
|
||||||
, const SquareMatrix &futureScore);
|
, const SquareMatrix &futureScore,
|
||||||
|
const InputType& source);
|
||||||
~BackwardsEdge();
|
~BackwardsEdge();
|
||||||
|
|
||||||
bool GetInitialized();
|
bool GetInitialized();
|
||||||
|
@ -35,14 +35,24 @@ class DecodeGraph
|
|||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
std::list<const DecodeStep*> m_steps;
|
std::list<const DecodeStep*> m_steps;
|
||||||
|
size_t m_position;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* position: The position of this graph within the decode sequence.
|
||||||
|
**/
|
||||||
|
DecodeGraph(size_t position): m_position(position) {}
|
||||||
//! iterators
|
//! iterators
|
||||||
typedef std::list<const DecodeStep*>::iterator iterator;
|
typedef std::list<const DecodeStep*>::iterator iterator;
|
||||||
typedef std::list<const DecodeStep*>::const_iterator const_iterator;
|
typedef std::list<const DecodeStep*>::const_iterator const_iterator;
|
||||||
const_iterator begin() const { return m_steps.begin(); }
|
const_iterator begin() const { return m_steps.begin(); }
|
||||||
const_iterator end() const { return m_steps.end(); }
|
const_iterator end() const { return m_steps.end(); }
|
||||||
|
|
||||||
|
size_t GetPosition() const
|
||||||
|
{
|
||||||
|
return m_position;
|
||||||
|
}
|
||||||
|
|
||||||
~DecodeGraph();
|
~DecodeGraph();
|
||||||
|
|
||||||
//! Add another decode step to the graph
|
//! Add another decode step to the graph
|
||||||
|
@ -29,14 +29,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev)
|
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev)
|
||||||
: DecodeStep(dict, prev)
|
: DecodeStep(dict, prev), m_phraseDictionary(dict)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
|
/*const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
|
||||||
{
|
{
|
||||||
return *static_cast<const PhraseDictionary*>(m_ptr);
|
return *m_phraseDictionary;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
|
TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
|
||||||
{
|
{
|
||||||
@ -102,11 +102,10 @@ void DecodeStepTranslation::ProcessInitialTranslation(
|
|||||||
,PartialTranslOptColl &outputPartialTranslOptColl
|
,PartialTranslOptColl &outputPartialTranslOptColl
|
||||||
, size_t startPos, size_t endPos, bool adhereTableLimit) const
|
, size_t startPos, size_t endPos, bool adhereTableLimit) const
|
||||||
{
|
{
|
||||||
const PhraseDictionary &phraseDictionary = GetPhraseDictionary();
|
const size_t tableLimit = m_phraseDictionary->GetTableLimit();
|
||||||
const size_t tableLimit = phraseDictionary.GetTableLimit();
|
|
||||||
|
|
||||||
const WordsRange wordsRange(startPos, endPos);
|
const WordsRange wordsRange(startPos, endPos);
|
||||||
const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(source,wordsRange);
|
const TargetPhraseCollection *phraseColl = m_phraseDictionary->GetTargetPhraseCollection(source,wordsRange);
|
||||||
|
|
||||||
if (phraseColl != NULL)
|
if (phraseColl != NULL)
|
||||||
{
|
{
|
||||||
|
@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "DecodeStep.h"
|
#include "DecodeStep.h"
|
||||||
|
#include "PhraseDictionary.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -57,6 +58,7 @@ private:
|
|||||||
This function runs IsCompatible() to ensure the two can be merged
|
This function runs IsCompatible() to ensure the two can be merged
|
||||||
*/
|
*/
|
||||||
TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
|
TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
|
||||||
|
PhraseDictionary* m_phraseDictionary;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -51,11 +51,12 @@ std::string DistortionScoreProducer::GetScoreProducerWeightShortName() const
|
|||||||
return "d";
|
return "d";
|
||||||
}
|
}
|
||||||
|
|
||||||
float DistortionScoreProducer::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
|
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
|
||||||
|
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
|
||||||
{
|
{
|
||||||
const int USE_OLD = 1;
|
const int USE_OLD = 1;
|
||||||
if (USE_OLD) {
|
if (USE_OLD) {
|
||||||
return - (float) StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
|
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
|
||||||
@ -85,6 +86,7 @@ FFState* DistortionScoreProducer::Evaluate(
|
|||||||
ScoreComponentCollection* out) const {
|
ScoreComponentCollection* out) const {
|
||||||
const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
|
const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
|
||||||
const float distortionScore = CalculateDistortionScore(
|
const float distortionScore = CalculateDistortionScore(
|
||||||
|
hypo,
|
||||||
prev->range,
|
prev->range,
|
||||||
hypo.GetCurrSourceWordsRange(),
|
hypo.GetCurrSourceWordsRange(),
|
||||||
prev->first_gap);
|
prev->first_gap);
|
||||||
|
@ -16,7 +16,8 @@ class DistortionScoreProducer : public StatefulFeatureFunction {
|
|||||||
public:
|
public:
|
||||||
DistortionScoreProducer(ScoreIndexManager &scoreIndexManager);
|
DistortionScoreProducer(ScoreIndexManager &scoreIndexManager);
|
||||||
|
|
||||||
float CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
|
float CalculateDistortionScore(const Hypothesis& hypo,
|
||||||
|
const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
|
||||||
|
|
||||||
size_t GetNumScoreComponents() const;
|
size_t GetNumScoreComponents() const;
|
||||||
std::string GetScoreProducerDescription() const;
|
std::string GetScoreProducerDescription() const;
|
||||||
|
@ -38,7 +38,10 @@ void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorTyp
|
|||||||
ifstream inFile(filePath.c_str());
|
ifstream inFile(filePath.c_str());
|
||||||
|
|
||||||
string line;
|
string line;
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
|
||||||
|
boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
|
||||||
|
#endif
|
||||||
while( !getline(inFile, line, '\n').eof())
|
while( !getline(inFile, line, '\n').eof())
|
||||||
{
|
{
|
||||||
vector<string> token = Tokenize( line );
|
vector<string> token = Tokenize( line );
|
||||||
@ -53,6 +56,9 @@ void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorTyp
|
|||||||
|
|
||||||
bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString)
|
bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString)
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
boost::shared_lock<boost::shared_mutex> lock(m_accessLock);
|
||||||
|
#endif
|
||||||
// find string id
|
// find string id
|
||||||
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
|
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
|
||||||
|
|
||||||
@ -67,6 +73,10 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
|
|||||||
, FactorType factorType
|
, FactorType factorType
|
||||||
, const string &factorString)
|
, const string &factorString)
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
|
||||||
|
boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
|
||||||
|
#endif
|
||||||
// find string id
|
// find string id
|
||||||
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
|
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
|
||||||
pair<FactorSet::iterator, bool> ret = m_collection.insert( Factor(direction, factorType, ptrString, m_factorId) );
|
pair<FactorSet::iterator, bool> ret = m_collection.insert( Factor(direction, factorType, ptrString, m_factorId) );
|
||||||
|
@ -23,6 +23,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
|
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/shared_mutex.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "Factor.h"
|
#include "Factor.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
@ -47,6 +52,10 @@ class FactorCollection
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
static FactorCollection s_instance;
|
static FactorCollection s_instance;
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
//reader-writer lock
|
||||||
|
boost::shared_mutex m_accessLock;
|
||||||
|
#endif
|
||||||
|
|
||||||
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
|
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
|
||||||
FactorSet m_collection; /**< collection of all factors */
|
FactorSet m_collection; /**< collection of all factors */
|
||||||
|
@ -109,8 +109,8 @@ inline FILE* fOpen(const char* fn,const char* m) {
|
|||||||
if(FILE* f=fopen(fn,m))
|
if(FILE* f=fopen(fn,m))
|
||||||
return f;
|
return f;
|
||||||
else {
|
else {
|
||||||
assert(false);
|
|
||||||
UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
|
UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
|
||||||
|
assert(false);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,12 +34,12 @@ public:
|
|||||||
operator Ptr () {load();return t;}
|
operator Ptr () {load();return t;}
|
||||||
|
|
||||||
const T& operator* () const {load();return *t;}
|
const T& operator* () const {load();return *t;}
|
||||||
const Ptr operator->() const {load();return t;}
|
Ptr operator->() const {load();return t;}
|
||||||
operator const Ptr () const {load();return t;}
|
operator Ptr () const {load();return t;}
|
||||||
|
|
||||||
// direct access to pointer, use with care!
|
// direct access to pointer, use with care!
|
||||||
Ptr getPtr() {return t;}
|
Ptr getPtr() {return t;}
|
||||||
const Ptr getPtr() const {return t;}
|
Ptr getPtr() const {return t;}
|
||||||
|
|
||||||
operator bool() const {return (f && pos!=InvalidOffT);}
|
operator bool() const {return (f && pos!=InvalidOffT);}
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
#include "InputType.h"
|
#include "InputType.h"
|
||||||
#include "LMList.h"
|
#include "LMList.h"
|
||||||
|
#include "Manager.h"
|
||||||
#include "hash.h"
|
#include "hash.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -48,9 +49,8 @@ unsigned int Hypothesis::s_HypothesesCreated = 0;
|
|||||||
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
|
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget)
|
||||||
: m_prevHypo(NULL)
|
: m_prevHypo(NULL)
|
||||||
, m_transOpt(NULL)
|
|
||||||
, m_targetPhrase(emptyTarget)
|
, m_targetPhrase(emptyTarget)
|
||||||
, m_sourcePhrase(0)
|
, m_sourcePhrase(0)
|
||||||
, m_sourceCompleted(source.GetSize())
|
, m_sourceCompleted(source.GetSize())
|
||||||
@ -60,8 +60,10 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
|||||||
, m_wordDeleted(false)
|
, m_wordDeleted(false)
|
||||||
, m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
|
, m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
|
||||||
, m_arcList(NULL)
|
, m_arcList(NULL)
|
||||||
, m_id(0)
|
, m_transOpt(NULL)
|
||||||
, m_alignPair(source.GetSize())
|
, m_manager(manager)
|
||||||
|
|
||||||
|
, m_id(0)
|
||||||
{ // used for initial seeding of trans process
|
{ // used for initial seeding of trans process
|
||||||
// initialize scores
|
// initialize scores
|
||||||
//_hash_computed = false;
|
//_hash_computed = false;
|
||||||
@ -78,7 +80,6 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
|
|||||||
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
|
||||||
: m_prevHypo(&prevHypo)
|
: m_prevHypo(&prevHypo)
|
||||||
, m_targetPhrase(transOpt.GetTargetPhrase())
|
, m_targetPhrase(transOpt.GetTargetPhrase())
|
||||||
, m_transOpt(&transOpt)
|
|
||||||
, m_sourcePhrase(transOpt.GetSourcePhrase())
|
, m_sourcePhrase(transOpt.GetSourcePhrase())
|
||||||
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
, m_sourceCompleted (prevHypo.m_sourceCompleted )
|
||||||
, m_sourceInput (prevHypo.m_sourceInput)
|
, m_sourceInput (prevHypo.m_sourceInput)
|
||||||
@ -88,11 +89,12 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
|
|||||||
, m_wordDeleted(false)
|
, m_wordDeleted(false)
|
||||||
, m_totalScore(0.0f)
|
, m_totalScore(0.0f)
|
||||||
, m_futureScore(0.0f)
|
, m_futureScore(0.0f)
|
||||||
, m_ffStates(prevHypo.m_ffStates.size())
|
|
||||||
, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
|
, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
|
||||||
|
, m_ffStates(prevHypo.m_ffStates.size())
|
||||||
, m_arcList(NULL)
|
, m_arcList(NULL)
|
||||||
|
, m_transOpt(&transOpt)
|
||||||
|
, m_manager(prevHypo.GetManager())
|
||||||
, m_id(s_HypothesesCreated++)
|
, m_id(s_HypothesesCreated++)
|
||||||
, m_alignPair(prevHypo.m_alignPair)
|
|
||||||
{
|
{
|
||||||
// assert that we are not extending our hypothesis by retranslating something
|
// assert that we are not extending our hypothesis by retranslating something
|
||||||
// that this hypothesis has already translated!
|
// that this hypothesis has already translated!
|
||||||
@ -221,13 +223,13 @@ Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOpti
|
|||||||
* return the subclass of Hypothesis most appropriate to the given target phrase
|
* return the subclass of Hypothesis most appropriate to the given target phrase
|
||||||
*/
|
*/
|
||||||
|
|
||||||
Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget)
|
Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TargetPhrase &emptyTarget)
|
||||||
{
|
{
|
||||||
#ifdef USE_HYPO_POOL
|
#ifdef USE_HYPO_POOL
|
||||||
Hypothesis *ptr = s_objectPool.getPtr();
|
Hypothesis *ptr = s_objectPool.getPtr();
|
||||||
return new(ptr) Hypothesis(m_source, emptyTarget);
|
return new(ptr) Hypothesis(manager, m_source, emptyTarget);
|
||||||
#else
|
#else
|
||||||
return new Hypothesis(m_source, emptyTarget);
|
return new Hypothesis(manager, m_source, emptyTarget);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -301,7 +303,7 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
|
|||||||
// TOTAL
|
// TOTAL
|
||||||
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
|
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
|
||||||
|
|
||||||
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
|
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Calculates the expected score of extending this hypothesis with the
|
/** Calculates the expected score of extending this hypothesis with the
|
||||||
@ -334,7 +336,7 @@ float Hypothesis::CalcExpectedScore( const SquareMatrix &futureScore ) {
|
|||||||
// TOTAL
|
// TOTAL
|
||||||
float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;
|
float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;
|
||||||
|
|
||||||
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
|
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -355,7 +357,7 @@ void Hypothesis::CalcRemainingScore()
|
|||||||
// TOTAL
|
// TOTAL
|
||||||
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
|
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
|
||||||
|
|
||||||
IFVERBOSE(2) { StaticData::Instance().GetSentenceStats().AddTimeOtherScore( clock()-t ); }
|
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
|
||||||
}
|
}
|
||||||
|
|
||||||
const Hypothesis* Hypothesis::GetPrevHypo()const{
|
const Hypothesis* Hypothesis::GetPrevHypo()const{
|
||||||
@ -387,10 +389,7 @@ void Hypothesis::PrintHypothesis() const
|
|||||||
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
|
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
|
||||||
<< *m_sourcePhrase <<endl);
|
<< *m_sourcePhrase <<endl);
|
||||||
TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
|
TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
|
||||||
if (PrintAlignmentInfo()){
|
|
||||||
TRACE_ERR( "\tsource-target word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Input) << endl); // <<" => source to target word-to-word alignment
|
|
||||||
TRACE_ERR( "\ttarget-source word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Output) << endl); // <<" => target to source word-to-word alignment
|
|
||||||
}
|
|
||||||
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
|
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
|
||||||
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
|
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
|
||||||
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
|
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
|
||||||
@ -456,14 +455,7 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
|
|||||||
out << " " << hypothesis.GetScoreBreakdown();
|
out << " " << hypothesis.GetScoreBreakdown();
|
||||||
|
|
||||||
// alignment
|
// alignment
|
||||||
if (hypothesis.PrintAlignmentInfo()){
|
|
||||||
out << " [f2e:";
|
|
||||||
hypothesis.SourceAlignmentToStream(out);
|
|
||||||
out << "]";
|
|
||||||
out << " [e2f:";
|
|
||||||
hypothesis.TargetAlignmentToStream(out);
|
|
||||||
out << "]";
|
|
||||||
}
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,7 +36,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "LexicalReordering.h"
|
#include "LexicalReordering.h"
|
||||||
#include "InputType.h"
|
#include "InputType.h"
|
||||||
#include "ObjectPool.h"
|
#include "ObjectPool.h"
|
||||||
#include "AlignmentPair.h"
|
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
@ -47,6 +46,7 @@ class TranslationOption;
|
|||||||
class WordsRange;
|
class WordsRange;
|
||||||
class Hypothesis;
|
class Hypothesis;
|
||||||
class FFState;
|
class FFState;
|
||||||
|
class Manager;
|
||||||
|
|
||||||
typedef std::vector<Hypothesis*> ArcList;
|
typedef std::vector<Hypothesis*> ArcList;
|
||||||
|
|
||||||
@ -82,14 +82,14 @@ protected:
|
|||||||
std::vector<const FFState*> m_ffStates;
|
std::vector<const FFState*> m_ffStates;
|
||||||
const Hypothesis *m_winningHypo;
|
const Hypothesis *m_winningHypo;
|
||||||
ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
|
ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
|
||||||
AlignmentPair m_alignPair;
|
|
||||||
const TranslationOption *m_transOpt;
|
const TranslationOption *m_transOpt;
|
||||||
|
Manager& m_manager;
|
||||||
|
|
||||||
int m_id; /*! numeric ID of this hypothesis, used for logging */
|
int m_id; /*! numeric ID of this hypothesis, used for logging */
|
||||||
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
|
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
|
||||||
|
|
||||||
/*! used by initial seeding of the translation process */
|
/*! used by initial seeding of the translation process */
|
||||||
Hypothesis(InputType const& source, const TargetPhrase &emptyTarget);
|
Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
|
||||||
/*! used when creating a new hypothesis using a translation option (phrase translation) */
|
/*! used when creating a new hypothesis using a translation option (phrase translation) */
|
||||||
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
|
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
|
||||||
|
|
||||||
@ -104,15 +104,17 @@ public:
|
|||||||
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
||||||
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constraint);
|
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constraint);
|
||||||
|
|
||||||
static Hypothesis* Create(const WordsBitmap &initialCoverage);
|
static Hypothesis* Create(Manager& manager, const WordsBitmap &initialCoverage);
|
||||||
|
|
||||||
/** return the subclass of Hypothesis most appropriate to the given target phrase */
|
/** return the subclass of Hypothesis most appropriate to the given target phrase */
|
||||||
static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget);
|
static Hypothesis* Create(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
|
||||||
|
|
||||||
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
/** return the subclass of Hypothesis most appropriate to the given translation option */
|
||||||
Hypothesis* CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const;
|
Hypothesis* CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const;
|
||||||
|
|
||||||
void PrintHypothesis() const;
|
void PrintHypothesis() const;
|
||||||
|
|
||||||
|
const InputType& GetInput() const {return m_sourceInput;}
|
||||||
|
|
||||||
/** return target phrase used to create this hypothesis */
|
/** return target phrase used to create this hypothesis */
|
||||||
// const Phrase &GetCurrTargetPhrase() const
|
// const Phrase &GetCurrTargetPhrase() const
|
||||||
@ -133,6 +135,11 @@ public:
|
|||||||
{
|
{
|
||||||
return m_currTargetWordsRange;
|
return m_currTargetWordsRange;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Manager& GetManager() const
|
||||||
|
{
|
||||||
|
return m_manager;
|
||||||
|
}
|
||||||
|
|
||||||
/** output length of the translation option used to create this hypothesis */
|
/** output length of the translation option used to create this hypothesis */
|
||||||
inline size_t GetCurrTargetLength() const
|
inline size_t GetCurrTargetLength() const
|
||||||
@ -223,37 +230,9 @@ public:
|
|||||||
|
|
||||||
inline bool PrintAlignmentInfo() const{ return GetCurrTargetPhrase().PrintAlignmentInfo(); }
|
inline bool PrintAlignmentInfo() const{ return GetCurrTargetPhrase().PrintAlignmentInfo(); }
|
||||||
|
|
||||||
void SourceAlignmentToStream(std::ostream& out) const
|
|
||||||
{
|
|
||||||
if (m_prevHypo != NULL)
|
|
||||||
{
|
|
||||||
m_prevHypo->SourceAlignmentToStream(out);
|
|
||||||
AlignmentPhrase alignSourcePhrase=GetCurrTargetPhrase().GetAlignmentPair().GetAlignmentPhrase(Input);
|
|
||||||
alignSourcePhrase.Shift(m_currTargetWordsRange.GetStartPos());
|
|
||||||
out << " ";
|
|
||||||
/*
|
|
||||||
out << "\nGetCurrTargetPhrase(): " << GetCurrTargetPhrase();
|
|
||||||
out << "\nm_currTargetWordsRange: " << m_currTargetWordsRange << "->";
|
|
||||||
*/
|
|
||||||
alignSourcePhrase.print(out,m_currSourceWordsRange.GetStartPos());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void TargetAlignmentToStream(std::ostream& out) const
|
|
||||||
{
|
|
||||||
if (m_prevHypo != NULL)
|
|
||||||
{
|
|
||||||
m_prevHypo->TargetAlignmentToStream(out);
|
|
||||||
AlignmentPhrase alignTargetPhrase=GetCurrTargetPhrase().GetAlignmentPair().GetAlignmentPhrase(Output);
|
|
||||||
alignTargetPhrase.Shift(m_currSourceWordsRange.GetStartPos());
|
|
||||||
out << " ";
|
|
||||||
/*
|
|
||||||
out << "\nGetCurrTargetPhrase(): " << GetCurrTargetPhrase();
|
|
||||||
out << "\nm_currSourceWordsRange: " << m_currSourceWordsRange << "->";
|
|
||||||
*/
|
|
||||||
alignTargetPhrase.print(out,m_currTargetWordsRange.GetStartPos());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
TO_STRING();
|
TO_STRING();
|
||||||
|
|
||||||
@ -283,11 +262,7 @@ public:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
//! vector of what source words were aligned to each target
|
|
||||||
const AlignmentPair &GetAlignmentPair() const
|
|
||||||
{
|
|
||||||
return m_alignPair;
|
|
||||||
}
|
|
||||||
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
|
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
|
||||||
size_t GetNextStartPos(const TranslationOption &transOpt) const;
|
size_t GetNextStartPos(const TranslationOption &transOpt) const;
|
||||||
|
|
||||||
|
@ -8,14 +8,19 @@
|
|||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class Manager;
|
||||||
|
|
||||||
class HypothesisStack
|
class HypothesisStack
|
||||||
{
|
{
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
|
typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
|
||||||
_HCType m_hypos; /**< contains hypotheses */
|
_HCType m_hypos; /**< contains hypotheses */
|
||||||
|
Manager& m_manager;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
HypothesisStack(Manager& manager): m_manager(manager) {}
|
||||||
typedef _HCType::iterator iterator;
|
typedef _HCType::iterator iterator;
|
||||||
typedef _HCType::const_iterator const_iterator;
|
typedef _HCType::const_iterator const_iterator;
|
||||||
//! iterators
|
//! iterators
|
||||||
|
@ -26,12 +26,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
|
#include "Manager.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
HypothesisStackCubePruning::HypothesisStackCubePruning()
|
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
|
||||||
|
HypothesisStack(manager)
|
||||||
{
|
{
|
||||||
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
|
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
|
||||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||||
@ -85,7 +87,7 @@ bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
|
|||||||
{
|
{
|
||||||
if (hypo->GetTotalScore() < m_worstScore)
|
if (hypo->GetTotalScore() < m_worstScore)
|
||||||
{ // too bad for stack. don't bother adding hypo into collection
|
{ // too bad for stack. don't bother adding hypo into collection
|
||||||
StaticData::Instance().GetSentenceStats().AddDiscarded();
|
m_manager.GetSentenceStats().AddDiscarded();
|
||||||
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
||||||
FREEHYPO(hypo);
|
FREEHYPO(hypo);
|
||||||
return false;
|
return false;
|
||||||
@ -103,7 +105,7 @@ bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
|
|||||||
Hypothesis *hypoExisting = *iterExisting;
|
Hypothesis *hypoExisting = *iterExisting;
|
||||||
assert(iterExisting != m_hypos.end());
|
assert(iterExisting != m_hypos.end());
|
||||||
|
|
||||||
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
||||||
|
|
||||||
// found existing hypo with same target ending.
|
// found existing hypo with same target ending.
|
||||||
// keep the best 1
|
// keep the best 1
|
||||||
@ -187,7 +189,7 @@ void HypothesisStackCubePruning::PruneToSize(size_t newSize)
|
|||||||
{
|
{
|
||||||
iterator iterRemove = iter++;
|
iterator iterRemove = iter++;
|
||||||
Remove(iterRemove);
|
Remove(iterRemove);
|
||||||
StaticData::Instance().GetSentenceStats().AddPruning();
|
m_manager.GetSentenceStats().AddPruning();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -273,7 +275,8 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
|
|||||||
BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
|
BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
|
||||||
, *bmContainer
|
, *bmContainer
|
||||||
, transOptList
|
, transOptList
|
||||||
, futureScore);
|
, futureScore,
|
||||||
|
m_manager.GetSource());
|
||||||
bmContainer->AddBackwardsEdge(edge);
|
bmContainer->AddBackwardsEdge(edge);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ namespace Moses
|
|||||||
|
|
||||||
class BitmapContainer;
|
class BitmapContainer;
|
||||||
class TranslationOptionList;
|
class TranslationOptionList;
|
||||||
|
class Manager;
|
||||||
|
|
||||||
typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
|
typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
|
||||||
|
|
||||||
@ -60,7 +61,7 @@ protected:
|
|||||||
void RemoveAll();
|
void RemoveAll();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
HypothesisStackCubePruning();
|
HypothesisStackCubePruning(Manager& manager);
|
||||||
~HypothesisStackCubePruning()
|
~HypothesisStackCubePruning()
|
||||||
{
|
{
|
||||||
RemoveAll();
|
RemoveAll();
|
||||||
|
@ -26,12 +26,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
|
#include "Manager.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
HypothesisStackNormal::HypothesisStackNormal()
|
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
|
||||||
|
HypothesisStack(manager)
|
||||||
{
|
{
|
||||||
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
|
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
|
||||||
m_bestScore = -std::numeric_limits<float>::infinity();
|
m_bestScore = -std::numeric_limits<float>::infinity();
|
||||||
@ -96,7 +98,7 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
|
|||||||
&& ! ( m_minHypoStackDiversity > 0
|
&& ! ( m_minHypoStackDiversity > 0
|
||||||
&& hypo->GetTotalScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) )
|
&& hypo->GetTotalScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) )
|
||||||
{
|
{
|
||||||
StaticData::Instance().GetSentenceStats().AddDiscarded();
|
m_manager.GetSentenceStats().AddDiscarded();
|
||||||
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
VERBOSE(3,"discarded, too bad for stack" << std::endl);
|
||||||
FREEHYPO(hypo);
|
FREEHYPO(hypo);
|
||||||
return false;
|
return false;
|
||||||
@ -114,7 +116,7 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
|
|||||||
Hypothesis *hypoExisting = *iterExisting;
|
Hypothesis *hypoExisting = *iterExisting;
|
||||||
assert(iterExisting != m_hypos.end());
|
assert(iterExisting != m_hypos.end());
|
||||||
|
|
||||||
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
|
||||||
|
|
||||||
// found existing hypo with same target ending.
|
// found existing hypo with same target ending.
|
||||||
// keep the best 1
|
// keep the best 1
|
||||||
@ -211,7 +213,7 @@ void HypothesisStackNormal::PruneToSize(size_t newSize)
|
|||||||
if (! included[i])
|
if (! included[i])
|
||||||
{
|
{
|
||||||
FREEHYPO( hypos[i] );
|
FREEHYPO( hypos[i] );
|
||||||
StaticData::Instance().GetSentenceStats().AddPruning();
|
m_manager.GetSentenceStats().AddPruning();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(included);
|
free(included);
|
||||||
|
@ -69,7 +69,7 @@ public:
|
|||||||
return GetWorstScoreForBitmap( coverage.GetID() );
|
return GetWorstScoreForBitmap( coverage.GetID() );
|
||||||
}
|
}
|
||||||
|
|
||||||
HypothesisStackNormal();
|
HypothesisStackNormal(Manager& manager);
|
||||||
|
|
||||||
/** adds the hypo, but only if within thresholds (beamThr, stackSize).
|
/** adds the hypo, but only if within thresholds (beamThr, stackSize).
|
||||||
* This function will recombine hypotheses silently! There is no record
|
* This function will recombine hypotheses silently! There is no record
|
||||||
|
@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "LanguageModel.h"
|
#include "LanguageModel.h"
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
|
#include "Manager.h"
|
||||||
#include "FactorCollection.h"
|
#include "FactorCollection.h"
|
||||||
#include "Phrase.h"
|
#include "Phrase.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
@ -183,7 +184,7 @@ FFState* LanguageModel::Evaluate(
|
|||||||
res->lmstate = GetState(contextFactor);
|
res->lmstate = GetState(contextFactor);
|
||||||
}
|
}
|
||||||
out->PlusEquals(this, lmScore);
|
out->PlusEquals(this, lmScore);
|
||||||
IFVERBOSE(2) { StaticData::Instance().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
|
IFVERBOSE(2) { hypo.GetManager().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -206,9 +206,10 @@ LexicalReorderingTableTree::LexicalReorderingTableTree(
|
|||||||
const std::vector<FactorType>& f_factors,
|
const std::vector<FactorType>& f_factors,
|
||||||
const std::vector<FactorType>& e_factors,
|
const std::vector<FactorType>& e_factors,
|
||||||
const std::vector<FactorType>& c_factors)
|
const std::vector<FactorType>& c_factors)
|
||||||
: LexicalReorderingTable(f_factors, e_factors, c_factors)
|
: LexicalReorderingTable(f_factors, e_factors, c_factors), m_UseCache(false), m_FilePath(filePath)
|
||||||
{
|
{
|
||||||
m_Table.Read(filePath+".binlexr");
|
m_Table.reset(new PrefixTreeMap());
|
||||||
|
m_Table->Read(m_FilePath+".binlexr");
|
||||||
}
|
}
|
||||||
|
|
||||||
LexicalReorderingTableTree::~LexicalReorderingTableTree(){
|
LexicalReorderingTableTree::~LexicalReorderingTableTree(){
|
||||||
@ -240,7 +241,7 @@ Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, con
|
|||||||
//not in cache go to file...
|
//not in cache go to file...
|
||||||
Score score;
|
Score score;
|
||||||
Candidates cands;
|
Candidates cands;
|
||||||
m_Table.GetCandidates(MakeTableKey(f,e), &cands);
|
m_Table->GetCandidates(MakeTableKey(f,e), &cands);
|
||||||
if(cands.empty()){
|
if(cands.empty()){
|
||||||
return Score();
|
return Score();
|
||||||
}
|
}
|
||||||
@ -271,7 +272,7 @@ Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands
|
|||||||
*/
|
*/
|
||||||
cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
|
cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
|
||||||
}
|
}
|
||||||
IPhrase c = m_Table.ConvertPhrase(cvec,TargetVocId);
|
IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
|
||||||
IPhrase sub_c;
|
IPhrase sub_c;
|
||||||
IPhrase::iterator start = c.begin();
|
IPhrase::iterator start = c.begin();
|
||||||
for(size_t j = 0; j <= context.GetSize(); ++j, ++start){
|
for(size_t j = 0; j <= context.GetSize(); ++j, ++start){
|
||||||
@ -302,6 +303,11 @@ void LexicalReorderingTableTree::InitializeForInput(const InputType& input){
|
|||||||
// Cache(*s); ... this just takes up too much memory, we cache elsewhere
|
// Cache(*s); ... this just takes up too much memory, we cache elsewhere
|
||||||
DisableCache();
|
DisableCache();
|
||||||
}
|
}
|
||||||
|
if (!m_Table.get()) {
|
||||||
|
//load thread specific table.
|
||||||
|
m_Table.reset(new PrefixTreeMap());
|
||||||
|
m_Table->Read(m_FilePath+".binlexr");
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
bool LexicalReorderingTableTree::Create(std::istream& inFile,
|
bool LexicalReorderingTableTree::Create(std::istream& inFile,
|
||||||
@ -515,7 +521,7 @@ IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
|
|||||||
*/
|
*/
|
||||||
keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
|
keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
|
||||||
}
|
}
|
||||||
auxAppend(key, m_Table.ConvertPhrase(keyPart, SourceVocId));
|
auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
|
||||||
keyPart.clear();
|
keyPart.clear();
|
||||||
}
|
}
|
||||||
if(!m_FactorsE.empty()){
|
if(!m_FactorsE.empty()){
|
||||||
@ -529,7 +535,7 @@ IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
|
|||||||
*/
|
*/
|
||||||
keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
|
keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
|
||||||
}
|
}
|
||||||
auxAppend(key, m_Table.ConvertPhrase(keyPart,TargetVocId));
|
auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
|
||||||
//keyPart.clear();
|
//keyPart.clear();
|
||||||
}
|
}
|
||||||
return key;
|
return key;
|
||||||
@ -547,20 +553,20 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
|
|||||||
if(m_FactorsE.empty()){
|
if(m_FactorsE.empty()){
|
||||||
//f is all of key...
|
//f is all of key...
|
||||||
Candidates cands;
|
Candidates cands;
|
||||||
m_Table.GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
|
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
|
||||||
m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
|
m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
|
||||||
} else {
|
} else {
|
||||||
ObjectPool<PPimp> pool;
|
ObjectPool<PPimp> pool;
|
||||||
PPimp* pPos = m_Table.GetRoot();
|
PPimp* pPos = m_Table->GetRoot();
|
||||||
//1) goto subtree for f
|
//1) goto subtree for f
|
||||||
for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
|
for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
|
||||||
/* old code
|
/* old code
|
||||||
pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
|
pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
|
||||||
*/
|
*/
|
||||||
pPos = m_Table.Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
|
pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
|
||||||
}
|
}
|
||||||
if(0 != pPos && pPos->isValid()){
|
if(0 != pPos && pPos->isValid()){
|
||||||
pPos = m_Table.Extend(pPos, PrefixTreeMap::MagicWord);
|
pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
|
||||||
}
|
}
|
||||||
if(0 == pPos || !pPos->isValid()){
|
if(0 == pPos || !pPos->isValid()){
|
||||||
return;
|
return;
|
||||||
@ -574,9 +580,9 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
|
|||||||
while(!stack.empty()){
|
while(!stack.empty()){
|
||||||
if(stack.back().pos->isValid()){
|
if(stack.back().pos->isValid()){
|
||||||
LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
|
LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
|
||||||
std::string next_path = stack.back().path + " " + m_Table.ConvertWord(w,TargetVocId);
|
std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
|
||||||
//cache this
|
//cache this
|
||||||
m_Table.GetCandidates(*stack.back().pos,&cands);
|
m_Table->GetCandidates(*stack.back().pos,&cands);
|
||||||
if(!cands.empty()){
|
if(!cands.empty()){
|
||||||
m_Cache[cache_key + auxClearString(next_path)] = cands;
|
m_Cache[cache_key + auxClearString(next_path)] = cands;
|
||||||
}
|
}
|
||||||
|
@ -6,6 +6,11 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/tss.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
//moses dependencies:
|
//moses dependencies:
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Phrase.h"
|
#include "Phrase.h"
|
||||||
@ -106,7 +111,9 @@ class LexicalReorderingTableTree : public LexicalReorderingTable {
|
|||||||
m_UseCache = false;
|
m_UseCache = false;
|
||||||
};
|
};
|
||||||
void ClearCache(){
|
void ClearCache(){
|
||||||
m_Cache.clear();
|
if (m_UseCache) {
|
||||||
|
m_Cache.clear();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
|
virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
|
||||||
@ -130,12 +137,17 @@ class LexicalReorderingTableTree : public LexicalReorderingTable {
|
|||||||
private:
|
private:
|
||||||
//typedef LexicalReorderingCand CandType;
|
//typedef LexicalReorderingCand CandType;
|
||||||
typedef std::map< std::string, Candidates > CacheType;
|
typedef std::map< std::string, Candidates > CacheType;
|
||||||
typedef PrefixTreeMap TableType;
|
#ifdef WITH_THREADS
|
||||||
|
typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
|
||||||
|
#else
|
||||||
|
typedef std::auto_ptr<PrefixTreeMap> TableType;
|
||||||
|
#endif
|
||||||
|
|
||||||
static const int SourceVocId = 0;
|
static const int SourceVocId = 0;
|
||||||
static const int TargetVocId = 1;
|
static const int TargetVocId = 1;
|
||||||
|
|
||||||
bool m_UseCache;
|
bool m_UseCache;
|
||||||
|
std::string m_FilePath;
|
||||||
CacheType m_Cache;
|
CacheType m_Cache;
|
||||||
TableType m_Table;
|
TableType m_Table;
|
||||||
};
|
};
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
lib_LIBRARIES = libmoses.a
|
lib_LIBRARIES = libmoses.a
|
||||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
|
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
|
||||||
libmoses_a_SOURCES = \
|
libmoses_a_SOURCES = \
|
||||||
AlignmentElement.cpp \
|
|
||||||
AlignmentPhrase.cpp \
|
|
||||||
AlignmentPair.cpp \
|
|
||||||
BitmapContainer.cpp \
|
BitmapContainer.cpp \
|
||||||
ConfusionNet.cpp \
|
ConfusionNet.cpp \
|
||||||
DecodeGraph.cpp \
|
DecodeGraph.cpp \
|
||||||
|
@ -52,7 +52,7 @@ namespace Moses
|
|||||||
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
|
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
|
||||||
:m_source(source)
|
:m_source(source)
|
||||||
,m_transOptColl(source.CreateTranslationOptionCollection())
|
,m_transOptColl(source.CreateTranslationOptionCollection())
|
||||||
,m_search(Search::CreateSearch(source, searchAlgorithm, *m_transOptColl))
|
,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
|
||||||
,m_start(clock())
|
,m_start(clock())
|
||||||
,interrupted_flag(0)
|
,interrupted_flag(0)
|
||||||
{
|
{
|
||||||
@ -82,23 +82,24 @@ void Manager::ProcessSentence()
|
|||||||
{
|
{
|
||||||
// reset statistics
|
// reset statistics
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
staticData.ResetSentenceStats(m_source);
|
ResetSentenceStats(m_source);
|
||||||
|
|
||||||
// collect translation options for this sentence
|
// collect translation options for this sentence
|
||||||
const vector <DecodeGraph*>
|
vector <DecodeGraph*>
|
||||||
&decodeStepVL = staticData.GetDecodeStepVL();
|
decodeStepVL = staticData.GetDecodeStepVL(m_source);
|
||||||
m_transOptColl->CreateTranslationOptions(decodeStepVL);
|
m_transOptColl->CreateTranslationOptions(decodeStepVL);
|
||||||
|
|
||||||
// some reporting on how long this took
|
// some reporting on how long this took
|
||||||
clock_t gotOptions = clock();
|
clock_t gotOptions = clock();
|
||||||
float et = (gotOptions - m_start);
|
float et = (gotOptions - m_start);
|
||||||
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
|
IFVERBOSE(2) { GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
|
||||||
et /= (float)CLOCKS_PER_SEC;
|
et /= (float)CLOCKS_PER_SEC;
|
||||||
VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
|
VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
|
||||||
|
|
||||||
// search for best translation with the specified algorithm
|
// search for best translation with the specified algorithm
|
||||||
m_search->ProcessSentence();
|
m_search->ProcessSentence();
|
||||||
VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
|
VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
|
||||||
|
RemoveAllInColl(decodeStepVL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -177,13 +178,13 @@ void Manager::CalcDecoderStatistics() const
|
|||||||
const Hypothesis *hypo = GetBestHypothesis();
|
const Hypothesis *hypo = GetBestHypothesis();
|
||||||
if (hypo != NULL)
|
if (hypo != NULL)
|
||||||
{
|
{
|
||||||
StaticData::Instance().GetSentenceStats().CalcFinalStats(*hypo);
|
GetSentenceStats().CalcFinalStats(*hypo);
|
||||||
IFVERBOSE(2) {
|
IFVERBOSE(2) {
|
||||||
if (hypo != NULL) {
|
if (hypo != NULL) {
|
||||||
string buff;
|
string buff;
|
||||||
string buff2;
|
string buff2;
|
||||||
TRACE_ERR( "Source and Target Units:"
|
TRACE_ERR( "Source and Target Units:"
|
||||||
<< *StaticData::Instance().GetInput());
|
<< hypo->GetInput());
|
||||||
buff2.insert(0,"] ");
|
buff2.insert(0,"] ");
|
||||||
buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
|
buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
|
||||||
buff2.insert(0,":");
|
buff2.insert(0,":");
|
||||||
@ -221,11 +222,11 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
|
|||||||
<< "\ta=";
|
<< "\ta=";
|
||||||
|
|
||||||
// phrase table scores
|
// phrase table scores
|
||||||
const std::vector<PhraseDictionary*> &phraseTables = staticData.GetPhraseDictionaries();
|
const std::vector<PhraseDictionaryFeature*> &phraseTables = staticData.GetPhraseDictionaries();
|
||||||
std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
|
std::vector<PhraseDictionaryFeature*>::const_iterator iterPhraseTable;
|
||||||
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
|
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
|
||||||
{
|
{
|
||||||
const PhraseDictionary *phraseTable = *iterPhraseTable;
|
const PhraseDictionaryFeature *phraseTable = *iterPhraseTable;
|
||||||
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
|
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
|
||||||
|
|
||||||
outputWordGraphStream << scores[0];
|
outputWordGraphStream << scores[0];
|
||||||
|
@ -104,11 +104,22 @@ public:
|
|||||||
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
|
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
|
||||||
#endif
|
#endif
|
||||||
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
|
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
|
||||||
|
const InputType& GetSource() const {return m_source;}
|
||||||
|
|
||||||
/***
|
/***
|
||||||
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
|
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
|
||||||
*/
|
*/
|
||||||
void CalcDecoderStatistics() const;
|
void CalcDecoderStatistics() const;
|
||||||
|
void ResetSentenceStats(const InputType& source)
|
||||||
|
{
|
||||||
|
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
|
||||||
|
}
|
||||||
|
SentenceStats& GetSentenceStats() const
|
||||||
|
{
|
||||||
|
return *m_sentenceStats;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::auto_ptr<SentenceStats> m_sentenceStats;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -179,16 +179,16 @@ public:
|
|||||||
|
|
||||||
StringTgtCand::first_type const& factorStrings=cands[i].first;
|
StringTgtCand::first_type const& factorStrings=cands[i].first;
|
||||||
StringTgtCand::second_type const& probVector=cands[i].second;
|
StringTgtCand::second_type const& probVector=cands[i].second;
|
||||||
StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
|
//StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
|
||||||
StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
|
//StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
|
||||||
|
|
||||||
std::vector<float> scoreVector(probVector.size());
|
std::vector<float> scoreVector(probVector.size());
|
||||||
std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
|
std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
|
||||||
TransformScore);
|
TransformScore);
|
||||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
|
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
|
||||||
FloorScore);
|
FloorScore);
|
||||||
// CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
|
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
|
||||||
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
|
//CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
|
||||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||||
tCands.push_back(targetPhrase);
|
tCands.push_back(targetPhrase);
|
||||||
}
|
}
|
||||||
@ -293,35 +293,14 @@ public:
|
|||||||
for(size_t l=0;l<m_output.size();++l)
|
for(size_t l=0;l<m_output.size();++l)
|
||||||
w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
|
w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
|
||||||
}
|
}
|
||||||
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
|
targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, m_weights, m_weightWP, *m_languageModels);
|
||||||
targetPhrase.SetSourcePhrase(srcPtr);
|
targetPhrase.SetSourcePhrase(srcPtr);
|
||||||
|
|
||||||
// targetPhrase.CreateAlignmentInfo("???", "???", 44);
|
// targetPhrase.CreateAlignmentInfo("???", "???", 44);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void CreateTargetPhrase(TargetPhrase& targetPhrase,
|
|
||||||
StringTgtCand::first_type const& factorStrings,
|
|
||||||
StringTgtCand::second_type const& scoreVector,
|
|
||||||
StringWordAlignmentCand::second_type const& swaVector,
|
|
||||||
StringWordAlignmentCand::second_type const& twaVector,
|
|
||||||
Phrase const* srcPtr=0) const
|
|
||||||
{
|
|
||||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
|
||||||
|
|
||||||
for(size_t k=0;k<factorStrings.size();++k)
|
|
||||||
{
|
|
||||||
std::vector<std::string> factors=TokenizeMultiCharSeparator(*factorStrings[k],StaticData::Instance().GetFactorDelimiter());
|
|
||||||
Word& w=targetPhrase.AddWord();
|
|
||||||
for(size_t l=0;l<m_output.size();++l)
|
|
||||||
w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
|
|
||||||
}
|
|
||||||
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
|
|
||||||
targetPhrase.SetSourcePhrase(srcPtr);
|
|
||||||
|
|
||||||
targetPhrase.CreateAlignmentInfo(swaVector, twaVector);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TargetPhraseCollection* PruneTargetCandidates(std::vector<TargetPhrase> const & tCands,
|
TargetPhraseCollection* PruneTargetCandidates(std::vector<TargetPhrase> const & tCands,
|
||||||
std::vector<std::pair<float,size_t> >& costs) const
|
std::vector<std::pair<float,size_t> >& costs) const
|
||||||
|
@ -21,41 +21,125 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
***********************************************************************/
|
***********************************************************************/
|
||||||
|
|
||||||
#include "PhraseDictionary.h"
|
#include "PhraseDictionary.h"
|
||||||
|
#include "PhraseDictionaryTreeAdaptor.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
#include "InputType.h"
|
#include "InputType.h"
|
||||||
#include "TranslationOption.h"
|
#include "TranslationOption.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses {
|
||||||
{
|
|
||||||
PhraseDictionary::PhraseDictionary(size_t numScoreComponent)
|
|
||||||
: Dictionary(numScoreComponent),m_tableLimit(0)
|
|
||||||
{
|
|
||||||
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
|
|
||||||
}
|
|
||||||
|
|
||||||
PhraseDictionary::~PhraseDictionary() {}
|
|
||||||
|
|
||||||
const TargetPhraseCollection *PhraseDictionary::
|
const TargetPhraseCollection *PhraseDictionary::
|
||||||
GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
|
GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
|
||||||
{
|
{
|
||||||
return GetTargetPhraseCollection(src.GetSubString(range));
|
return GetTargetPhraseCollection(src.GetSubString(range));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string PhraseDictionary::GetScoreProducerDescription() const
|
/**
 * Construct the feature for one phrase table and register it as a score producer.
 *
 * The constructor stores all loading parameters. If no "<filePath>.binphr.idx"
 * file exists the table is treated as an in-memory (text) phrase table and is
 * loaded immediately into m_memoryDictionary; otherwise nothing is loaded here
 * and the tree dictionary is created later by GetDictionary().
 *
 * \param numScoreComponent number of scores per phrase pair
 * \param numInputScores    number of input scores, as reported by GetNumInputScores()
 * \param input             input factor types passed on to Load()
 * \param output            output factor types passed on to Load()
 * \param filePath          path of the phrase table (".gz" is appended when only
 *                          the gzipped file exists)
 * \param weight            weights passed on to Load()
 * \param tableLimit        table limit passed on to Load()
 */
PhraseDictionaryFeature::PhraseDictionaryFeature
                            ( size_t numScoreComponent
                            , unsigned numInputScores
                            , const std::vector<FactorType> &input
                            , const std::vector<FactorType> &output
                            , const std::string &filePath
                            , const std::vector<float> &weight
                            , size_t tableLimit):
  m_numScoreComponent(numScoreComponent),
  m_numInputScores(numInputScores),
  m_input(input),
  m_output(output),
  m_filePath(filePath),
  m_weight(weight),
  m_tableLimit(tableLimit)
{
  const StaticData& staticData = StaticData::Instance();
  const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);

  //if we're using an in-memory phrase table, then load it now, otherwise wait
  if (!FileExists(filePath+".binphr.idx"))
  { // memory phrase table
    VERBOSE(2,"using standard phrase tables" << endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << endl);
    }
    if (staticData.GetInputType() != SentenceInput)
    {
      UserMessage::Add("Must use binary phrase table for this input type");
      assert(false);
    }

    PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this);
    // Load() must run outside assert(): an assert expression is not evaluated
    // when NDEBUG is defined, which would leave the table empty in such builds.
    const bool loaded = pdm->Load(m_input
                                  , m_output
                                  , m_filePath
                                  , m_weight
                                  , m_tableLimit
                                  , staticData.GetAllLM()
                                  , staticData.GetWeightWordPenalty());
    if (!loaded)
    {
      // same reporting style as the input-type check above
      UserMessage::Add("Unable to load phrase table " + m_filePath);
      assert(false);
    }
    m_memoryDictionary.reset(pdm);
  }
  else
  {
    //don't initialise the tree dictionary until it's required
  }
}
|
||||||
|
|
||||||
|
/**
 * Return the phrase dictionary to use for the given input.
 *
 * If an in-memory dictionary was loaded by the constructor it is returned.
 * Otherwise the tree dictionary held in m_treeDictionary is created and loaded
 * on first use. In both cases InitializeForInput(source) is called on the
 * dictionary before it is returned.
 *
 * \param source the input about to be translated
 * \return the dictionary; ownership stays with this feature
 */
PhraseDictionary* PhraseDictionaryFeature::GetDictionary
                                        (const InputType& source) {
  PhraseDictionary* dict = NULL;
  if (m_memoryDictionary.get()) {
    dict = m_memoryDictionary.get();
  } else {
    if (!m_treeDictionary.get()) {
      //load the tree dictionary for this thread
      const StaticData& staticData = StaticData::Instance();
      PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
      // Load() must run outside assert(): an assert expression is not evaluated
      // when NDEBUG is defined, which would leave the dictionary unloaded.
      const bool loaded = pdta->Load(m_input
                                     , m_output
                                     , m_filePath
                                     , m_weight
                                     , m_tableLimit
                                     , staticData.GetAllLM()
                                     , staticData.GetWeightWordPenalty());
      if (!loaded) {
        UserMessage::Add("Unable to load binary phrase table " + m_filePath);
        assert(false);
      }
      m_treeDictionary.reset(pdta);
    }
    dict = m_treeDictionary.get();
  }
  dict->InitializeForInput(source);
  return dict;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PhraseDictionaryFeature::~PhraseDictionaryFeature() {}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
std::string PhraseDictionaryFeature::GetScoreProducerDescription() const
|
||||||
{
|
{
|
||||||
return "PhraseModel";
|
return "PhraseModel";
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t PhraseDictionary::GetNumScoreComponents() const
|
size_t PhraseDictionaryFeature::GetNumScoreComponents() const
|
||||||
{
|
{
|
||||||
return m_numScoreComponent;
|
return m_numScoreComponent;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t PhraseDictionary::GetNumInputScores() const { return 0;}
|
size_t PhraseDictionaryFeature::GetNumInputScores() const
|
||||||
|
{
|
||||||
|
return m_numInputScores;
|
||||||
|
}
|
||||||
|
|
||||||
bool PhraseDictionary::ComputeValueInTranslationOption() const {
|
bool PhraseDictionaryFeature::ComputeValueInTranslationOption() const {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const PhraseDictionaryFeature* PhraseDictionary::GetFeature() const {
|
||||||
|
return m_feature;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -26,6 +26,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/tss.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "Phrase.h"
|
#include "Phrase.h"
|
||||||
#include "TargetPhrase.h"
|
#include "TargetPhrase.h"
|
||||||
#include "Dictionary.h"
|
#include "Dictionary.h"
|
||||||
@ -39,24 +44,58 @@ class StaticData;
|
|||||||
class InputType;
|
class InputType;
|
||||||
class WordsRange;
|
class WordsRange;
|
||||||
|
|
||||||
/** abstract base class for phrase table classes
|
class PhraseDictionaryFeature;
|
||||||
*/
|
/**
|
||||||
class PhraseDictionary : public Dictionary, public StatelessFeatureFunction
|
* Abstract base class for phrase dictionaries (tables).
|
||||||
|
**/
|
||||||
|
class PhraseDictionary: public Dictionary {
|
||||||
|
public:
|
||||||
|
PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):
|
||||||
|
Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}
|
||||||
|
//! table limit number.
|
||||||
|
size_t GetTableLimit() const { return m_tableLimit; }
|
||||||
|
DecodeType GetDecodeType() const { return Translate; }
|
||||||
|
const PhraseDictionaryFeature* GetFeature() const;
|
||||||
|
/** set/change translation weights and recalc weighted score for each translation.
|
||||||
|
* TODO This may be redundant now we use ScoreCollection
|
||||||
|
*/
|
||||||
|
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
|
||||||
|
|
||||||
|
//! find list of translations that can translates src. Only for phrase input
|
||||||
|
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
|
||||||
|
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
||||||
|
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
|
||||||
|
//! Create entry for translation of source to targetPhrase
|
||||||
|
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
|
||||||
|
virtual void InitializeForInput(InputType const& source) = 0;
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
size_t m_tableLimit;
|
||||||
|
const PhraseDictionaryFeature* m_feature;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a feature derived from a phrase table.
|
||||||
|
*/
|
||||||
|
class PhraseDictionaryFeature : public StatelessFeatureFunction
|
||||||
{
|
{
|
||||||
protected:
|
|
||||||
size_t m_tableLimit;
|
|
||||||
std::string m_filePath; // just for debugging purposes
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PhraseDictionary(size_t numScoreComponent);
|
PhraseDictionaryFeature( size_t numScoreComponent
|
||||||
virtual ~PhraseDictionary();
|
, unsigned numInputScores
|
||||||
|
, const std::vector<FactorType> &input
|
||||||
|
, const std::vector<FactorType> &output
|
||||||
|
, const std::string &filePath
|
||||||
|
, const std::vector<float> &weight
|
||||||
|
, size_t tableLimit);
|
||||||
|
|
||||||
|
virtual ~PhraseDictionaryFeature();
|
||||||
|
|
||||||
DecodeType GetDecodeType() const { return Translate; }
|
virtual bool ComputeValueInTranslationOption() const;
|
||||||
//! table limit number.
|
|
||||||
size_t GetTableLimit() const { return m_tableLimit; }
|
|
||||||
|
|
||||||
//! Overriden by load on demand phrase tables classes to load data for each input
|
|
||||||
virtual void InitializeForInput(InputType const &/*source*/) {}
|
|
||||||
std::string GetScoreProducerDescription() const;
|
std::string GetScoreProducerDescription() const;
|
||||||
std::string GetScoreProducerWeightShortName() const
|
std::string GetScoreProducerWeightShortName() const
|
||||||
{
|
{
|
||||||
@ -66,21 +105,26 @@ class PhraseDictionary : public Dictionary, public StatelessFeatureFunction
|
|||||||
|
|
||||||
size_t GetNumInputScores() const;
|
size_t GetNumInputScores() const;
|
||||||
|
|
||||||
virtual bool ComputeValueInTranslationOption() const;
|
PhraseDictionary* GetDictionary(const InputType& source);
|
||||||
|
|
||||||
|
private:
|
||||||
|
size_t m_numScoreComponent;
|
||||||
|
unsigned m_numInputScores;
|
||||||
|
std::vector<FactorType> m_input;
|
||||||
|
std::vector<FactorType> m_output;
|
||||||
|
std::string m_filePath;
|
||||||
|
std::vector<float> m_weight;
|
||||||
|
size_t m_tableLimit;
|
||||||
|
//Only instantiate one of these
|
||||||
|
std::auto_ptr<PhraseDictionary> m_memoryDictionary;
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
boost::thread_specific_ptr<PhraseDictionary> m_treeDictionary;
|
||||||
|
#else
|
||||||
|
std::auto_ptr<PhraseDictionary> m_treeDictionary;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** set/change translation weights and recalc weighted score for each translation.
|
|
||||||
* TODO This may be redundant now we use ScoreCollection
|
|
||||||
*/
|
|
||||||
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
|
|
||||||
|
|
||||||
//! find list of translations that can translates src. Only for phrase input
|
|
||||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
|
|
||||||
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
|
||||||
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
|
|
||||||
//! Create entry for translation of source to targetPhrase
|
|
||||||
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
#include "WordsRange.h"
|
#include "WordsRange.h"
|
||||||
#include "UserMessage.h"
|
#include "UserMessage.h"
|
||||||
#include "AlignmentPair.h"
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
@ -50,7 +49,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
|||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
|
||||||
m_tableLimit = tableLimit;
|
m_tableLimit = tableLimit;
|
||||||
m_filePath = filePath;
|
|
||||||
|
|
||||||
//factors
|
//factors
|
||||||
m_inputFactors = FactorMask(input);
|
m_inputFactors = FactorMask(input);
|
||||||
@ -132,22 +130,13 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
|
|||||||
targetPhrase.SetSourcePhrase(&sourcePhrase);
|
targetPhrase.SetSourcePhrase(&sourcePhrase);
|
||||||
targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
|
targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
|
||||||
|
|
||||||
// load alignment info only when present and relevant
|
|
||||||
if (staticData.UseAlignmentInfo()){
|
|
||||||
if (numElement==3){
|
|
||||||
stringstream strme;
|
|
||||||
strme << "You are using AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << line_num;
|
|
||||||
UserMessage::Add(strme.str());
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
targetPhrase.CreateAlignmentInfo(sourceAlignString, targetAlignString);
|
|
||||||
}
|
|
||||||
|
|
||||||
// component score, for n-best output
|
// component score, for n-best output
|
||||||
std::vector<float> scv(scoreVector.size());
|
std::vector<float> scv(scoreVector.size());
|
||||||
std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
|
std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
|
||||||
std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore);
|
std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore);
|
||||||
targetPhrase.SetScore(this, scv, weight, weightWP, languageModels);
|
targetPhrase.SetScore(m_feature, scv, weight, weightWP, languageModels);
|
||||||
|
|
||||||
AddEquivPhrase(sourcePhrase, targetPhrase);
|
AddEquivPhrase(sourcePhrase, targetPhrase);
|
||||||
|
|
||||||
|
@ -42,10 +42,8 @@ protected:
|
|||||||
TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source);
|
TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PhraseDictionaryMemory(size_t numScoreComponent)
|
PhraseDictionaryMemory(size_t numScoreComponent, PhraseDictionaryFeature* feature)
|
||||||
: MyBase(numScoreComponent)
|
: PhraseDictionary(numScoreComponent,feature) {}
|
||||||
{
|
|
||||||
}
|
|
||||||
virtual ~PhraseDictionaryMemory();
|
virtual ~PhraseDictionaryMemory();
|
||||||
|
|
||||||
bool Load(const std::vector<FactorType> &input
|
bool Load(const std::vector<FactorType> &input
|
||||||
@ -62,6 +60,8 @@ public:
|
|||||||
|
|
||||||
// for mert
|
// for mert
|
||||||
void SetWeightTransModel(const std::vector<float> &weightT);
|
void SetWeightTransModel(const std::vector<float> &weightT);
|
||||||
|
virtual void InitializeForInput(InputType const&)
|
||||||
|
{/* Don't do anything source specific here as this object is shared between threads.*/}
|
||||||
|
|
||||||
TO_STRING();
|
TO_STRING();
|
||||||
|
|
||||||
|
@ -64,8 +64,9 @@ const PhraseDictionaryNode *PhraseDictionaryNode::GetChild(const Word &word) con
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhraseDictionaryNode::SetWeightTransModel(const PhraseDictionaryMemory *phraseDictionary
|
void PhraseDictionaryNode::SetWeightTransModel(
|
||||||
, const std::vector<float> &weightT)
|
const PhraseDictionaryMemory *phraseDictionary,
|
||||||
|
const std::vector<float> &weightT)
|
||||||
{
|
{
|
||||||
// recursively set weights
|
// recursively set weights
|
||||||
NodeMap::iterator iterNodeMap;
|
NodeMap::iterator iterNodeMap;
|
||||||
@ -84,7 +85,7 @@ void PhraseDictionaryNode::SetWeightTransModel(const PhraseDictionaryMemory *phr
|
|||||||
++iterTargetPhrase)
|
++iterTargetPhrase)
|
||||||
{
|
{
|
||||||
TargetPhrase &targetPhrase = **iterTargetPhrase;
|
TargetPhrase &targetPhrase = **iterTargetPhrase;
|
||||||
targetPhrase.SetWeights(phraseDictionary, weightT);
|
targetPhrase.SetWeights(phraseDictionary->GetFeature(), weightT);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ namespace Moses
|
|||||||
{
|
{
|
||||||
|
|
||||||
class PhraseDictionaryMemory;
|
class PhraseDictionaryMemory;
|
||||||
|
class PhraseDictionaryFeature;
|
||||||
|
|
||||||
/** One node of the PhraseDictionaryMemory structure
|
/** One node of the PhraseDictionaryMemory structure
|
||||||
*/
|
*/
|
||||||
|
@ -115,18 +115,36 @@ PhraseDictionaryTree::PrefixPtr::operator bool() const
|
|||||||
return imp && imp->isValid();
|
return imp && imp->isValid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef LVoc<std::string> WordVoc;
|
||||||
|
|
||||||
|
#if defined(WITH_THREADS) || defined(HAVE_THREADS)
// Serialises access to the vocabulary cache in ReadVoc(). It has to be shared
// by all callers: a mutex created afresh inside each call excludes nobody.
// NOTE(review): needs <boost/thread/mutex.hpp>; this file's include list is not
// visible from here - confirm it is included when threads are enabled.
static boost::mutex s_readVocMutex;
#endif

/**
 * Return the vocabulary stored in the given file, reading it at most once.
 *
 * Vocabularies are cached by file name in a function-local static map, so all
 * callers asking for the same file receive the same WordVoc object. The cached
 * objects are never deleted by this function.
 *
 * \param filename path of the vocabulary file
 * \return pointer to the cached vocabulary for that file
 */
static WordVoc* ReadVoc(const std::string& filename) {
#if defined(WITH_THREADS) || defined(HAVE_THREADS)
  // Taken before the static map is touched so that its first use is covered too.
  boost::mutex::scoped_lock lock(s_readVocMutex);
#endif
  static std::map<std::string,WordVoc*> vocs;
  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
  if (vi == vocs.end()) {
    WordVoc* voc = new WordVoc();
    voc->Read(filename);
    vi = vocs.insert(std::make_pair(filename, voc)).first;
  }
  return vi->second;
}
|
||||||
|
|
||||||
|
|
||||||
struct PDTimp {
|
struct PDTimp {
|
||||||
typedef PrefixTreeF<LabelId,OFF_T> PTF;
|
typedef PrefixTreeF<LabelId,OFF_T> PTF;
|
||||||
typedef FilePtr<PTF> CPT;
|
typedef FilePtr<PTF> CPT;
|
||||||
typedef std::vector<CPT> Data;
|
typedef std::vector<CPT> Data;
|
||||||
typedef LVoc<std::string> WordVoc;
|
|
||||||
|
|
||||||
Data data;
|
Data data;
|
||||||
std::vector<OFF_T> srcOffsets;
|
std::vector<OFF_T> srcOffsets;
|
||||||
|
|
||||||
FILE *os,*ot;
|
FILE *os,*ot;
|
||||||
WordVoc sv,tv;
|
WordVoc* sv;
|
||||||
|
WordVoc* tv;
|
||||||
|
|
||||||
ObjectPool<PPimp> pPool;
|
ObjectPool<PPimp> pPool;
|
||||||
// a comparison with the Boost MemPools might be useful
|
// a comparison with the Boost MemPools might be useful
|
||||||
@ -189,7 +207,7 @@ struct PDTimp {
|
|||||||
std::vector<std::string const*> vs;
|
std::vector<std::string const*> vs;
|
||||||
vs.reserve(iphrase.size());
|
vs.reserve(iphrase.size());
|
||||||
for(size_t j=0;j<iphrase.size();++j)
|
for(size_t j=0;j<iphrase.size();++j)
|
||||||
vs.push_back(&tv.symbol(iphrase[j]));
|
vs.push_back(&tv->symbol(iphrase[j]));
|
||||||
rv.push_back(StringTgtCand(vs,i->GetScores()));
|
rv.push_back(StringTgtCand(vs,i->GetScores()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -206,7 +224,7 @@ struct PDTimp {
|
|||||||
std::vector<std::string const*> vs;
|
std::vector<std::string const*> vs;
|
||||||
vs.reserve(iphrase.size());
|
vs.reserve(iphrase.size());
|
||||||
for(size_t j=0;j<iphrase.size();++j)
|
for(size_t j=0;j<iphrase.size();++j)
|
||||||
vs.push_back(&tv.symbol(iphrase[j]));
|
vs.push_back(&tv->symbol(iphrase[j]));
|
||||||
rv.push_back(StringTgtCand(vs,i->GetScores()));
|
rv.push_back(StringTgtCand(vs,i->GetScores()));
|
||||||
swa.push_back(StringWordAlignmentCand(vs,(i->GetSourceAlignment())));
|
swa.push_back(StringWordAlignmentCand(vs,(i->GetSourceAlignment())));
|
||||||
twa.push_back(StringWordAlignmentCand(vs,(i->GetTargetAlignment())));
|
twa.push_back(StringWordAlignmentCand(vs,(i->GetTargetAlignment())));
|
||||||
@ -223,7 +241,7 @@ struct PDTimp {
|
|||||||
assert(p);
|
assert(p);
|
||||||
if(w.empty() || w==EPSILON) return p;
|
if(w.empty() || w==EPSILON) return p;
|
||||||
|
|
||||||
LabelId wi=sv.index(w);
|
LabelId wi=sv->index(w);
|
||||||
|
|
||||||
if(wi==InvalidLabelId) return PPtr(); // unknown word
|
if(wi==InvalidLabelId) return PPtr(); // unknown word
|
||||||
else if(p.imp->isRoot())
|
else if(p.imp->isRoot())
|
||||||
@ -300,8 +318,10 @@ int PDTimp::Read(const std::string& fn)
|
|||||||
for(size_t i=0;i<data.size();++i)
|
for(size_t i=0;i<data.size();++i)
|
||||||
data[i]=CPT(os,srcOffsets[i]);
|
data[i]=CPT(os,srcOffsets[i]);
|
||||||
|
|
||||||
sv.Read(ifsv);
|
sv = ReadVoc(ifsv);
|
||||||
tv.Read(iftv);
|
tv = ReadVoc(iftv);
|
||||||
|
//sv.Read(ifsv);
|
||||||
|
//tv.Read(iftv);
|
||||||
|
|
||||||
TRACE_ERR("binary phrasefile loaded, default OFF_T: "<<PTF::getDefault()
|
TRACE_ERR("binary phrasefile loaded, default OFF_T: "<<PTF::getDefault()
|
||||||
<<"\n");
|
<<"\n");
|
||||||
@ -320,7 +340,7 @@ void PDTimp::PrintTgtCand(const TgtCands& tcand,std::ostream& out) const
|
|||||||
const IPhrase& iphr=tcand[i].GetPhrase();
|
const IPhrase& iphr=tcand[i].GetPhrase();
|
||||||
|
|
||||||
out << i << " -- " << sc << " -- ";
|
out << i << " -- " << sc << " -- ";
|
||||||
for(size_t j=0;j<iphr.size();++j) out << tv.symbol(iphr[j])<<" ";
|
for(size_t j=0;j<iphr.size();++j) out << tv->symbol(iphr[j])<<" ";
|
||||||
out<< " -- ";
|
out<< " -- ";
|
||||||
for (size_t j=0;j<srcAlign.size();j++) out << " " << srcAlign[j];
|
for (size_t j=0;j<srcAlign.size();j++) out << " " << srcAlign[j];
|
||||||
out << " -- ";
|
out << " -- ";
|
||||||
@ -370,7 +390,7 @@ GetTargetCandidates(const std::vector<std::string>& src,
|
|||||||
IPhrase f(src.size());
|
IPhrase f(src.size());
|
||||||
for(size_t i=0;i<src.size();++i)
|
for(size_t i=0;i<src.size();++i)
|
||||||
{
|
{
|
||||||
f[i]=imp->sv.index(src[i]);
|
f[i]=imp->sv->index(src[i]);
|
||||||
if(f[i]==InvalidLabelId) return;
|
if(f[i]==InvalidLabelId) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -388,7 +408,7 @@ GetTargetCandidates(const std::vector<std::string>& src,
|
|||||||
IPhrase f(src.size());
|
IPhrase f(src.size());
|
||||||
for(size_t i=0;i<src.size();++i)
|
for(size_t i=0;i<src.size();++i)
|
||||||
{
|
{
|
||||||
f[i]=imp->sv.index(src[i]);
|
f[i]=imp->sv->index(src[i]);
|
||||||
if(f[i]==InvalidLabelId) return;
|
if(f[i]==InvalidLabelId) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -405,7 +425,7 @@ PrintTargetCandidates(const std::vector<std::string>& src,
|
|||||||
IPhrase f(src.size());
|
IPhrase f(src.size());
|
||||||
for(size_t i=0;i<src.size();++i)
|
for(size_t i=0;i<src.size();++i)
|
||||||
{
|
{
|
||||||
f[i]=imp->sv.index(src[i]);
|
f[i]=imp->sv->index(src[i]);
|
||||||
if(f[i]==InvalidLabelId)
|
if(f[i]==InvalidLabelId)
|
||||||
{
|
{
|
||||||
TRACE_ERR("the source phrase '"<<src<<"' contains an unknown word '"
|
TRACE_ERR("the source phrase '"<<src<<"' contains an unknown word '"
|
||||||
@ -447,6 +467,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
|
|||||||
std::vector<OFF_T> vo;
|
std::vector<OFF_T> vo;
|
||||||
size_t lnc=0;
|
size_t lnc=0;
|
||||||
size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info
|
size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info
|
||||||
|
imp->sv = new WordVoc();
|
||||||
|
imp->tv = new WordVoc();
|
||||||
|
|
||||||
while(getline(inFile, line))
|
while(getline(inFile, line))
|
||||||
{
|
{
|
||||||
@ -490,30 +512,13 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
|
|||||||
|
|
||||||
std::vector<std::string> wordVec = Tokenize(sourcePhraseString);
|
std::vector<std::string> wordVec = Tokenize(sourcePhraseString);
|
||||||
for (size_t i = 0 ; i < wordVec.size() ; ++i)
|
for (size_t i = 0 ; i < wordVec.size() ; ++i)
|
||||||
f.push_back(imp->sv.add(wordVec[i]));
|
f.push_back(imp->sv->add(wordVec[i]));
|
||||||
|
|
||||||
wordVec = Tokenize(targetPhraseString);
|
wordVec = Tokenize(targetPhraseString);
|
||||||
for (size_t i = 0 ; i < wordVec.size() ; ++i)
|
for (size_t i = 0 ; i < wordVec.size() ; ++i)
|
||||||
e.push_back(imp->tv.add(wordVec[i]));
|
e.push_back(imp->tv->add(wordVec[i]));
|
||||||
|
|
||||||
|
|
||||||
if (!PrintWordAlignment()){// word-to-word alignment are not used, create empty word-to-word alignment
|
|
||||||
EmptyAlignment(sourceAlignString, f.size());
|
|
||||||
EmptyAlignment(targetAlignString, e.size());
|
|
||||||
}
|
|
||||||
else if (numElement==3){
|
|
||||||
stringstream strme;
|
|
||||||
strme << "You are asking for AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << lnc << " : " << line;
|
|
||||||
|
|
||||||
strme << endl << "Deleting files " << ofn << " and " << oft << "..." << endl;
|
|
||||||
if( remove( ofn.c_str() ) != 0 ) strme << "Error deleting file " << ofn;
|
|
||||||
else strme << "File " << ofn << " successfully deleted";
|
|
||||||
strme << endl;
|
|
||||||
if( remove( oft.c_str() ) != 0 ) strme << "Error deleting file " << oft;
|
|
||||||
else strme << "File " << oft << " successfully deleted";
|
|
||||||
strme << endl;
|
|
||||||
UserMessage::Add(strme.str());
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
//change "()" into "(-1)" for both source and target word-to-word alignments
|
//change "()" into "(-1)" for both source and target word-to-word alignments
|
||||||
std::string emtpyAlignStr="()";
|
std::string emtpyAlignStr="()";
|
||||||
@ -648,8 +653,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
|
|||||||
fWriteVector(oi,vo);
|
fWriteVector(oi,vo);
|
||||||
fClose(oi);
|
fClose(oi);
|
||||||
|
|
||||||
imp->sv.Write(ofsv);
|
imp->sv->Write(ofsv);
|
||||||
imp->tv.Write(oftv);
|
imp->tv->Write(oftv);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,11 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/mutex.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Dictionary.h"
|
#include "Dictionary.h"
|
||||||
|
|
||||||
|
@ -23,8 +23,9 @@ namespace Moses
|
|||||||
*************************************************************/
|
*************************************************************/
|
||||||
|
|
||||||
PhraseDictionaryTreeAdaptor::
|
PhraseDictionaryTreeAdaptor::
|
||||||
PhraseDictionaryTreeAdaptor(size_t numScoreComponent,unsigned numInputScores)
|
PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature)
|
||||||
: MyBase(numScoreComponent),imp(new PDTAimp(this,numInputScores)) {}
|
: PhraseDictionary(numScoreComponent,feature), imp(new PDTAimp(this,numInputScores)) {
|
||||||
|
}
|
||||||
|
|
||||||
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
|
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
|
||||||
{
|
{
|
||||||
@ -32,21 +33,6 @@ PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
|
|||||||
delete imp;
|
delete imp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PhraseDictionaryTreeAdaptor::CleanUp()
|
|
||||||
{
|
|
||||||
imp->CleanUp();
|
|
||||||
MyBase::CleanUp();
|
|
||||||
}
|
|
||||||
|
|
||||||
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
|
|
||||||
{
|
|
||||||
// caching only required for confusion net
|
|
||||||
if(ConfusionNet const* cn=dynamic_cast<ConfusionNet const*>(&source))
|
|
||||||
imp->CacheSource(*cn);
|
|
||||||
//else if(Sentence const* s=dynamic_cast<Sentence const*>(&source))
|
|
||||||
// following removed by phi, not helpful
|
|
||||||
// imp->CacheSource(ConfusionNet(*s));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
||||||
, const std::vector<FactorType> &output
|
, const std::vector<FactorType> &output
|
||||||
@ -54,8 +40,7 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
|||||||
, const std::vector<float> &weight
|
, const std::vector<float> &weight
|
||||||
, size_t tableLimit
|
, size_t tableLimit
|
||||||
, const LMList &languageModels
|
, const LMList &languageModels
|
||||||
, float weightWP
|
, float weightWP)
|
||||||
)
|
|
||||||
{
|
{
|
||||||
if(m_numScoreComponent!=weight.size()) {
|
if(m_numScoreComponent!=weight.size()) {
|
||||||
stringstream strme;
|
stringstream strme;
|
||||||
@ -64,7 +49,6 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
|||||||
UserMessage::Add(strme.str());
|
UserMessage::Add(strme.str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
m_filePath = filePath;
|
|
||||||
|
|
||||||
// set Dictionary members
|
// set Dictionary members
|
||||||
m_inputFactors = FactorMask(input);
|
m_inputFactors = FactorMask(input);
|
||||||
@ -79,6 +63,13 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prepare the adaptor for a new input.
 *
 * Calls CleanUp() on the implementation object first, then caches the
 * source when the input is a confusion network.
 *
 * @param source the input about to be processed.
 */
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source) {
  imp->CleanUp();

  // Caching is only required for confusion-network input; for any other
  // input type the cast yields a null pointer and nothing is cached.
  ConfusionNet const* const confusionNet = dynamic_cast<ConfusionNet const*>(&source);
  if (confusionNet) {
    imp->CacheSource(*confusionNet);
  }
}
|
||||||
|
|
||||||
TargetPhraseCollection const*
|
TargetPhraseCollection const*
|
||||||
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
|
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
|
||||||
{
|
{
|
||||||
|
@ -27,7 +27,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
|
|||||||
void operator=(const PhraseDictionaryTreeAdaptor&);
|
void operator=(const PhraseDictionaryTreeAdaptor&);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
PhraseDictionaryTreeAdaptor(size_t numScoreComponent,unsigned numInputScores);
|
PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature);
|
||||||
virtual ~PhraseDictionaryTreeAdaptor();
|
virtual ~PhraseDictionaryTreeAdaptor();
|
||||||
|
|
||||||
// enable/disable caching
|
// enable/disable caching
|
||||||
@ -46,19 +46,14 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
|
|||||||
, const std::vector<float> &weight
|
, const std::vector<float> &weight
|
||||||
, size_t tableLimit
|
, size_t tableLimit
|
||||||
, const LMList &languageModels
|
, const LMList &languageModels
|
||||||
, float weightWP
|
, float weightWP);
|
||||||
|
|
||||||
);
|
|
||||||
|
|
||||||
// get translation candidates for a given source phrase
|
// get translation candidates for a given source phrase
|
||||||
// returns null pointer if nothing found
|
// returns null pointer if nothing found
|
||||||
TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const;
|
TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const;
|
||||||
TargetPhraseCollection const* GetTargetPhraseCollection(InputType const& src,WordsRange const & srcRange) const;
|
TargetPhraseCollection const* GetTargetPhraseCollection(InputType const& src,WordsRange const & srcRange) const;
|
||||||
|
|
||||||
// clean up temporary memory etc.
|
|
||||||
void CleanUp();
|
|
||||||
|
|
||||||
void InitializeForInput(InputType const& source);
|
|
||||||
|
|
||||||
// change model scaling factors
|
// change model scaling factors
|
||||||
void SetWeightTransModel(const std::vector<float> &weightT);
|
void SetWeightTransModel(const std::vector<float> &weightT);
|
||||||
@ -73,6 +68,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t GetNumInputScores() const;
|
size_t GetNumInputScores() const;
|
||||||
|
virtual void InitializeForInput(InputType const& source);
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -59,13 +59,28 @@ void PrefixTreeMap::FreeMemory() {
|
|||||||
for(Data::iterator i = m_Data.begin(); i != m_Data.end(); ++i){
|
for(Data::iterator i = m_Data.begin(); i != m_Data.end(); ++i){
|
||||||
i->free();
|
i->free();
|
||||||
}
|
}
|
||||||
for(size_t i = 0; i < m_Voc.size(); ++i){
|
/*for(size_t i = 0; i < m_Voc.size(); ++i){
|
||||||
delete m_Voc[i];
|
delete m_Voc[i];
|
||||||
m_Voc[i] = 0;
|
m_Voc[i] = 0;
|
||||||
}
|
}*/
|
||||||
m_PtrPool.reset();
|
m_PtrPool.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return the vocabulary stored in `filename`, reading the file only on the
 * first request for that name.
 *
 * Loaded vocabularies are kept in a function-local static map keyed by file
 * name, so later calls with the same name get the same WordVoc pointer back.
 * This function never removes or deletes an entry.
 *
 * @param filename path of the vocabulary file, also used as the cache key.
 * @return pointer to the cached vocabulary for `filename`.
 */
static WordVoc* ReadVoc(const std::string& filename) {
  static std::map<std::string,WordVoc*> vocs;
#ifdef WITH_THREADS
  // The mutex has to be static so that all callers contend on the SAME
  // object. A plain local mutex is created afresh on every call, so locking
  // it excludes nobody and leaves the shared map unprotected.
  // NOTE(review): this relies on thread-safe initialisation of function-local
  // statics (guaranteed from C++11; a compiler extension before that), the
  // same assumption the static map above already makes.
  static boost::mutex mutex;
  boost::mutex::scoped_lock lock(mutex);
#endif
  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
  if (vi != vocs.end()) {
    return vi->second;  // already loaded
  }

  // First request for this file: load it and remember it.
  WordVoc* voc = new WordVoc();
  voc->Read(filename);
  vocs[filename] = voc;
  return voc;
}
|
||||||
|
|
||||||
int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
|
int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
|
||||||
std::string ifs(fileNameStem + ".srctree"),
|
std::string ifs(fileNameStem + ".srctree"),
|
||||||
ift(fileNameStem + ".tgtdata"),
|
ift(fileNameStem + ".tgtdata"),
|
||||||
@ -77,7 +92,13 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
|
|||||||
fReadVector(ii,srcOffsets);
|
fReadVector(ii,srcOffsets);
|
||||||
fClose(ii);
|
fClose(ii);
|
||||||
|
|
||||||
|
if (m_FileSrc) {
|
||||||
|
fClose(m_FileSrc);
|
||||||
|
}
|
||||||
m_FileSrc = fOpen(ifs.c_str(),"rb");
|
m_FileSrc = fOpen(ifs.c_str(),"rb");
|
||||||
|
if (m_FileTgt) {
|
||||||
|
fClose(m_FileTgt);
|
||||||
|
}
|
||||||
m_FileTgt = fOpen(ift.c_str(),"rb");
|
m_FileTgt = fOpen(ift.c_str(),"rb");
|
||||||
|
|
||||||
m_Data.resize(srcOffsets.size());
|
m_Data.resize(srcOffsets.size());
|
||||||
@ -99,8 +120,9 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
|
|||||||
m_Voc.resize(numVocs);
|
m_Voc.resize(numVocs);
|
||||||
for(int i = 0; i < numVocs; ++i){
|
for(int i = 0; i < numVocs; ++i){
|
||||||
sprintf(num, "%d", i);
|
sprintf(num, "%d", i);
|
||||||
m_Voc[i] = new WordVoc();
|
//m_Voc[i] = new WordVoc();
|
||||||
m_Voc[i]->Read(ifv + num);
|
//m_Voc[i]->Read(ifv + num);
|
||||||
|
m_Voc[i] = ReadVoc(ifv + num);
|
||||||
}
|
}
|
||||||
|
|
||||||
TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
|
TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");
|
||||||
|
@ -4,6 +4,12 @@
|
|||||||
#include<vector>
|
#include<vector>
|
||||||
#include<climits>
|
#include<climits>
|
||||||
#include<iostream>
|
#include<iostream>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/mutex.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#include "PrefixTree.h"
|
#include "PrefixTree.h"
|
||||||
#include "File.h"
|
#include "File.h"
|
||||||
|
@ -1,24 +1,22 @@
|
|||||||
|
|
||||||
|
#include "Manager.h"
|
||||||
#include "SearchCubePruning.h"
|
#include "SearchCubePruning.h"
|
||||||
#include "SearchNormal.h"
|
#include "SearchNormal.h"
|
||||||
#include "UserMessage.h"
|
#include "UserMessage.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
Search::Search()
|
|
||||||
{
|
|
||||||
// long sentenceID = m_source.GetTranslationId();
|
|
||||||
// m_constraint = staticData.GetConstrainingPhrase(sentenceID);
|
|
||||||
}
|
|
||||||
|
|
||||||
Search *Search::CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
|
|
||||||
|
Search *Search::CreateSearch(Manager& manager, const InputType &source,
|
||||||
|
SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
|
||||||
{
|
{
|
||||||
switch(searchAlgorithm)
|
switch(searchAlgorithm)
|
||||||
{
|
{
|
||||||
case Normal:
|
case Normal:
|
||||||
return new SearchNormal(source, transOptColl);
|
return new SearchNormal(manager,source, transOptColl);
|
||||||
case CubePruning:
|
case CubePruning:
|
||||||
return new SearchCubePruning(source, transOptColl);
|
return new SearchCubePruning(manager, source, transOptColl);
|
||||||
case CubeGrowing:
|
case CubeGrowing:
|
||||||
return NULL;
|
return NULL;
|
||||||
default:
|
default:
|
||||||
|
@ -12,6 +12,7 @@ class HypothesisStack;
|
|||||||
class Hypothesis;
|
class Hypothesis;
|
||||||
class InputType;
|
class InputType;
|
||||||
class TranslationOptionCollection;
|
class TranslationOptionCollection;
|
||||||
|
class Manager;
|
||||||
|
|
||||||
class Search
|
class Search
|
||||||
{
|
{
|
||||||
@ -19,16 +20,18 @@ public:
|
|||||||
virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const = 0;
|
virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const = 0;
|
||||||
virtual const Hypothesis *GetBestHypothesis() const = 0;
|
virtual const Hypothesis *GetBestHypothesis() const = 0;
|
||||||
virtual void ProcessSentence() = 0;
|
virtual void ProcessSentence() = 0;
|
||||||
Search();
|
Search(Manager& manager) : m_manager(manager) {}
|
||||||
virtual ~Search()
|
virtual ~Search()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Factory
|
// Factory
|
||||||
static Search *CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl);
|
static Search *CreateSearch(Manager& manager, const InputType &source, SearchAlgorithm searchAlgorithm,
|
||||||
|
const TranslationOptionCollection &transOptColl);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
const Phrase *m_constraint;
|
const Phrase *m_constraint;
|
||||||
|
Manager& m_manager;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
|
#include "Manager.h"
|
||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "SearchCubePruning.h"
|
#include "SearchCubePruning.h"
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
@ -41,8 +41,9 @@ class BitmapContainerOrderer
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
SearchCubePruning::SearchCubePruning(const InputType &source, const TranslationOptionCollection &transOptColl)
|
SearchCubePruning::SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
|
||||||
:m_source(source)
|
:Search(manager)
|
||||||
|
,m_source(source)
|
||||||
,m_hypoStackColl(source.GetSize() + 1)
|
,m_hypoStackColl(source.GetSize() + 1)
|
||||||
,m_initialTargetPhrase(Output)
|
,m_initialTargetPhrase(Output)
|
||||||
,m_start(clock())
|
,m_start(clock())
|
||||||
@ -56,7 +57,7 @@ SearchCubePruning::SearchCubePruning(const InputType &source, const TranslationO
|
|||||||
std::vector < HypothesisStackCubePruning >::iterator iterStack;
|
std::vector < HypothesisStackCubePruning >::iterator iterStack;
|
||||||
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
|
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
|
||||||
{
|
{
|
||||||
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning();
|
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning(m_manager);
|
||||||
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
|
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
|
||||||
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
|
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
|
||||||
|
|
||||||
@ -78,7 +79,7 @@ void SearchCubePruning::ProcessSentence()
|
|||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
|
||||||
// initial seed hypothesis: nothing translated, no words produced
|
// initial seed hypothesis: nothing translated, no words produced
|
||||||
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
|
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
|
||||||
|
|
||||||
HypothesisStackCubePruning &firstStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.front());
|
HypothesisStackCubePruning &firstStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.front());
|
||||||
firstStack.AddInitial(hypo);
|
firstStack.AddInitial(hypo);
|
||||||
@ -153,8 +154,8 @@ void SearchCubePruning::ProcessSentence()
|
|||||||
PrintBitmapContainerGraph();
|
PrintBitmapContainerGraph();
|
||||||
|
|
||||||
// some more logging
|
// some more logging
|
||||||
IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
|
IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
|
||||||
VERBOSE(2, staticData.GetSentenceStats());
|
VERBOSE(2, m_manager.GetSentenceStats());
|
||||||
}
|
}
|
||||||
|
|
||||||
void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack)
|
void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack)
|
||||||
|
@ -30,7 +30,7 @@ protected:
|
|||||||
void PrintBitmapContainerGraph();
|
void PrintBitmapContainerGraph();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SearchCubePruning(const InputType &source, const TranslationOptionCollection &transOptColl);
|
SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
|
||||||
~SearchCubePruning();
|
~SearchCubePruning();
|
||||||
|
|
||||||
void ProcessSentence();
|
void ProcessSentence();
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#include "Manager.h"
|
||||||
#include "Timer.h"
|
#include "Timer.h"
|
||||||
#include "SearchNormal.h"
|
#include "SearchNormal.h"
|
||||||
|
|
||||||
@ -9,8 +10,9 @@ namespace Moses
|
|||||||
* /param source input sentence
|
* /param source input sentence
|
||||||
* /param transOptColl collection of translation options to be used for this sentence
|
* /param transOptColl collection of translation options to be used for this sentence
|
||||||
*/
|
*/
|
||||||
SearchNormal::SearchNormal(const InputType &source, const TranslationOptionCollection &transOptColl)
|
SearchNormal::SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
|
||||||
:m_source(source)
|
:Search(manager)
|
||||||
|
,m_source(source)
|
||||||
,m_hypoStackColl(source.GetSize() + 1)
|
,m_hypoStackColl(source.GetSize() + 1)
|
||||||
,m_initialTargetPhrase(Output)
|
,m_initialTargetPhrase(Output)
|
||||||
,m_start(clock())
|
,m_start(clock())
|
||||||
@ -28,7 +30,7 @@ SearchNormal::SearchNormal(const InputType &source, const TranslationOptionColle
|
|||||||
std::vector < HypothesisStackNormal >::iterator iterStack;
|
std::vector < HypothesisStackNormal >::iterator iterStack;
|
||||||
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
|
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
|
||||||
{
|
{
|
||||||
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal();
|
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
|
||||||
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity());
|
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity());
|
||||||
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
|
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
|
||||||
|
|
||||||
@ -48,11 +50,11 @@ SearchNormal::~SearchNormal()
|
|||||||
void SearchNormal::ProcessSentence()
|
void SearchNormal::ProcessSentence()
|
||||||
{
|
{
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
SentenceStats &stats = staticData.GetSentenceStats();
|
SentenceStats &stats = m_manager.GetSentenceStats();
|
||||||
clock_t t=0; // used to track time for steps
|
clock_t t=0; // used to track time for steps
|
||||||
|
|
||||||
// initial seed hypothesis: nothing translated, no words produced
|
// initial seed hypothesis: nothing translated, no words produced
|
||||||
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
|
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
|
||||||
m_hypoStackColl[0]->AddPrune(hypo);
|
m_hypoStackColl[0]->AddPrune(hypo);
|
||||||
|
|
||||||
// go through each stack
|
// go through each stack
|
||||||
@ -91,8 +93,8 @@ void SearchNormal::ProcessSentence()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// some more logging
|
// some more logging
|
||||||
IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
|
IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
|
||||||
VERBOSE(2, staticData.GetSentenceStats());
|
VERBOSE(2, m_manager.GetSentenceStats());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -274,7 +276,7 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star
|
|||||||
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
|
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
|
||||||
{
|
{
|
||||||
const StaticData &staticData = StaticData::Instance();
|
const StaticData &staticData = StaticData::Instance();
|
||||||
SentenceStats &stats = staticData.GetSentenceStats();
|
SentenceStats &stats = m_manager.GetSentenceStats();
|
||||||
clock_t t=0; // used to track time for steps
|
clock_t t=0; // used to track time for steps
|
||||||
|
|
||||||
Hypothesis *newHypo;
|
Hypothesis *newHypo;
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
namespace Moses
|
namespace Moses
|
||||||
{
|
{
|
||||||
|
|
||||||
|
class Manager;
|
||||||
class InputType;
|
class InputType;
|
||||||
class TranslationOptionCollection;
|
class TranslationOptionCollection;
|
||||||
|
|
||||||
@ -31,7 +32,7 @@ protected:
|
|||||||
void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
|
void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SearchNormal(const InputType &source, const TranslationOptionCollection &transOptColl);
|
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
|
||||||
~SearchNormal();
|
~SearchNormal();
|
||||||
|
|
||||||
void ProcessSentence();
|
void ProcessSentence();
|
||||||
|
@ -373,7 +373,6 @@ bool StaticData::LoadData(Parameter *parameter)
|
|||||||
if (!LoadLanguageModels()) return false;
|
if (!LoadLanguageModels()) return false;
|
||||||
if (!LoadGenerationTables()) return false;
|
if (!LoadGenerationTables()) return false;
|
||||||
if (!LoadPhraseTables()) return false;
|
if (!LoadPhraseTables()) return false;
|
||||||
if (!LoadMapping()) return false;
|
|
||||||
if (!LoadGlobalLexicalModel()) return false;
|
if (!LoadGlobalLexicalModel()) return false;
|
||||||
|
|
||||||
m_scoreIndexManager.InitFeatureNames();
|
m_scoreIndexManager.InitFeatureNames();
|
||||||
@ -418,12 +417,11 @@ StaticData::~StaticData()
|
|||||||
RemoveAllInColl(m_phraseDictionary);
|
RemoveAllInColl(m_phraseDictionary);
|
||||||
RemoveAllInColl(m_generationDictionary);
|
RemoveAllInColl(m_generationDictionary);
|
||||||
RemoveAllInColl(m_languageModel);
|
RemoveAllInColl(m_languageModel);
|
||||||
RemoveAllInColl(m_decodeStepVL);
|
|
||||||
RemoveAllInColl(m_reorderModels);
|
RemoveAllInColl(m_reorderModels);
|
||||||
RemoveAllInColl(m_globalLexicalModels);
|
RemoveAllInColl(m_globalLexicalModels);
|
||||||
|
|
||||||
// delete trans opt
|
// delete trans opt
|
||||||
map<std::pair<const DecodeGraph*, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
|
map<std::pair<size_t, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
|
||||||
for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache)
|
for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache)
|
||||||
{
|
{
|
||||||
TranslationOptionList *transOptList = iterCache->second.first;
|
TranslationOptionList *transOptList = iterCache->second.first;
|
||||||
@ -847,47 +845,21 @@ bool StaticData::LoadPhraseTables()
|
|||||||
IFVERBOSE(1)
|
IFVERBOSE(1)
|
||||||
PrintUserTime(string("Start loading PhraseTable ") + filePath);
|
PrintUserTime(string("Start loading PhraseTable ") + filePath);
|
||||||
VERBOSE(1,"filePath: " << filePath << endl);
|
VERBOSE(1,"filePath: " << filePath << endl);
|
||||||
if (!FileExists(filePath+".binphr.idx"))
|
|
||||||
{ // memory phrase table
|
PhraseDictionaryFeature* pdf = new PhraseDictionaryFeature(
|
||||||
VERBOSE(2,"using standard phrase tables" << endl);
|
numScoreComponent
|
||||||
if (!FileExists(filePath) && FileExists(filePath + ".gz")) {
|
, (currDict==0 ? m_numInputScores : 0)
|
||||||
filePath += ".gz";
|
, input
|
||||||
VERBOSE(2,"Using gzipped file" << endl);
|
, output
|
||||||
}
|
, filePath
|
||||||
if (m_inputType != SentenceInput)
|
, weight
|
||||||
{
|
, maxTargetPhrase[index]);
|
||||||
UserMessage::Add("Must use binary phrase table for this input type");
|
|
||||||
return false;
|
m_phraseDictionary.push_back(pdf);
|
||||||
}
|
|
||||||
|
|
||||||
PhraseDictionaryMemory *pd=new PhraseDictionaryMemory(numScoreComponent);
|
|
||||||
if (!pd->Load(input
|
|
||||||
, output
|
|
||||||
, filePath
|
|
||||||
, weight
|
|
||||||
, maxTargetPhrase[index]
|
|
||||||
, GetAllLM()
|
|
||||||
, GetWeightWordPenalty()))
|
|
||||||
{
|
|
||||||
delete pd;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
m_phraseDictionary.push_back(pd);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ // binary phrase table
|
|
||||||
VERBOSE(1, "using binary phrase tables for idx "<<currDict<<"\n");
|
|
||||||
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent,(currDict==0 ? m_numInputScores : 0));
|
|
||||||
if (!pd->Load(input,output,filePath,weight,
|
|
||||||
maxTargetPhrase[index],
|
|
||||||
GetAllLM(),
|
|
||||||
GetWeightWordPenalty()))
|
|
||||||
{
|
|
||||||
delete pd;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
m_phraseDictionary.push_back(pd);
|
|
||||||
}
|
|
||||||
|
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
@ -898,8 +870,9 @@ bool StaticData::LoadPhraseTables()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool StaticData::LoadMapping()
|
vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
|
||||||
{
|
{
|
||||||
|
vector<DecodeGraph*> decodeStepVL;
|
||||||
// mapping
|
// mapping
|
||||||
const vector<string> &mappingVector = m_parameter->GetParam("mapping");
|
const vector<string> &mappingVector = m_parameter->GetParam("mapping");
|
||||||
DecodeStep *prev = 0;
|
DecodeStep *prev = 0;
|
||||||
@ -932,7 +905,7 @@ bool StaticData::LoadMapping()
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
UserMessage::Add("Malformed mapping!");
|
UserMessage::Add("Malformed mapping!");
|
||||||
return false;
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
DecodeStep* decodeStep = 0;
|
DecodeStep* decodeStep = 0;
|
||||||
@ -944,9 +917,9 @@ bool StaticData::LoadMapping()
|
|||||||
strme << "No phrase dictionary with index "
|
strme << "No phrase dictionary with index "
|
||||||
<< index << " available!";
|
<< index << " available!";
|
||||||
UserMessage::Add(strme.str());
|
UserMessage::Add(strme.str());
|
||||||
return false;
|
assert(false);
|
||||||
}
|
}
|
||||||
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
|
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index]->GetDictionary(source), prev);
|
||||||
break;
|
break;
|
||||||
case Generate:
|
case Generate:
|
||||||
if(index>=m_generationDictionary.size())
|
if(index>=m_generationDictionary.size())
|
||||||
@ -955,7 +928,7 @@ bool StaticData::LoadMapping()
|
|||||||
strme << "No generation dictionary with index "
|
strme << "No generation dictionary with index "
|
||||||
<< index << " available!";
|
<< index << " available!";
|
||||||
UserMessage::Add(strme.str());
|
UserMessage::Add(strme.str());
|
||||||
return false;
|
assert(false);
|
||||||
}
|
}
|
||||||
decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
|
decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
|
||||||
break;
|
break;
|
||||||
@ -964,22 +937,20 @@ bool StaticData::LoadMapping()
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
assert(decodeStep);
|
assert(decodeStep);
|
||||||
if (m_decodeStepVL.size() < vectorList + 1)
|
if (decodeStepVL.size() < vectorList + 1)
|
||||||
{
|
{
|
||||||
m_decodeStepVL.push_back(new DecodeGraph());
|
decodeStepVL.push_back(new DecodeGraph(decodeStepVL.size()));
|
||||||
}
|
}
|
||||||
m_decodeStepVL[vectorList]->Add(decodeStep);
|
decodeStepVL[vectorList]->Add(decodeStep);
|
||||||
prev = decodeStep;
|
prev = decodeStep;
|
||||||
previousVectorList = vectorList;
|
previousVectorList = vectorList;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return decodeStepVL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void StaticData::CleanUpAfterSentenceProcessing() const
|
void StaticData::CleanUpAfterSentenceProcessing() const
|
||||||
{
|
{
|
||||||
for(size_t i=0;i<m_phraseDictionary.size();++i)
|
|
||||||
m_phraseDictionary[i]->CleanUp();
|
|
||||||
for(size_t i=0;i<m_generationDictionary.size();++i)
|
for(size_t i=0;i<m_generationDictionary.size();++i)
|
||||||
m_generationDictionary[i]->CleanUp();
|
m_generationDictionary[i]->CleanUp();
|
||||||
|
|
||||||
@ -997,10 +968,6 @@ void StaticData::CleanUpAfterSentenceProcessing() const
|
|||||||
binary format is used) */
|
binary format is used) */
|
||||||
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
|
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
|
||||||
{
|
{
|
||||||
m_input = ∈
|
|
||||||
for(size_t i=0;i<m_phraseDictionary.size();++i) {
|
|
||||||
m_phraseDictionary[i]->InitializeForInput(in);
|
|
||||||
}
|
|
||||||
for(size_t i=0;i<m_reorderModels.size();++i) {
|
for(size_t i=0;i<m_reorderModels.size();++i) {
|
||||||
m_reorderModels[i]->InitializeForInput(in);
|
m_reorderModels[i]->InitializeForInput(in);
|
||||||
}
|
}
|
||||||
@ -1031,9 +998,11 @@ void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::
|
|||||||
|
|
||||||
const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const
|
const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const
|
||||||
{
|
{
|
||||||
std::pair<const DecodeGraph*, Phrase> key(&decodeGraph, sourcePhrase);
|
std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
|
||||||
|
#ifdef WITH_THREADS
|
||||||
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
|
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
|
||||||
|
#endif
|
||||||
|
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
|
||||||
= m_transOptCache.find(key);
|
= m_transOptCache.find(key);
|
||||||
if (iter == m_transOptCache.end())
|
if (iter == m_transOptCache.end())
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -1048,7 +1017,7 @@ void StaticData::ReduceTransOptCache() const
|
|||||||
|
|
||||||
// find cutoff for last used time
|
// find cutoff for last used time
|
||||||
priority_queue< clock_t > lastUsedTimes;
|
priority_queue< clock_t > lastUsedTimes;
|
||||||
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter;
|
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter;
|
||||||
iter = m_transOptCache.begin();
|
iter = m_transOptCache.begin();
|
||||||
while( iter != m_transOptCache.end() )
|
while( iter != m_transOptCache.end() )
|
||||||
{
|
{
|
||||||
@ -1065,7 +1034,7 @@ void StaticData::ReduceTransOptCache() const
|
|||||||
{
|
{
|
||||||
if (iter->second.second < cutoffLastUsedTime)
|
if (iter->second.second < cutoffLastUsedTime)
|
||||||
{
|
{
|
||||||
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iterRemove = iter++;
|
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iterRemove = iter++;
|
||||||
delete iterRemove->second.first;
|
delete iterRemove->second.first;
|
||||||
m_transOptCache.erase(iterRemove);
|
m_transOptCache.erase(iterRemove);
|
||||||
}
|
}
|
||||||
@ -1076,8 +1045,11 @@ void StaticData::ReduceTransOptCache() const
|
|||||||
|
|
||||||
void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const
|
void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const
|
||||||
{
|
{
|
||||||
std::pair<const DecodeGraph*, Phrase> key(&decodeGraph, sourcePhrase);
|
std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
|
||||||
TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList);
|
TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList);
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
|
||||||
|
#endif
|
||||||
m_transOptCache[key] = make_pair( storedTransOptList, clock() );
|
m_transOptCache[key] = make_pair( storedTransOptList, clock() );
|
||||||
ReduceTransOptCache();
|
ReduceTransOptCache();
|
||||||
}
|
}
|
||||||
|
@ -25,6 +25,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
#include <boost/thread/mutex.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "ScoreIndexManager.h"
|
#include "ScoreIndexManager.h"
|
||||||
#include "FactorCollection.h"
|
#include "FactorCollection.h"
|
||||||
@ -46,7 +51,7 @@ namespace Moses
|
|||||||
class InputType;
|
class InputType;
|
||||||
class LexicalReordering;
|
class LexicalReordering;
|
||||||
class GlobalLexicalModel;
|
class GlobalLexicalModel;
|
||||||
class PhraseDictionary;
|
class PhraseDictionaryFeature;
|
||||||
class GenerationDictionary;
|
class GenerationDictionary;
|
||||||
class DistortionScoreProducer;
|
class DistortionScoreProducer;
|
||||||
class WordPenaltyProducer;
|
class WordPenaltyProducer;
|
||||||
@ -61,9 +66,8 @@ private:
|
|||||||
protected:
|
protected:
|
||||||
|
|
||||||
std::map<long,Phrase> m_constraints;
|
std::map<long,Phrase> m_constraints;
|
||||||
std::vector<PhraseDictionary*> m_phraseDictionary;
|
std::vector<PhraseDictionaryFeature*> m_phraseDictionary;
|
||||||
std::vector<GenerationDictionary*> m_generationDictionary;
|
std::vector<GenerationDictionary*> m_generationDictionary;
|
||||||
std::vector <DecodeGraph*> m_decodeStepVL;
|
|
||||||
Parameter *m_parameter;
|
Parameter *m_parameter;
|
||||||
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
|
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
|
||||||
LMList m_languageModel;
|
LMList m_languageModel;
|
||||||
@ -129,7 +133,7 @@ protected:
|
|||||||
bool m_PrintAlignmentInfo;
|
bool m_PrintAlignmentInfo;
|
||||||
bool m_PrintAlignmentInfoNbest;
|
bool m_PrintAlignmentInfoNbest;
|
||||||
|
|
||||||
mutable std::auto_ptr<SentenceStats> m_sentenceStats;
|
|
||||||
std::string m_factorDelimiter; //! by default, |, but it can be changed
|
std::string m_factorDelimiter; //! by default, |, but it can be changed
|
||||||
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
|
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
|
||||||
size_t m_maxNumFactors; //! max number of factors on both source and target sides
|
size_t m_maxNumFactors; //! max number of factors on both source and target sides
|
||||||
@ -144,10 +148,13 @@ protected:
|
|||||||
size_t m_timeout_threshold; //! seconds after which time out is activated
|
size_t m_timeout_threshold; //! seconds after which time out is activated
|
||||||
|
|
||||||
bool m_useTransOptCache; //! flag indicating, if the persistent translation option cache should be used
|
bool m_useTransOptCache; //! flag indicating, if the persistent translation option cache should be used
|
||||||
mutable std::map<std::pair<const DecodeGraph*, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
|
mutable std::map<std::pair<size_t, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
|
||||||
size_t m_transOptCacheMaxSize; //! maximum size for persistent translation option cache
|
size_t m_transOptCacheMaxSize; //! maximum size for persistent translation option cache
|
||||||
|
//FIXME: Single lock for cache not most efficient. However using a
|
||||||
mutable const InputType* m_input; //! holds reference to current sentence
|
//reader-writer for LRU cache is tricky - how to record last used time?
|
||||||
|
#ifdef WITH_THREADS
|
||||||
|
mutable boost::mutex m_transOptCacheMutex;
|
||||||
|
#endif
|
||||||
bool m_isAlwaysCreateDirectTranslationOption;
|
bool m_isAlwaysCreateDirectTranslationOption;
|
||||||
//! constructor. only the 1 static variable can be created
|
//! constructor. only the 1 static variable can be created
|
||||||
|
|
||||||
@ -176,9 +183,9 @@ protected:
|
|||||||
//! load all generation tables as specified in ini file
|
//! load all generation tables as specified in ini file
|
||||||
bool LoadGenerationTables();
|
bool LoadGenerationTables();
|
||||||
//! load decoding steps
|
//! load decoding steps
|
||||||
bool LoadMapping();
|
|
||||||
bool LoadLexicalReorderingModel();
|
bool LoadLexicalReorderingModel();
|
||||||
bool LoadGlobalLexicalModel();
|
bool LoadGlobalLexicalModel();
|
||||||
|
void ReduceTransOptCache() const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -228,10 +235,7 @@ public:
|
|||||||
return m_outputFactorOrder;
|
return m_outputFactorOrder;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<DecodeGraph*> &GetDecodeStepVL() const
|
std::vector<DecodeGraph*> GetDecodeStepVL(const InputType& source) const;
|
||||||
{
|
|
||||||
return m_decodeStepVL;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool GetSourceStartPosMattersForRecombination() const
|
inline bool GetSourceStartPosMattersForRecombination() const
|
||||||
{
|
{
|
||||||
@ -352,7 +356,7 @@ public:
|
|||||||
{
|
{
|
||||||
return m_phraseDictionary.size();
|
return m_phraseDictionary.size();
|
||||||
}
|
}
|
||||||
const std::vector<PhraseDictionary*> &GetPhraseDictionaries() const
|
const std::vector<PhraseDictionaryFeature*> &GetPhraseDictionaries() const
|
||||||
{
|
{
|
||||||
return m_phraseDictionary;
|
return m_phraseDictionary;
|
||||||
}
|
}
|
||||||
@ -381,10 +385,7 @@ public:
|
|||||||
{
|
{
|
||||||
return m_isDetailedTranslationReportingEnabled;
|
return m_isDetailedTranslationReportingEnabled;
|
||||||
}
|
}
|
||||||
void ResetSentenceStats(const InputType& source) const
|
|
||||||
{
|
|
||||||
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
|
|
||||||
}
|
|
||||||
bool IsLabeledNBestList() const
|
bool IsLabeledNBestList() const
|
||||||
{
|
{
|
||||||
return m_labeledNBestList;
|
return m_labeledNBestList;
|
||||||
@ -430,13 +431,9 @@ public:
|
|||||||
InputTypeEnum GetInputType() const {return m_inputType;}
|
InputTypeEnum GetInputType() const {return m_inputType;}
|
||||||
SearchAlgorithm GetSearchAlgorithm() const {return m_searchAlgorithm;}
|
SearchAlgorithm GetSearchAlgorithm() const {return m_searchAlgorithm;}
|
||||||
size_t GetNumInputScores() const {return m_numInputScores;}
|
size_t GetNumInputScores() const {return m_numInputScores;}
|
||||||
const InputType* GetInput() const { return m_input; }
|
|
||||||
void InitializeBeforeSentenceProcessing(InputType const&) const;
|
void InitializeBeforeSentenceProcessing(InputType const&) const;
|
||||||
void CleanUpAfterSentenceProcessing() const;
|
void CleanUpAfterSentenceProcessing() const;
|
||||||
SentenceStats& GetSentenceStats() const
|
|
||||||
{
|
|
||||||
return *m_sentenceStats;
|
|
||||||
}
|
|
||||||
const std::vector<float>& GetAllWeights() const
|
const std::vector<float>& GetAllWeights() const
|
||||||
{
|
{
|
||||||
return m_allWeights;
|
return m_allWeights;
|
||||||
@ -470,7 +467,7 @@ public:
|
|||||||
bool GetUseTransOptCache() const { return m_useTransOptCache; }
|
bool GetUseTransOptCache() const { return m_useTransOptCache; }
|
||||||
|
|
||||||
void AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const;
|
void AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const;
|
||||||
void ReduceTransOptCache() const;
|
|
||||||
|
|
||||||
const TranslationOptionList* FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const;
|
const TranslationOptionList* FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const;
|
||||||
};
|
};
|
||||||
|
@ -62,10 +62,7 @@ void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void TargetPhrase::SetAlignment()
|
|
||||||
{
|
|
||||||
m_alignmentPair.SetIdentityAlignment();
|
|
||||||
}
|
|
||||||
|
|
||||||
void TargetPhrase::SetScore(float score)
|
void TargetPhrase::SetScore(float score)
|
||||||
{
|
{
|
||||||
@ -204,142 +201,14 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
|
|||||||
return clone;
|
return clone;
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper functions
|
|
||||||
void AddAlignmentElement(AlignmentPhraseInserter &inserter
|
|
||||||
, const string &str
|
|
||||||
, size_t phraseSize
|
|
||||||
, size_t otherPhraseSize
|
|
||||||
, list<size_t> &uniformAlignment)
|
|
||||||
{
|
|
||||||
// input
|
|
||||||
vector<string> alignPhraseVector = Tokenize(str);
|
|
||||||
// from
|
|
||||||
// "(0) (3) (1,2)"
|
|
||||||
// to
|
|
||||||
// "(0)" "(3)" "(1,2)"
|
|
||||||
assert (alignPhraseVector.size() == phraseSize) ;
|
|
||||||
|
|
||||||
const size_t inputSize = alignPhraseVector.size();
|
|
||||||
for (size_t pos = 0 ; pos < inputSize ; ++pos)
|
|
||||||
{
|
|
||||||
string alignElementStr = alignPhraseVector[pos];
|
|
||||||
|
|
||||||
//change "()" into "(-1)" for both source and target word-to-word alignments
|
|
||||||
std::string emtpyAlignStr="()";
|
|
||||||
std::string replaceAlignStr="(-1)";
|
|
||||||
alignElementStr=Replace(alignElementStr,emtpyAlignStr,replaceAlignStr);
|
|
||||||
|
|
||||||
//remove all "(" from both source and target word-to-word alignments
|
|
||||||
emtpyAlignStr="(";
|
|
||||||
replaceAlignStr="";
|
|
||||||
alignElementStr=Replace(alignElementStr,emtpyAlignStr,replaceAlignStr);
|
|
||||||
|
|
||||||
//remove all ")" from both source and target word-to-word alignments
|
|
||||||
emtpyAlignStr=")";
|
|
||||||
replaceAlignStr="";
|
|
||||||
alignElementStr=Replace(alignElementStr,emtpyAlignStr,replaceAlignStr);
|
|
||||||
|
|
||||||
AlignmentElement *alignElement = new AlignmentElement(Tokenize<AlignmentElementType>(alignElementStr, ","));
|
|
||||||
// "(1,2)"
|
|
||||||
// to
|
|
||||||
// [1] [2]
|
|
||||||
if (alignElement->GetSize() == 0)
|
|
||||||
{ // no alignment info. add uniform alignment, ie. can be aligned to any word
|
|
||||||
alignElement->SetUniformAlignment(otherPhraseSize);
|
|
||||||
uniformAlignment.push_back(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
**inserter = alignElement;
|
|
||||||
(*inserter)++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// helper functions
|
|
||||||
void AddAlignmentElement(AlignmentPhraseInserter &inserter
|
|
||||||
, const WordAlignments &wa
|
|
||||||
, size_t phraseSize
|
|
||||||
, size_t otherPhraseSize
|
|
||||||
, list<size_t> &uniformAlignment)
|
|
||||||
{
|
|
||||||
// from
|
|
||||||
// "(0) (3) (1,2)"
|
|
||||||
// to
|
|
||||||
// "(0)" "(3)" "(1,2)"
|
|
||||||
assert (wa.size() == phraseSize) ;
|
|
||||||
|
|
||||||
const size_t inputSize = wa.size();
|
|
||||||
for (size_t pos = 0 ; pos < inputSize ; ++pos)
|
|
||||||
{
|
|
||||||
string alignElementStr = wa[pos];
|
|
||||||
AlignmentElement *alignElement = new AlignmentElement(Tokenize<AlignmentElementType>(alignElementStr, ","));
|
|
||||||
// "(1,2)"
|
|
||||||
// to
|
|
||||||
// [1] [2]
|
|
||||||
if (alignElement->GetSize() == 0)
|
|
||||||
{ // no alignment info. add uniform alignment, ie. can be aligned to any word
|
|
||||||
alignElement->SetUniformAlignment(otherPhraseSize);
|
|
||||||
uniformAlignment.push_back(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
**inserter = alignElement;
|
|
||||||
(*inserter)++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void TargetPhrase::CreateAlignmentInfo(const WordAlignments &swa
|
|
||||||
, const WordAlignments &twa)
|
|
||||||
{
|
|
||||||
AlignmentPhraseInserter sourceInserter = m_alignmentPair.GetInserter(Input);
|
|
||||||
AlignmentPhraseInserter targetInserter = m_alignmentPair.GetInserter(Output);
|
|
||||||
list<size_t> uniformAlignmentSource, uniformAlignmentTarget;
|
|
||||||
|
|
||||||
if (!UseWordAlignment()){ //build uniform word-to-word alignment to fit the internal structure which requires their presence
|
|
||||||
std::string srcAlignStr,trgAlignStr;
|
|
||||||
UniformAlignment(srcAlignStr, m_sourcePhrase->GetSize(), GetSize());
|
|
||||||
UniformAlignment(trgAlignStr, GetSize(), m_sourcePhrase->GetSize());
|
|
||||||
CreateAlignmentInfo(srcAlignStr,trgAlignStr);
|
|
||||||
}
|
|
||||||
else{
|
|
||||||
AddAlignmentElement(sourceInserter
|
|
||||||
, swa
|
|
||||||
, m_sourcePhrase->GetSize()
|
|
||||||
, GetSize()
|
|
||||||
, uniformAlignmentSource);
|
|
||||||
AddAlignmentElement(targetInserter
|
|
||||||
, twa
|
|
||||||
, GetSize()
|
|
||||||
, m_sourcePhrase->GetSize()
|
|
||||||
, uniformAlignmentTarget);
|
|
||||||
}
|
|
||||||
// propergate uniform alignments to other side
|
|
||||||
// m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
|
|
||||||
// m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void TargetPhrase::CreateAlignmentInfo(const string &sourceStr
|
|
||||||
, const string &targetStr)
|
|
||||||
{
|
|
||||||
AlignmentPhraseInserter sourceInserter = m_alignmentPair.GetInserter(Input);
|
|
||||||
AlignmentPhraseInserter targetInserter = m_alignmentPair.GetInserter(Output);
|
|
||||||
list<size_t> uniformAlignmentSource, uniformAlignmentTarget;
|
|
||||||
|
|
||||||
AddAlignmentElement(sourceInserter
|
|
||||||
, sourceStr
|
|
||||||
, m_sourcePhrase->GetSize()
|
|
||||||
, GetSize()
|
|
||||||
, uniformAlignmentSource);
|
|
||||||
AddAlignmentElement(targetInserter
|
|
||||||
, targetStr
|
|
||||||
, GetSize()
|
|
||||||
, m_sourcePhrase->GetSize()
|
|
||||||
, uniformAlignmentTarget);
|
|
||||||
// propergate uniform alignments to other side
|
|
||||||
// m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
|
|
||||||
// m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
|
|
||||||
}
|
|
||||||
|
|
||||||
TO_STRING_BODY(TargetPhrase);
|
TO_STRING_BODY(TargetPhrase);
|
||||||
|
|
||||||
@ -347,8 +216,7 @@ std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
|
|||||||
{
|
{
|
||||||
os << static_cast<const Phrase&>(tp);
|
os << static_cast<const Phrase&>(tp);
|
||||||
os << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore;
|
os << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore;
|
||||||
if (tp.PrintAlignmentInfo())
|
|
||||||
os << ", " << tp.GetAlignmentPair();
|
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,7 +25,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "Phrase.h"
|
#include "Phrase.h"
|
||||||
#include "ScoreComponentCollection.h"
|
#include "ScoreComponentCollection.h"
|
||||||
#include "AlignmentPair.h"
|
|
||||||
#if HAVE_CONFIG_H
|
#if HAVE_CONFIG_H
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
@ -50,7 +49,6 @@ protected:
|
|||||||
float m_transScore, m_ngramScore, m_fullScore;
|
float m_transScore, m_ngramScore, m_fullScore;
|
||||||
//float m_ngramScore, m_fullScore;
|
//float m_ngramScore, m_fullScore;
|
||||||
ScoreComponentCollection m_scoreBreakdown;
|
ScoreComponentCollection m_scoreBreakdown;
|
||||||
AlignmentPair m_alignmentPair;
|
|
||||||
|
|
||||||
// in case of confusion net, ptr to source phrase
|
// in case of confusion net, ptr to source phrase
|
||||||
Phrase const* m_sourcePhrase;
|
Phrase const* m_sourcePhrase;
|
||||||
@ -135,20 +133,9 @@ public:
|
|||||||
{
|
{
|
||||||
return m_sourcePhrase;
|
return m_sourcePhrase;
|
||||||
}
|
}
|
||||||
AlignmentPair &GetAlignmentPair()
|
|
||||||
{
|
|
||||||
return m_alignmentPair;
|
|
||||||
}
|
|
||||||
const AlignmentPair &GetAlignmentPair() const
|
|
||||||
{
|
|
||||||
return m_alignmentPair;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Parse the alignment info portion of phrase table string to create alignment info */
|
|
||||||
void CreateAlignmentInfo(const std::string &sourceStr
|
|
||||||
, const std::string &targetStr);
|
|
||||||
void CreateAlignmentInfo(const WordAlignments &swa
|
|
||||||
, const WordAlignments &twa);
|
|
||||||
|
|
||||||
void UseWordAlignment(bool a){
|
void UseWordAlignment(bool a){
|
||||||
wordalignflag=a;
|
wordalignflag=a;
|
||||||
|
@ -30,7 +30,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|||||||
#include "Util.h"
|
#include "Util.h"
|
||||||
#include "TypeDef.h"
|
#include "TypeDef.h"
|
||||||
#include "ScoreComponentCollection.h"
|
#include "ScoreComponentCollection.h"
|
||||||
#include "AlignmentPair.h"
|
|
||||||
#include "StaticData.h"
|
#include "StaticData.h"
|
||||||
|
|
||||||
namespace Moses
|
namespace Moses
|
||||||
|
@ -264,14 +264,14 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
targetPhrase.CreateAlignmentInfo("(0)","(0)");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// drop source word. create blank trans opt
|
// drop source word. create blank trans opt
|
||||||
|
|
||||||
targetPhrase.SetAlignment();
|
//targetPhrase.SetAlignment();
|
||||||
|
|
||||||
}
|
}
|
||||||
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);
|
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);
|
||||||
|
@ -32,15 +32,15 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
echo "Calling $ACLOCAL..."
|
echo "Calling $ACLOCAL..."
|
||||||
$ACLOCAL || die "aclocal failed"
|
$ACLOCAL -I m4 || die "aclocal failed"
|
||||||
echo "Calling $AUTOCONF..."
|
echo "Calling $AUTOCONF..."
|
||||||
$AUTOCONF || die "autoconf failed"
|
$AUTOCONF || die "autoconf failed"
|
||||||
echo "Calling $AUTOMAKE..."
|
echo "Calling $AUTOMAKE..."
|
||||||
$AUTOMAKE || die "automake failed"
|
$AUTOMAKE || die "automake failed"
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "You should now be able to configure and build:"
|
echo "You should now be able to configure and build:"
|
||||||
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm]"
|
# Usage hint printed after regeneration; every optional flag is shown in [brackets].
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
|
||||||
echo " make -j 4"
|
echo " make -j 4"
|
||||||
echo
|
echo
|
||||||
|
|
||||||
|
5
server/Makefile.am
Normal file
5
server/Makefile.am
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Builds the XML-RPC translation server and links it against the static Moses library.
bin_PROGRAMS = mosesserver
mosesserver_SOURCES = mosesserver.cpp
# Moses headers are part of the source tree, so they are found via top_srcdir.
mosesserver_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src $(XMLRPC_C_CPPFLAGS) $(BOOST_CPPFLAGS)
# libmoses.a is a build product, so it is located via top_builddir; this only
# differs from top_srcdir when building outside the source tree.
mosesserver_LDADD = -L$(top_builddir)/moses/src -lmoses $(BOOST_LDFLAGS) $(XMLRPC_C_LIBS) $(BOOST_THREAD_LIB)
mosesserver_DEPENDENCIES = $(top_builddir)/moses/src/libmoses.a
|
23
server/client.perl
Executable file
23
server/client.perl
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env perl

# Minimal XML-RPC client for mosesserver: sends one sentence to the
# "translate" method and prints the translation, followed by the phrase
# alignments when the response contains them.
#
# Usage: client.perl [URL]

use strict;
use warnings;

use XMLRPC::Lite;

# Server endpoint. The first command-line argument overrides the default.
# Note that mosesserver itself listens on port 8080 unless it is started
# with --server-port.
my $url = @ARGV ? $ARGV[0] : "http://localhost:9084/RPC2";
my $proxy = XMLRPC::Lite->proxy($url);

#my %param = ("text" => "das ist ein haus das ist ein haus das ist ein haus");
#my %param = ("text" => "je ne sais pas . ");
#my %param = ("text" => "actes pris en application des traités ce euratom dont la publication est obligatoire");
#my %param = ("text" => "actes pris en application des " );
#my %param = ("text" => "je ne sais pas . ", "align" => "true");
my %param = ("text" => "hello !");

# A faulted call carries no usable result, so report the fault instead of
# dereferencing an undefined value below.
my $response = $proxy->call("translate",\%param);
if ($response->fault) {
    die "Translation request failed: " . $response->faultstring . "\n";
}

my $result = $response->result;
print $result->{'text'} . "\n";
if ($result->{'align'}) {
    print "Phrase alignments: \n";
    my $aligns = $result->{'align'};
    # One line per phrase: target start, source start, source end.
    foreach my $align (@$aligns) {
        print $align->{'tgt-start'} . "," . $align->{'src-start'} . ","
            . $align->{'src-end'} . "\n";
    }
}
|
163
server/mosesserver.cpp
Normal file
163
server/mosesserver.cpp
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>

#include "Hypothesis.h"
#include "Manager.h"
#include "StaticData.h"
|
||||||
|
|
||||||
|
|
||||||
|
using namespace Moses;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
|
class Translator : public xmlrpc_c::method {
|
||||||
|
public:
|
||||||
|
Translator() {
|
||||||
|
// signature and help strings are documentation -- the client
|
||||||
|
// can query this information with a system.methodSignature and
|
||||||
|
// system.methodHelp RPC.
|
||||||
|
this->_signature = "S:S";
|
||||||
|
this->_help = "Does translation";
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef std::map<std::string, xmlrpc_c::value> params_t;
|
||||||
|
|
||||||
|
void
|
||||||
|
execute(xmlrpc_c::paramList const& paramList,
|
||||||
|
xmlrpc_c::value * const retvalP) {
|
||||||
|
|
||||||
|
const params_t params = paramList.getStruct(0);
|
||||||
|
paramList.verifyEnd(1);
|
||||||
|
params_t::const_iterator si = params.find("text");
|
||||||
|
if (si == params.end()) {
|
||||||
|
throw xmlrpc_c::fault(
|
||||||
|
"Missing source text",
|
||||||
|
xmlrpc_c::fault::CODE_PARSE);
|
||||||
|
}
|
||||||
|
const string source(
|
||||||
|
(xmlrpc_c::value_string(si->second)));
|
||||||
|
|
||||||
|
cerr << "Input: " << source << endl;
|
||||||
|
si = params.find("align");
|
||||||
|
bool addAlignInfo = (si != params.end());
|
||||||
|
|
||||||
|
const StaticData &staticData = StaticData::Instance();
|
||||||
|
Sentence sentence(Input);
|
||||||
|
const vector<FactorType> &inputFactorOrder =
|
||||||
|
staticData.GetInputFactorOrder();
|
||||||
|
stringstream in(source + "\n");
|
||||||
|
sentence.Read(in,inputFactorOrder);
|
||||||
|
Manager manager(sentence,staticData.GetSearchAlgorithm());
|
||||||
|
manager.ProcessSentence();
|
||||||
|
const Hypothesis* hypo = manager.GetBestHypothesis();
|
||||||
|
|
||||||
|
vector<xmlrpc_c::value> alignInfo;
|
||||||
|
stringstream out;
|
||||||
|
outputHypo(out,hypo,addAlignInfo,alignInfo);
|
||||||
|
|
||||||
|
map<string, xmlrpc_c::value> retData;
|
||||||
|
pair<string, xmlrpc_c::value>
|
||||||
|
text("text", xmlrpc_c::value_string(out.str()));
|
||||||
|
cerr << "Output: " << out.str() << endl;
|
||||||
|
if (addAlignInfo) {
|
||||||
|
retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
|
||||||
|
}
|
||||||
|
retData.insert(text);
|
||||||
|
|
||||||
|
*retvalP = xmlrpc_c::value_struct(retData);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo) {
|
||||||
|
if (hypo->GetPrevHypo() != NULL) {
|
||||||
|
outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo);
|
||||||
|
TargetPhrase p = hypo->GetTargetPhrase();
|
||||||
|
for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
|
||||||
|
{
|
||||||
|
const Factor *factor = p.GetFactor(pos, 0);
|
||||||
|
out << *factor << " ";
|
||||||
|
|
||||||
|
}
|
||||||
|
if (addAlignmentInfo) {
|
||||||
|
/**
|
||||||
|
* Add the alignment info to the array. This is in target order and consists of
|
||||||
|
* (tgt-start, src-start, src-end) triples.
|
||||||
|
**/
|
||||||
|
map<string, xmlrpc_c::value> phraseAlignInfo;
|
||||||
|
phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos());
|
||||||
|
phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
|
||||||
|
phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
|
||||||
|
alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
//Extract port and log, send other args to moses
|
||||||
|
char** mosesargv = new char*[argc+2];
|
||||||
|
int mosesargc = 0;
|
||||||
|
int port = 8080;
|
||||||
|
const char* logfile = "/dev/null";
|
||||||
|
|
||||||
|
for (int i = 0; i < argc; ++i) {
|
||||||
|
if (!strcmp(argv[i],"--server-port")) {
|
||||||
|
++i;
|
||||||
|
if (i >= argc) {
|
||||||
|
cerr << "Error: Missing argument to --server-port" << endl;
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
port = atoi(argv[i]);
|
||||||
|
}
|
||||||
|
} else if (!strcmp(argv[i],"--server-log")) {
|
||||||
|
++i;
|
||||||
|
if (i >= argc) {
|
||||||
|
cerr << "Error: Missing argument to --server-log" << endl;
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
logfile = argv[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
mosesargv[mosesargc] = new char[strlen(argv[i])+1];
|
||||||
|
strcpy(mosesargv[mosesargc],argv[i]);
|
||||||
|
++mosesargc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Parameter* params = new Parameter();
|
||||||
|
if (!params->LoadParam(mosesargc,mosesargv)) {
|
||||||
|
params->Explain();
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (!StaticData::LoadDataStatic(params)) {
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlrpc_c::registry myRegistry;
|
||||||
|
|
||||||
|
xmlrpc_c::methodPtr const translator(new Translator);
|
||||||
|
|
||||||
|
myRegistry.addMethod("translate", translator);
|
||||||
|
|
||||||
|
xmlrpc_c::serverAbyss myAbyssServer(
|
||||||
|
myRegistry,
|
||||||
|
port, // TCP port on which to listen
|
||||||
|
logfile
|
||||||
|
);
|
||||||
|
|
||||||
|
cerr << "Listening on port " << port << endl;
|
||||||
|
myAbyssServer.run();
|
||||||
|
// xmlrpc_c::serverAbyss.run() never returns
|
||||||
|
assert(false);
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user