Merge moses-server branch (includes mt moses) into trunk.

Plain (single-threaded) moses should configure and build as before.
The multi-threaded decoder and the server are only built if the appropriate
options are selected at configure/compile time.


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@2477 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
bhaddow 2009-08-07 16:47:54 +00:00
parent f75e3993ac
commit c5d39f039f
76 changed files with 4742 additions and 1447 deletions

View File

@ -6,4 +6,7 @@ AUTOMAKE_OPTIONS = foreign
if WITH_MERT
MERT = mert
endif
SUBDIRS = moses/src moses-cmd/src misc $(MERT)
if WITH_SERVER
SERVER = server
endif
SUBDIRS = moses/src moses-cmd/src misc $(MERT) $(SERVER)

1463
config.guess vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,11 @@
/* config.h.in. Generated from configure.in by autoheader. */
/* define if the Boost library is available */
#undef HAVE_BOOST
/* define if the Boost::Thread library is available */
#undef HAVE_BOOST_THREAD
/* Define to 1 if you have the <getopt.h> header file. */
#undef HAVE_GETOPT_H

1579
config.sub vendored Executable file

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,8 @@ AC_LANG_CPLUSPLUS
AC_PROG_RANLIB
#AM_PROG_LIBTOOL
AX_XMLRPC_C
AC_ARG_WITH(protobuf,
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
[with_protobuf=$withval],
@ -42,11 +44,29 @@ AC_ARG_ENABLE(optimization,
[CPPFLAGS="$CPPFLAGS -O3"; LDFLAGS="$LDFLAGS -O3" ]
)
# Optional multi-threading support. The action below runs for both
# --enable-threads and --disable-threads, so record $enableval rather than
# a hard-coded "yes"; otherwise --disable-threads would switch threading on.
AC_ARG_ENABLE(threads,
    [AC_HELP_STRING([--enable-threads], [compile threadsafe library and multi-threaded moses (mosesmt)])],
    [with_threads=$enableval]
    )
AM_CONDITIONAL([INTERNAL_LM], false)
AM_CONDITIONAL([SRI_LM], false)
AM_CONDITIONAL([IRST_LM], false)
AM_CONDITIONAL([RAND_LM], false)
AM_CONDITIONAL([PROTOBUF], false)
AM_CONDITIONAL([am__fastdepCC], false)
AM_CONDITIONAL([WITH_THREADS],false)
if test "x$with_threads" = 'xyes'
then
AC_MSG_NOTICE([Building threaded moses])
AX_BOOST_BASE([1.35.0])
AX_BOOST_THREAD
CPPFLAGS="$CPPFLAGS -DWITH_THREADS"
AM_CONDITIONAL([WITH_THREADS],true)
else
AC_MSG_NOTICE([Building non-threaded moses. This will disable the moses server])
fi
if test "x$with_protobuf" != 'xno'
then
@ -136,9 +156,16 @@ AC_CHECK_HEADERS([getopt.h],
[AM_CONDITIONAL([WITH_MERT],true)],
[AC_MSG_WARN([Cannot find getopt.h - disabling new mert])])
# The server is only built when XMLRPC-C was found and threading is enabled.
# AM_CONDITIONAL has to run on every configure invocation, so the decision is
# made in shell first and the macro is invoked exactly once, outside the
# conditional.
build_server=no
if test "x$have_xmlrpc_c" = "xyes" && test "x$with_threads" = "xyes"; then
    build_server=yes
else
    AC_MSG_NOTICE([Disabling server])
fi
AM_CONDITIONAL([WITH_SERVER], [test "x$build_server" = "xyes"])
LIBS="$LIBS -lz"
AC_CONFIG_FILES(Makefile moses/src/Makefile moses-cmd/src/Makefile misc/Makefile mert/Makefile)
AC_CONFIG_FILES(Makefile moses/src/Makefile moses-cmd/src/Makefile misc/Makefile mert/Makefile server/Makefile)
AC_OUTPUT()

223
m4/ax_boost_base.m4 Normal file
View File

@ -0,0 +1,223 @@
# ===========================================================================
# http://autoconf-archive.cryp.to/ax_boost_base.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_BOOST_BASE([MINIMUM-VERSION])
#
# DESCRIPTION
#
# Test for the Boost C++ libraries of a particular version (or newer)
#
# If no path to the installed boost library is given the macro searches
# under /usr, /usr/local, /opt and /opt/local and evaluates the
# $BOOST_ROOT environment variable. Further documentation is available at
# <http://randspringer.de/boost/index.html>.
#
# This macro calls:
#
# AC_SUBST(BOOST_CPPFLAGS) / AC_SUBST(BOOST_LDFLAGS)
#
# And sets:
#
# HAVE_BOOST
#
# LAST MODIFICATION
#
# 2008-04-12
#
# COPYLEFT
#
# Copyright (c) 2008 Thomas Porschberg <thomas@randspringer.de>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved.
AC_DEFUN([AX_BOOST_BASE],
[
AC_ARG_WITH([boost],
AS_HELP_STRING([--with-boost@<:@=DIR@:>@], [use boost (default is yes) - it is possible to specify the root directory for boost (optional)]),
[
if test "$withval" = "no"; then
want_boost="no"
elif test "$withval" = "yes"; then
want_boost="yes"
ac_boost_path=""
else
want_boost="yes"
ac_boost_path="$withval"
fi
],
[want_boost="yes"])
AC_ARG_WITH([boost-libdir],
AS_HELP_STRING([--with-boost-libdir=LIB_DIR],
[Force given directory for boost libraries. Note that this will overwrite library path detection, so use this parameter only if default library detection fails and you know exactly where your boost libraries are located.]),
[
if test -d $withval
then
ac_boost_lib_path="$withval"
else
AC_MSG_ERROR(--with-boost-libdir expected directory name)
fi
],
[ac_boost_lib_path=""]
)
if test "x$want_boost" = "xyes"; then
boost_lib_version_req=ifelse([$1], ,1.20.0,$1)
boost_lib_version_req_shorten=`expr $boost_lib_version_req : '\([[0-9]]*\.[[0-9]]*\)'`
boost_lib_version_req_major=`expr $boost_lib_version_req : '\([[0-9]]*\)'`
boost_lib_version_req_minor=`expr $boost_lib_version_req : '[[0-9]]*\.\([[0-9]]*\)'`
boost_lib_version_req_sub_minor=`expr $boost_lib_version_req : '[[0-9]]*\.[[0-9]]*\.\([[0-9]]*\)'`
if test "x$boost_lib_version_req_sub_minor" = "x" ; then
boost_lib_version_req_sub_minor="0"
fi
WANT_BOOST_VERSION=`expr $boost_lib_version_req_major \* 100000 \+ $boost_lib_version_req_minor \* 100 \+ $boost_lib_version_req_sub_minor`
AC_MSG_CHECKING(for boostlib >= $boost_lib_version_req)
succeeded=no
dnl first we check the system location for boost libraries
dnl this location is chosen if boost libraries are installed with the --layout=system option
dnl or if you install boost with RPM
if test "$ac_boost_path" != ""; then
BOOST_LDFLAGS="-L$ac_boost_path/lib"
BOOST_CPPFLAGS="-I$ac_boost_path/include"
else
for ac_boost_path_tmp in /usr /usr/local /opt /opt/local ; do
if test -d "$ac_boost_path_tmp/include/boost" && test -r "$ac_boost_path_tmp/include/boost"; then
BOOST_LDFLAGS="-L$ac_boost_path_tmp/lib"
BOOST_CPPFLAGS="-I$ac_boost_path_tmp/include"
break;
fi
done
fi
dnl overwrite ld flags if we have required special directory with
dnl --with-boost-libdir parameter
if test "$ac_boost_lib_path" != ""; then
BOOST_LDFLAGS="-L$ac_boost_lib_path"
fi
CPPFLAGS_SAVED="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
export CPPFLAGS
LDFLAGS_SAVED="$LDFLAGS"
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
export LDFLAGS
AC_LANG_PUSH(C++)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
@%:@include <boost/version.hpp>
]], [[
#if BOOST_VERSION >= $WANT_BOOST_VERSION
// Everything is okay
#else
# error Boost version is too old
#endif
]])],[
AC_MSG_RESULT(yes)
succeeded=yes
found_system=yes
],[
])
AC_LANG_POP([C++])
dnl if we found no boost with system layout we search for boost libraries
dnl built and installed without the --layout=system option or for a staged(not installed) version
if test "x$succeeded" != "xyes"; then
_version=0
if test "$ac_boost_path" != ""; then
if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
V_CHECK=`expr $_version_tmp \> $_version`
if test "$V_CHECK" = "1" ; then
_version=$_version_tmp
fi
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
BOOST_CPPFLAGS="-I$ac_boost_path/include/boost-$VERSION_UNDERSCORE"
done
fi
else
for ac_boost_path in /usr /usr/local /opt /opt/local ; do
if test -d "$ac_boost_path" && test -r "$ac_boost_path"; then
for i in `ls -d $ac_boost_path/include/boost-* 2>/dev/null`; do
_version_tmp=`echo $i | sed "s#$ac_boost_path##" | sed 's/\/include\/boost-//' | sed 's/_/./'`
V_CHECK=`expr $_version_tmp \> $_version`
if test "$V_CHECK" = "1" ; then
_version=$_version_tmp
best_path=$ac_boost_path
fi
done
fi
done
VERSION_UNDERSCORE=`echo $_version | sed 's/\./_/'`
BOOST_CPPFLAGS="-I$best_path/include/boost-$VERSION_UNDERSCORE"
if test "$ac_boost_lib_path" = ""
then
BOOST_LDFLAGS="-L$best_path/lib"
fi
if test "x$BOOST_ROOT" != "x"; then
if test -d "$BOOST_ROOT" && test -r "$BOOST_ROOT" && test -d "$BOOST_ROOT/stage/lib" && test -r "$BOOST_ROOT/stage/lib"; then
version_dir=`expr //$BOOST_ROOT : '.*/\(.*\)'`
stage_version=`echo $version_dir | sed 's/boost_//' | sed 's/_/./g'`
stage_version_shorten=`expr $stage_version : '\([[0-9]]*\.[[0-9]]*\)'`
V_CHECK=`expr $stage_version_shorten \>\= $_version`
if test "$V_CHECK" = "1" -a "$ac_boost_lib_path" = "" ; then
AC_MSG_NOTICE(We will use a staged boost library from $BOOST_ROOT)
BOOST_CPPFLAGS="-I$BOOST_ROOT"
BOOST_LDFLAGS="-L$BOOST_ROOT/stage/lib"
fi
fi
fi
fi
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
export CPPFLAGS
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
export LDFLAGS
AC_LANG_PUSH(C++)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
@%:@include <boost/version.hpp>
]], [[
#if BOOST_VERSION >= $WANT_BOOST_VERSION
// Everything is okay
#else
# error Boost version is too old
#endif
]])],[
AC_MSG_RESULT(yes)
succeeded=yes
found_system=yes
],[
])
AC_LANG_POP([C++])
fi
if test "$succeeded" != "yes" ; then
if test "$_version" = "0" ; then
AC_MSG_ERROR([[We could not detect the boost libraries (version $boost_lib_version_req_shorten or higher). If you have a staged boost library (still not installed) please specify \$BOOST_ROOT in your environment and do not give a PATH to --with-boost option. If you are sure you have boost installed, then check your version number looking in <boost/version.hpp>. See http://randspringer.de/boost for more documentation.]])
else
AC_MSG_NOTICE([Your boost libraries seems to old (version $_version).])
fi
else
AC_SUBST(BOOST_CPPFLAGS)
AC_SUBST(BOOST_LDFLAGS)
AC_DEFINE(HAVE_BOOST,,[define if the Boost library is available])
fi
CPPFLAGS="$CPPFLAGS_SAVED"
LDFLAGS="$LDFLAGS_SAVED"
fi
])

143
m4/ax_boost_thread.m4 Normal file
View File

@ -0,0 +1,143 @@
# ===========================================================================
# http://www.nongnu.org/autoconf-archive/ax_boost_thread.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_BOOST_THREAD
#
# DESCRIPTION
#
# Test for Thread library from the Boost C++ libraries. The macro requires
# a preceding call to AX_BOOST_BASE. Further documentation is available at
# <http://randspringer.de/boost/index.html>.
#
# This macro calls:
#
# AC_SUBST(BOOST_THREAD_LIB)
#
# And sets:
#
# HAVE_BOOST_THREAD
#
# LICENSE
#
# Copyright (c) 2009 Thomas Porschberg <thomas@randspringer.de>
# Copyright (c) 2009 Michael Tindal
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved.
AC_DEFUN([AX_BOOST_THREAD],
[
AC_ARG_WITH([boost-thread],
AS_HELP_STRING([--with-boost-thread@<:@=special-lib@:>@],
[use the Thread library from boost - it is possible to specify a certain library for the linker
e.g. --with-boost-thread=boost_thread-gcc-mt ]),
[
if test "$withval" = "no"; then
want_boost="no"
elif test "$withval" = "yes"; then
want_boost="yes"
ax_boost_user_thread_lib=""
else
want_boost="yes"
ax_boost_user_thread_lib="$withval"
fi
],
[want_boost="yes"]
)
if test "x$want_boost" = "xyes"; then
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AC_CANONICAL_BUILD])
CPPFLAGS_SAVED="$CPPFLAGS"
CPPFLAGS="$CPPFLAGS $BOOST_CPPFLAGS"
export CPPFLAGS
LDFLAGS_SAVED="$LDFLAGS"
LDFLAGS="$LDFLAGS $BOOST_LDFLAGS"
export LDFLAGS
AC_CACHE_CHECK(whether the Boost::Thread library is available,
ax_cv_boost_thread,
[AC_LANG_PUSH([C++])
CXXFLAGS_SAVE=$CXXFLAGS
if test "x$build_os" = "xsolaris" ; then
CXXFLAGS="-pthreads $CXXFLAGS"
elif test "x$build_os" = "xming32" ; then
CXXFLAGS="-mthreads $CXXFLAGS"
else
CXXFLAGS="-pthread $CXXFLAGS"
fi
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([[@%:@include <boost/thread/thread.hpp>]],
[[boost::thread_group thrds;
return 0;]]),
ax_cv_boost_thread=yes, ax_cv_boost_thread=no)
CXXFLAGS=$CXXFLAGS_SAVE
AC_LANG_POP([C++])
])
if test "x$ax_cv_boost_thread" = "xyes"; then
if test "x$build_os" = "xsolaris" ; then
BOOST_CPPFLAGS="-pthreads $BOOST_CPPFLAGS"
elif test "x$build_os" = "xming32" ; then
BOOST_CPPFLAGS="-mthreads $BOOST_CPPFLAGS"
else
BOOST_CPPFLAGS="-pthread $BOOST_CPPFLAGS"
fi
AC_SUBST(BOOST_CPPFLAGS)
AC_DEFINE(HAVE_BOOST_THREAD,,[define if the Boost::Thread library is available])
BOOSTLIBDIR=`echo $BOOST_LDFLAGS | sed -e 's/@<:@^\/@:>@*//'`
LDFLAGS_SAVE=$LDFLAGS
case "x$build_os" in
*bsd* )
LDFLAGS="-pthread $LDFLAGS"
break;
;;
esac
if test "x$ax_boost_user_thread_lib" = "x"; then
for libextension in `ls $BOOSTLIBDIR/libboost_thread*.so* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.so.*$;\1;'` `ls $BOOSTLIBDIR/libboost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^lib\(boost_thread.*\)\.a*$;\1;'`; do
ax_lib=${libextension}
AC_CHECK_LIB($ax_lib, exit,
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
[link_thread="no"])
done
if test "x$link_thread" != "xyes"; then
for libextension in `ls $BOOSTLIBDIR/boost_thread*.dll* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.dll.*$;\1;'` `ls $BOOSTLIBDIR/boost_thread*.a* 2>/dev/null | sed 's,.*/,,' | sed -e 's;^\(boost_thread.*\)\.a*$;\1;'` ; do
ax_lib=${libextension}
AC_CHECK_LIB($ax_lib, exit,
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
[link_thread="no"])
done
fi
else
for ax_lib in $ax_boost_user_thread_lib boost_thread-$ax_boost_user_thread_lib; do
AC_CHECK_LIB($ax_lib, exit,
[BOOST_THREAD_LIB="-l$ax_lib"; AC_SUBST(BOOST_THREAD_LIB) link_thread="yes"; break],
[link_thread="no"])
done
fi
if test "x$link_thread" = "xno"; then
AC_MSG_ERROR(Could not link against $ax_lib !)
else
case "x$build_os" in
*bsd* )
BOOST_LDFLAGS="-pthread $BOOST_LDFLAGS"
break;
;;
esac
fi
fi
CPPFLAGS="$CPPFLAGS_SAVED"
LDFLAGS="$LDFLAGS_SAVED"
fi
])

52
m4/ax_xmlrpc_c.m4 Normal file
View File

@ -0,0 +1,52 @@
dnl AX_XMLRPC_C
dnl
dnl Probes for the XMLRPC-C library when --with-xmlrpc-c is given.
dnl The option value is either "yes", in which case xmlrpc-c-config is run
dnl from the PATH, or the xmlrpc-c-config program to run.
dnl On success have_xmlrpc_c is set to yes and XMLRPC_C_CPPFLAGS and
dnl XMLRPC_C_LIBS are substituted. In every other case have_xmlrpc_c is no.
AC_DEFUN([AX_XMLRPC_C], [
  dnl Start from a known value so a stale environment variable cannot leak in
  have_xmlrpc_c=no
  AC_MSG_CHECKING(for XMLRPC-C)

  AC_ARG_WITH(xmlrpc-c,
  [ --with-xmlrpc-c=PATH Enable XMLRPC-C support.],
  [
    if test "$withval" = "no"; then
      AC_MSG_RESULT(no)

    else
      if test "$withval" = "yes"; then
        xmlrpc_cc_prg="xmlrpc-c-config"
      else
        xmlrpc_cc_prg="$withval"
      fi

      if eval $xmlrpc_cc_prg --version 2>/dev/null >/dev/null; then
        XMLRPC_C_CPPFLAGS=`$xmlrpc_cc_prg --cflags c++2 abyss-server`
        XMLRPC_C_LIBS=`$xmlrpc_cc_prg c++2 abyss-server --libs`
        dnl Try a test link with the reported flags and restore the globals afterwards
        CXXFLAGS_SAVED=$CXXFLAGS
        CXXFLAGS="$CXXFLAGS $XMLRPC_C_CPPFLAGS"
        LIBS_SAVED=$LIBS
        LIBS="$LIBS $XMLRPC_C_LIBS"

        AC_TRY_LINK(
        [ #include <xmlrpc-c/server.h>
        ],[ xmlrpc_registry_new(NULL); ],
        [
          AC_MSG_RESULT(ok)
        ], [
          AC_MSG_RESULT(failed)
          AC_MSG_ERROR(Could not compile XMLRPC-C test.)
        ])

        dnl AC_DEFINE(HAVE_XMLRPC_C, 1, Support for XMLRPC-C.)
        have_xmlrpc_c=yes
        AC_SUBST(XMLRPC_C_LIBS)
        AC_SUBST(XMLRPC_C_CPPFLAGS)
        LIBS=$LIBS_SAVED
        CXXFLAGS=$CXXFLAGS_SAVED

      else
        dnl The config program itself could not be run so say that instead of blaming the link test
        AC_MSG_RESULT(failed)
        AC_MSG_ERROR([Could not run $xmlrpc_cc_prg --version. Check the value given to --with-xmlrpc-c.])
      fi
    fi

  ],[
    AC_MSG_RESULT(ignored)
  ])
])

View File

@ -4,13 +4,13 @@ processPhraseTable_SOURCES = GenerateTuples.cpp processPhraseTable.cpp
processLexicalTable_SOURCES = processLexicalTable.cpp
queryLexicalTable_SOURCES = queryLexicalTable.cpp
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
processPhraseTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
processPhraseTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
processPhraseTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
processPhraseTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
processLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
processLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
processLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
queryLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses
queryLexicalTable_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
queryLexicalTable_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a

View File

@ -210,47 +210,9 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
}
}
void OutputWordAlignment(std::ostream &out, const TargetPhrase &phrase, size_t srcoffset, size_t trgoffset, FactorDirection direction)
{
size_t size = phrase.GetSize();
if (size){
out << " ";
/* out << phrase;
out << " ===> offset: (" << srcoffset << "," << trgoffset << ")";
out << " ===> size: (" << phrase.GetAlignmentPair().GetAlignmentPhrase(Input).GetSize() << ","
<< phrase.GetAlignmentPair().GetAlignmentPhrase(Output).GetSize() << ") ===> ";
*/
AlignmentPhrase alignphrase=phrase.GetAlignmentPair().GetAlignmentPhrase(direction);
/* alignphrase.print(out,0);
out << " ===> ";
// out << alignphrase << " ===> ";
*/
if (direction == Input){
alignphrase.Shift(trgoffset);
alignphrase.print(out,srcoffset);
}
else{
alignphrase.Shift(srcoffset);
alignphrase.print(out,trgoffset);
}
/*
// out << alignphrase << " ===> ";
out << "\n";
*/
}
}
void OutputWordAlignment(std::ostream &out, const Hypothesis *hypo, FactorDirection direction)
{
size_t srcoffset, trgoffset;
if ( hypo != NULL)
{
srcoffset=hypo->GetCurrSourceWordsRange().GetStartPos();
trgoffset=hypo->GetCurrTargetWordsRange().GetStartPos();
OutputWordAlignment(out, hypo->GetPrevHypo(),direction);
OutputWordAlignment(out, hypo->GetCurrTargetPhrase(), srcoffset, trgoffset, direction);
}
}
void IOWrapper::Backtrack(const Hypothesis *hypo){
@ -282,7 +244,7 @@ void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
void OutputInput(std::ostream& os, const Hypothesis* hypo)
{
size_t len = StaticData::Instance().GetInput()->GetSize();
size_t len = hypo->GetInput().GetSize();
std::vector<const Phrase*> inp_phrases(len, 0);
OutputInput(inp_phrases, hypo);
for (size_t i=0; i<len; ++i)
@ -411,11 +373,11 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
// translation components
if (StaticData::Instance().GetInputType()==SentenceInput){
// translation components for text input
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
*m_nBestStream << " tm:";
vector<PhraseDictionary*>::iterator iter;
vector<PhraseDictionaryFeature*>::iterator iter;
for (iter = pds.begin(); iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
@ -427,9 +389,9 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
vector<PhraseDictionaryFeature*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionary*>::iterator iter;
vector<PhraseDictionaryFeature*>::iterator iter;
iter = pds.begin();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
@ -496,25 +458,7 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
}
if (includeWordAlignment){
//word-to-word alignment (source-to-target)
*m_nBestStream << " |||";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
WordsRange targetRange = path.GetTargetWordsRange(edge);
OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Input);
}
//word-to-word alignment (target-to-source)
*m_nBestStream << " |||";
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
WordsRange targetRange = path.GetTargetWordsRange(edge);
OutputWordAlignment(*m_nBestStream, edge.GetCurrTargetPhrase(),edge.GetCurrSourceWordsRange().GetStartPos(),targetRange.GetStartPos(), Output);
}
}
*m_nBestStream << endl;
}
@ -522,3 +466,51 @@ void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translati
*m_nBestStream<<std::flush;
}
/**
 * Replaces `source` with the next input item read through `ioWrapper`.
 *
 * The object `source` pointed to on entry is deleted first. A fresh object
 * matching `inputType` is then handed to IOWrapper::GetInput and whatever
 * that call returns is stored in `source`.
 *
 * @param ioWrapper  wrapper the input is read through
 * @param inputType  selects which concrete input class is created
 * @param source     in: previous input (may be NULL); out: new input or NULL
 * @return true if `source` is non-NULL afterwards, false otherwise
 */
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
  delete source;
  // Reset immediately: for an unknown input type nothing below assigns to
  // source, and the caller must not be left holding (and later re-deleting)
  // a pointer to freed memory while being told the read succeeded.
  source = NULL;
  switch(inputType)
  {
    case SentenceInput:         source = ioWrapper.GetInput(new Sentence(Input)); break;
    case ConfusionNetworkInput: source = ioWrapper.GetInput(new ConfusionNet);    break;
    case WordLatticeInput:      source = ioWrapper.GetInput(new WordLattice);     break;
    default: TRACE_ERR("Unknown input type: " << inputType << "\n");
  }
  return (source != NULL);
}
/**
 * Builds the IOWrapper used for reading input and writing output.
 *
 * If exactly one "input-file" parameter is configured, that path is passed
 * to the IOWrapper constructor; otherwise the constructor overload without
 * a file path is used. The translation id of the new wrapper is reset
 * before it is returned.
 *
 * @param staticData  configuration the factor orders, n-best settings and
 *                    "input-file" parameter are read from
 * @return pointer to a newly allocated IOWrapper
 */
IOWrapper *GetIODevice(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  IOWrapper *device = NULL;
  if (staticData.GetParam("input-file").size() != 1)
  {
    // no single input file configured
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    device = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
                           , staticData.GetNBestSize()
                           , staticData.GetNBestFilePath());
  }
  else
  {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];
    device = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
                           , staticData.GetNBestSize()
                           , staticData.GetNBestFilePath()
                           , filePath);
  }

  device->ResetTranslationId();

  IFVERBOSE(1)
    PrintUserTime("Created input-output object");

  return device;
}

View File

@ -44,6 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "TrellisPathList.h"
#include "InputFileStream.h"
#include "InputType.h"
#include "WordLattice.h"
class IOWrapper
{
@ -98,3 +99,7 @@ public:
return *m_outputSearchGraphStream;
}
};
IOWrapper *GetIODevice(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder ,bool reportSegmentation, bool reportAllFactors);

View File

@ -61,25 +61,14 @@ POSSIBILITY OF SUCH DAMAGE.
#include "hypergraph.pb.h"
#endif
using namespace std;
using namespace Moses;
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
delete source;
switch(inputType)
{
case SentenceInput: source = ioWrapper.GetInput(new Sentence(Input)); break;
case ConfusionNetworkInput: source = ioWrapper.GetInput(new ConfusionNet); break;
case WordLatticeInput: source = ioWrapper.GetInput(new WordLattice); break;
default: TRACE_ERR("Unknown input type: " << inputType << "\n");
}
return (source ? true : false);
}
int main(int argc, char* argv[])
{
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
@ -223,35 +212,4 @@ int main(int argc, char* argv[])
#endif
}
IOWrapper *GetIODevice(const StaticData &staticData)
{
IOWrapper *ioWrapper;
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
,&outputFactorOrder = staticData.GetOutputFactorOrder();
FactorMask inputFactorUsed(inputFactorOrder);
// io
if (staticData.GetParam("input-file").size() == 1)
{
VERBOSE(2,"IO from File" << endl);
string filePath = staticData.GetParam("input-file")[0];
ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
, staticData.GetNBestSize()
, staticData.GetNBestFilePath()
, filePath);
}
else
{
VERBOSE(1,"IO from STDOUT/STDIN" << endl);
ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
, staticData.GetNBestSize()
, staticData.GetNBestFilePath());
}
ioWrapper->ResetTranslationId();
IFVERBOSE(1)
PrintUserTime("Created input-output object");
return ioWrapper;
}

View File

@ -39,4 +39,3 @@ POSSIBILITY OF SUCH DAMAGE.
class IOWrapper;
int main(int argc, char* argv[]);
IOWrapper *GetIODevice(const Moses::StaticData &staticData);

216
moses-cmd/src/MainMT.cpp Normal file
View File

@ -0,0 +1,216 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
/**
* Main for multithreaded moses.
**/
#include <sstream>
#include <vector>
#include <boost/thread/mutex.hpp>
#if defined(BOOST_HAS_PTHREADS)
#include <pthread.h>
#endif
#include "Hypothesis.h"
#include "IOWrapper.h"
#include "Manager.h"
#include "StaticData.h"
#include "ThreadPool.h"
#include "Util.h"
#include "mbr.h"
using namespace std;
using namespace Moses;
/**
* Makes sure output goes in the correct order.
**/
class OutputCollector {
public:
OutputCollector() :
m_nextOutput(0) {}
void Write(int sourceId, Manager& manager) {
//create the output string
//Note that this is copied from Main.cpp. Some refactoring
//could remove the duplicate code.
const StaticData& staticData = StaticData::Instance();
ostringstream out;
if (!staticData.UseMBR()) {
const Hypothesis* hypo = manager.GetBestHypothesis();
if (hypo) {
OutputSurface(
out,
hypo,
staticData.GetOutputFactorOrder(),
staticData.GetReportSegmentation(),
staticData.GetReportAllFactors());
}
out << endl;
} else {
//MBR decoding
size_t nBestSize = staticData.GetMBRSize();
if (nBestSize <= 0) {
cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
exit(1);
} else {
TrellisPathList nBestList;
manager.CalcNBest(nBestSize, nBestList,true);
VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
IFVERBOSE(2) { PrintUserTime("calculated n-best list for MBR decoding"); }
vector<const Factor*> mbrBestHypo = doMBR(nBestList);
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
const Factor *factor = mbrBestHypo[i];
if (i>0) out << " ";
out << factor->GetString();
}
out << endl;
IFVERBOSE(2) { PrintUserTime("finished MBR decoding"); }
}
}
Write(sourceId,out.str());
}
private:
/**
* Write or cache the output, as appropriate.
**/
void Write(int sourceId,const string& output) {
boost::mutex::scoped_lock lock(m_mutex);
if (sourceId == m_nextOutput) {
//This is the one we were expecting
cout << output;
++m_nextOutput;
//see if there's any more
map<int,string>::iterator iter;
while ((iter = m_outputs.find(m_nextOutput)) != m_outputs.end()) {
cout << iter->second;
m_outputs.erase(iter);
++m_nextOutput;
}
} else {
//save for later
m_outputs[sourceId] = output;
}
}
map<int,string> m_outputs;
int m_nextOutput;
boost::mutex m_mutex;
};
/**
 * Task that translates one input item and passes the result to the shared
 * OutputCollector.
 *
 * The task stores the `source` pointer it is given and deletes it in its
 * destructor, so the creator must not delete it as well.
 **/
class TranslationTask : public Task {

public:
  /**
   * @param lineNumber       id passed to the collector together with the output
   * @param source           input to translate; deleted by this task's destructor
   * @param outputCollector  collector that receives the finished translation
   **/
  TranslationTask(size_t lineNumber,
                  InputType* source, OutputCollector& outputCollector) :
    m_source(source), m_lineNumber(lineNumber),
    m_outputCollector(outputCollector) {}

  /** Decodes m_source with a Manager of its own and writes the result. **/
  void Run() {
#if defined(BOOST_HAS_PTHREADS)
    TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << (int)pthread_self() << std::endl);
#endif
    const StaticData &staticData = StaticData::Instance();
    // The former locals `sentence` and `inputFactorOrder` were never read
    // and have been dropped.
    Manager manager(*m_source,staticData.GetSearchAlgorithm());
    manager.ProcessSentence();
    m_outputCollector.Write(m_lineNumber,manager);
  }

  ~TranslationTask() {delete m_source;}

private:
  InputType* m_source;                 // deleted in the destructor
  size_t m_lineNumber;
  OutputCollector& m_outputCollector;  // shared collector, referenced only
};
int main(int argc, char** argv) {
//extract pool-size args, send others to moses
char** mosesargv = new char*[argc+2];
int mosesargc = 0;
int threadcount = 10;
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i], "-threads")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to -threads" << endl;
exit(1);
} else {
threadcount = atoi(argv[i]);
}
} else {
mosesargv[mosesargc] = new char[strlen(argv[i])+1];
strcpy(mosesargv[mosesargc],argv[i]);
++mosesargc;
}
}
if (threadcount <= 0) {
cerr << "Error: Must specify a positive number of threads" << endl;
exit(1);
}
Parameter* params = new Parameter();
if (!params->LoadParam(mosesargc,mosesargv)) {
params->Explain();
exit(1);
}
if (!StaticData::LoadDataStatic(params)) {
exit(1);
}
const StaticData& staticData = StaticData::Instance();
IOWrapper* ioWrapper = GetIODevice(staticData);
if (!ioWrapper) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
}
ThreadPool pool(threadcount);
InputType* source = NULL;
size_t lineCount = 0;
OutputCollector outputCollector;
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
TranslationTask* task =
new TranslationTask(lineCount,source, outputCollector);
pool.Submit(task);
source = NULL; //make sure it doesn't get deleted
++lineCount;
}
pool.Stop(true); //flush remaining jobs
return 0;
}

View File

@ -1,7 +1,16 @@
bin_PROGRAMS = moses
if WITH_THREADS
bin_PROGRAMS = moses mosesmt
else
bin_PROGRAMS = moses
endif
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src $(BOOST_CPPFLAGS)
moses_SOURCES = Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses $(BOOST_LDFLAGS) $(BOOST_THREAD_LIB)
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
mosesmt_SOURCES = MainMT.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ThreadPool.cpp
mosesmt_LDADD = -L$(top_srcdir)/moses/src $(BOOST_LDFLAGS) -lmoses $(BOOST_THREAD_LIB)
mosesmt_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a

View File

@ -0,0 +1,95 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "ThreadPool.h"
using namespace std;
using namespace Moses;
/**
 * Creates the pool and starts `numThreads` threads, each running
 * ThreadPool::Execute on this pool.
 **/
Moses::ThreadPool::ThreadPool( size_t numThreads )
    : m_stopped(false), m_stopping(false)
{
  size_t started = 0;
  while (started != numThreads) {
    m_threads.create_thread(boost::bind(&ThreadPool::Execute,this));
    ++started;
  }
}
/**
 * Body of every pool thread: repeatedly takes a task from the queue, runs
 * and deletes it, until the pool has been stopped.
 **/
void Moses::ThreadPool::Execute()
{
  do {
    Task* task = NULL;
    { // Find a job to perform
      boost::mutex::scoped_lock lock(m_mutex);
      // Sleep until Submit() or Stop() signals m_threadNeeded. A wake-up
      // that finds no work just leaves task NULL and goes round again.
      if (m_tasks.empty() && !m_stopped) {
        m_threadNeeded.wait(lock);
      }
      // Once stopped, tasks still queued are not picked up any more.
      if (!m_stopped && !m_tasks.empty()) {
        task = m_tasks.front();
        m_tasks.pop();
      }
    }
    //Execute job outside the lock so other threads can fetch work meanwhile
    if (task) {
      task->Run();
      delete task;
    }
    // Lets Stop() re-check whether the queue has drained
    m_threadAvailable.notify_all();
  } while (!m_stopped);
  // pthread_self() is only available with a pthread backend; use the same
  // guard as the per-line trace in MainMT.cpp.
#if defined(BOOST_HAS_PTHREADS)
  TRACE_ERR("Thread " << (int)pthread_self() << " exiting" << endl);
#endif
}
/**
 * Queue a task and wake the workers.
 * Throws runtime_error once Stop() has begun; in that case the task is not queued.
 **/
void Moses::ThreadPool::Submit( Task* task )
{
  boost::mutex::scoped_lock guard(m_mutex);
  if (!m_stopping) {
    m_tasks.push(task);
    m_threadNeeded.notify_all();
    return;
  }
  throw runtime_error("ThreadPool stopping - unable to accept new jobs");
}
/**
 * Shut the pool down and join every worker thread.
 *
 * processRemainingJobs: when true, wait until the queue is empty before
 * telling the workers to stop; when false, queued tasks are not run.
 *
 * Tasks still in the queue once the workers have exited are deleted here,
 * since the pool deletes every task it accepts. Calling Stop() again after
 * it has completed is a no-op.
 **/
void Moses::ThreadPool::Stop(bool processRemainingJobs)
{
  {
    boost::mutex::scoped_lock lock(m_mutex);
    if (m_stopped) return;
    //prevent more jobs from being added to the queue
    m_stopping = true;
    if (processRemainingJobs) {
      //wait for queue to drain; wait() releases the mutex while blocked
      while (!m_tasks.empty() && !m_stopped) {
        m_threadAvailable.wait(lock);
      }
    }
    //tell all threads to stop
    m_stopped = true;
  }
  m_threadNeeded.notify_all();
  m_threads.join_all();

  // No worker is running any more: dispose of tasks that were never started.
  boost::mutex::scoped_lock lock(m_mutex);
  while (!m_tasks.empty()) {
    delete m_tasks.front();
    m_tasks.pop();
  }
}

107
moses-cmd/src/ThreadPool.h Normal file
View File

@ -0,0 +1,107 @@
// $Id: $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <pthread.h>
#include <iostream>
#include <queue>
#include <vector>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include "Util.h"
/**
* Classes to implement a ThreadPool.
**/
namespace Moses {
/**
 * Abstract unit of work. Subclasses implement Run(); the virtual
 * destructor allows a task to be deleted through a Task pointer.
 **/
class Task {
public:
  virtual ~Task() {}
  virtual void Run() = 0;
};
/**
* A fixed-size pool of worker threads that run queued Task objects.
* The pool deletes each task after running it.
**/
class ThreadPool {
public:
/**
* Construct a thread pool of a fixed size. The worker threads are
* started by the constructor.
**/
ThreadPool(size_t numThreads);
/**
* Add a job to the threadpool. Throws std::runtime_error if the pool
* has started stopping.
**/
void Submit(Task* task);
/**
* Shut down the ThreadPool and join its threads.
* If processRemainingJobs is true, first wait until the queue has been
* emptied; with the default (false) jobs still queued are not run.
**/
void Stop(bool processRemainingJobs = false);
/** Stops the pool without processing jobs that are still queued. */
~ThreadPool() { Stop(); }
private:
/**
* The main loop executed by each thread.
**/
void Execute();
// Pending jobs, in submission order.
std::queue<Task*> m_tasks;
// The worker threads.
boost::thread_group m_threads;
// Guards m_tasks and the two flags below.
boost::mutex m_mutex;
// Signalled when a job is queued or the pool is stopped.
boost::condition_variable m_threadNeeded;
// Signalled by a worker after each pass of its loop.
boost::condition_variable m_threadAvailable;
// Set once the workers have been told to exit.
bool m_stopped;
// Set once Stop() has begun; Submit() then refuses new jobs.
bool m_stopping;
};
/**
 * Trivial task used for testing: reports its id and the id of the thread
 * executing it on stderr.
 *
 * <pthread.h> is included at the top of this header rather than here,
 * so the system header is not pulled in inside namespace Moses.
 **/
class TestTask : public Task {
public:
  TestTask(int id) : m_id(id) {}
  virtual void Run() {
    // unsigned long rather than int: an int cast truncates the thread id
    // and fails to compile where pthread_t is a 64-bit pointer.
    const unsigned long tid = (unsigned long)pthread_self();
    std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;
  }
  virtual ~TestTask() {}
private:
  int m_id;
};
}

View File

@ -1,102 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include "AlignmentElement.h"
using namespace std;
namespace Moses
{
//! build the element from an existing set of alignment points
AlignmentElement::AlignmentElement(const ContainerType &alignInfo)
{
  m_collection.insert(alignInfo.begin(), alignInfo.end());
}
//! build the element from a list of alignment points; duplicates collapse in the set
AlignmentElement::AlignmentElement(const vector<AlignmentElementType> &alignInfo)
{
  for (size_t idx = 0 ; idx < alignInfo.size() ; ++idx)
    m_collection.insert(alignInfo[idx]);
}
//! copy constructor: duplicates the other element's alignment points
AlignmentElement::AlignmentElement(const AlignmentElement &alignInfo)
  : m_collection(alignInfo.m_collection)
{
}
/** Assignment: afterwards this element holds exactly the alignment points of
 *  alignInfo. The previous contents are replaced, not merged with, so the
 *  result equals what the copy constructor would produce.
 */
AlignmentElement& AlignmentElement::operator=(const AlignmentElement& alignInfo)
{
  // std::set assignment discards the old points and is safe for self-assignment
  m_collection = alignInfo.m_collection;
  return *this;
}
void AlignmentElement::Shift(int shift)
{
ContainerType newColl;
ContainerType::const_iterator iter;
for (iter = m_collection.begin() ; iter != m_collection.end() ; ++iter){
if (*iter!=-1) newColl.insert(*iter + shift);
else newColl.insert(*iter);
}
m_collection = newColl;
}
std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement)
{
const AlignmentElement::ContainerType &elemSet = alignElement.GetCollection();
// out << "(";
if (elemSet.size() > 0)
{
AlignmentElement::ContainerType::const_iterator iter = elemSet.begin();
out << *iter;
for (++iter ; iter != elemSet.end() ; ++iter)
out << "," << *iter;
}
// out << ")";
return out;
}
void AlignmentElement::SetIntersect(const AlignmentElement &otherElement)
{
ContainerType newElement;
set_intersection(m_collection.begin() , m_collection.end()
,otherElement.begin() , otherElement.end()
,inserter(newElement , newElement.begin()) );
m_collection = newElement;
}
//! add every position 0 .. otherPhraseSize-1 to this element's alignment points
void AlignmentElement::SetUniformAlignment(size_t otherPhraseSize)
{
  size_t pos = 0;
  while (pos < otherPhraseSize)
  {
    m_collection.insert(pos);
    ++pos;
  }
}
TO_STRING_BODY(AlignmentElement);
}

View File

@ -1,110 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <set>
#include <vector>
#include "Util.h"
namespace Moses
{
typedef short int AlignmentElementType;
//! the set of positions that a single word is aligned to
class AlignmentElement
{
  friend std::ostream& operator<<(std::ostream& out, const AlignmentElement &alignElement);

protected:
  typedef std::set<AlignmentElementType> ContainerType;
  ContainerType m_collection;

public:
  typedef ContainerType::iterator iterator;
  typedef ContainerType::const_iterator const_iterator;

  //! read-only iteration over the alignment points
  const_iterator begin() const { return m_collection.begin(); }
  const_iterator end() const { return m_collection.end(); }

  AlignmentElement() {}
  ~AlignmentElement() {}

  //! construct from an existing set of points
  AlignmentElement(const ContainerType &alignInfo);
  //! construct from a list of points
  AlignmentElement(const std::vector<AlignmentElementType> &alignInfo);
  AlignmentElement(const AlignmentElement &alignInfo);
  AlignmentElement& operator=(const AlignmentElement &copy);

  //! how many positions this word is aligned to
  size_t GetSize() const { return m_collection.size(); }

  //! true when the word has no alignment points
  bool IsEmpty() const { return m_collection.empty(); }

  //! direct read access to the underlying set
  const ContainerType &GetCollection() const { return m_collection; }

  //! true iff both elements contain exactly the same alignment points
  bool Equals(const AlignmentElement &compare) const
  {
    return m_collection == compare.m_collection;
  }

  //! add position 0 to the alignment points
  void SetIdentityAlignment() { m_collection.insert(0); }

  //! add every position 0 .. otherPhraseSize-1
  void SetUniformAlignment(size_t otherPhraseSize);

  //! keep only the points shared with otherElement
  void SetIntersect(const AlignmentElement &otherElement);

  //! add a single position
  void Add(size_t pos) { m_collection.insert(pos); }

  //! offset every point by shift so it can be compared with another alignment
  void Shift(int shift);

  TO_STRING();
};
}

View File

@ -1,102 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "AlignmentPair.h"
#include "AlignmentPhrase.h"
#include "WordsRange.h"
using namespace std;
namespace Moses
{
//! back-inserter onto the source (Input) or target (any other direction) alignment vector
AlignmentPhraseInserter AlignmentPair::GetInserter(FactorDirection direction)
{
  AlignmentPhrase &side = (direction == Input) ? m_sourceAlign : m_targetAlign;
  return std::back_insert_iterator<AlignmentPhrase::CollectionType>(side.GetVector());
}
void AlignmentPair::SetIdentityAlignment()
{
AlignmentElement alignment;
alignment.SetIdentityAlignment();
m_sourceAlign.Add(alignment);
m_targetAlign.Add(alignment);
}
bool AlignmentPair::IsCompatible(const AlignmentPair &compare
, size_t sourceStart
, size_t targetStart) const
{
// source
bool ret = GetAlignmentPhrase(Input).IsCompatible(
compare.GetAlignmentPhrase(Input)
, sourceStart
, targetStart);
if (!ret)
return false;
// target
return GetAlignmentPhrase(Output).IsCompatible(
compare.GetAlignmentPhrase(Output)
, targetStart
, sourceStart);
}
void AlignmentPair::Add(const AlignmentPair &newAlignment
, const WordsRange &sourceRange
, const WordsRange &targetRange)
{
m_sourceAlign.Add(newAlignment.m_sourceAlign
, targetRange.GetStartPos()
, sourceRange.GetStartPos());
m_targetAlign.Add(newAlignment.m_targetAlign
, sourceRange.GetStartPos()
, targetRange.GetStartPos());
}
void AlignmentPair::Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange)
{
m_sourceAlign.Merge(newAlignment.m_sourceAlign
, targetRange.GetStartPos()
, sourceRange.GetStartPos());
m_targetAlign.Merge(newAlignment.m_targetAlign
, sourceRange.GetStartPos()
, targetRange.GetStartPos());
}
TO_STRING_BODY(AlignmentPair);
std::ostream& operator<<(std::ostream &out, const AlignmentPair &alignmentPair)
{
// out << "f2e: " << alignmentPair.m_sourceAlign << ""
// << " , e2f: " << alignmentPair.m_targetAlign << " ";
out << "f2e: ";
alignmentPair.m_sourceAlign.print(out);
out << " , e2f: ";
alignmentPair.m_targetAlign.print(out);
out << " ";
return out;
}
}

View File

@ -1,112 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <vector>
#include <iterator>
#include "TypeDef.h"
#include "Util.h"
#include "AlignmentPhrase.h"
namespace Moses
{
typedef std::back_insert_iterator<AlignmentPhrase::CollectionType> AlignmentPhraseInserter;
/** represent the alignment info between source and target phrase */
class AlignmentPair
{
friend std::ostream& operator<<(std::ostream&, const AlignmentPair&);
protected:
AlignmentPhrase m_sourceAlign, m_targetAlign;
public:
// constructor
AlignmentPair()
{}
// constructor, init source size. used in hypo
AlignmentPair(size_t sourceSize)
:m_sourceAlign(sourceSize)
{}
// constructor, by copy
AlignmentPair(const AlignmentPair& a){
m_sourceAlign=a.GetAlignmentPhrase(Input);
m_targetAlign=a.GetAlignmentPhrase(Output);
};
// constructor, by copy
AlignmentPair(const AlignmentPhrase& a, const AlignmentPhrase& b){
SetAlignmentPhrase(a,b);
};
~AlignmentPair(){};
/** get the back_insert_iterator to the source or target alignment vector so that
* they could be populated
*/
AlignmentPhraseInserter GetInserter(FactorDirection direction);
const AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction) const
{
return (direction == Input) ? m_sourceAlign : m_targetAlign;
}
AlignmentPhrase &GetAlignmentPhrase(FactorDirection direction)
{
return (direction == Input) ? m_sourceAlign : m_targetAlign;
}
void SetAlignmentPhrase(FactorDirection direction, const AlignmentPhrase& a)
{
if (direction == Input) m_sourceAlign=a;
else m_targetAlign=a;
}
void SetAlignmentPhrase(const AlignmentPhrase& a, const AlignmentPhrase& b)
{
m_sourceAlign=a;
m_targetAlign=b;
}
/** used by the unknown word handler.
* Set alignment to 0
*/
void SetIdentityAlignment();
//! call Merge() for source and and Add() target alignment phrase
void Add(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
//! call Merge for both source and target alignment phrase
void Merge(const AlignmentPair &newAlignment, const WordsRange &sourceRange, const WordsRange &targetRange);
bool IsCompatible(const AlignmentPair &compare
, size_t sourceStart
, size_t targetStart) const;
TO_STRING();
};
}

View File

@ -1,233 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "AlignmentPhrase.h"
#include "WordsRange.h"
#include "WordsBitmap.h"
#include "UserMessage.h"
using namespace std;
namespace Moses
{
//! set Align to a single space followed by Size copies of "() "
void EmptyAlignment(std::string &Align, size_t Size)
{
  std::string slots(" ");
  for (size_t remaining = Size ; remaining > 0 ; --remaining)
    slots.append("() ");
  Align.swap(slots);
}
/** set Align to fSize groups, each of the form "(0,1,...,eSize-1) ";
 *  with eSize == 0 every group is "() "
 */
void UniformAlignment(std::string &Align, size_t fSize, size_t eSize)
{
  // every source word gets the same target list, so format it once
  std::ostringstream group;
  group << "(";
  for (size_t epos = 0 ; epos < eSize ; ++epos)
  {
    if (epos != 0)
      group << ",";
    group << epos;
  }
  group << ") ";
  const std::string oneGroup = group.str();

  std::string all;
  for (size_t fpos = 0 ; fpos < fSize ; ++fpos)
    all += oneGroup;
  Align = all;
}
/** copy constructor: every existing element of copy is cloned,
 *  and empty (NULL) slots stay empty
 */
AlignmentPhrase::AlignmentPhrase(const AlignmentPhrase &copy)
{
  const size_t total = copy.m_collection.size();
  m_collection.reserve(total);
  for (size_t pos = 0 ; pos != total ; ++pos)
  {
    const AlignmentElement *original = copy.m_collection[pos];
    AlignmentElement *clone = NULL;
    if (original != NULL)
      clone = new AlignmentElement(*original);
    m_collection.push_back(clone);
  }
}
/** Assignment: replace the contents with a deep copy of copy.
 *  The elements previously owned by this phrase are deleted, so repeated
 *  assignment does not leak them; self-assignment is a no-op.
 */
AlignmentPhrase& AlignmentPhrase::operator=(const AlignmentPhrase &copy)
{
  if (this == &copy)
    return *this;

  // Build the replacement first so this object is untouched until it is complete.
  // vector(n) value-initialises the pointers, i.e. every slot starts as NULL.
  CollectionType fresh(copy.GetSize());
  for (size_t pos = 0 ; pos < copy.GetSize() ; ++pos)
  {
    if (copy.Exists(pos))
      fresh[pos] = new AlignmentElement(copy.GetElement(pos));
  }

  // After the swap 'fresh' holds the old elements, which are owned by this
  // object and must be released.
  m_collection.swap(fresh);
  for (size_t pos = 0 ; pos < fresh.size() ; ++pos)
    delete fresh[pos];

  return *this;
}
//! create size slots, all of them empty (NULL)
AlignmentPhrase::AlignmentPhrase(size_t size)
  : m_collection(size, static_cast<AlignmentElement*>(NULL))
{
}
//! destructor: hands the element pointers to RemoveAllInColl
//! (presumably deletes every element -- confirm against its definition)
AlignmentPhrase::~AlignmentPhrase()
{
RemoveAllInColl(m_collection);
}
/** Compare this phrase, starting at mergePosStart, with compare position by
 *  position. Each element of compare is shifted by shiftPos before the
 *  comparison. Slots of this phrase that have no element are skipped.
 *
 *  Returns false at the first position whose alignment points differ, and
 *  true otherwise (including when mergePosStart is at or past the end, in
 *  which case there is nothing to compare).
 *
 *  NOTE(review): compare is assumed to have an element at every compared
 *  position; GetElement dereferences the slot without checking -- confirm.
 */
bool AlignmentPhrase::IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const
{
  // Guard the unsigned subtraction below against wrapping around.
  if (mergePosStart >= GetSize())
    return true;

  const size_t compareSize = std::min(GetSize() - mergePosStart , compare.GetSize());
  for (size_t posCompare = 0 ; posCompare < compareSize ; ++posCompare)
  {
    // Both positions advance together, also across an empty slot, so one
    // missing element does not hide every position after it.
    const size_t posThis = mergePosStart + posCompare;
    if (!Exists(posThis))
      continue;

    const AlignmentElement &alignThis = GetElement(posThis);
    AlignmentElement alignCompare = compare.GetElement(posCompare);

    // shift alignment
    alignCompare.Shift( (int)shiftPos);

    if (!alignThis.Equals(alignCompare))
      return false;
  }
  return true;
}
void AlignmentPhrase::Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
{
size_t insertPos = startPos;
for (size_t pos = 0 ; pos < newAlignment.GetSize() ; ++pos)
{
// shift alignment
AlignmentElement alignElement = newAlignment.GetElement(pos);
alignElement.Shift( (int)shift );
if (insertPos >= GetSize())
{ // probably doing target. append alignment to end
assert(insertPos == GetSize());
Add(alignElement);
}
else
{
if (Exists(insertPos))
{ // add
m_collection[insertPos]->SetIntersect(alignElement);
}
else
m_collection[insertPos] = new AlignmentElement(alignElement);
}
insertPos++;
}
}
void AlignmentPhrase::Shift(size_t shift)
{
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
{
// shift alignment
GetElement(pos).Shift( (int)shift );
}
}
void AlignmentPhrase::Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos)
{
assert(startPos < GetSize());
size_t insertPos = startPos;
for (size_t pos = 0 ; pos < newAlignment.GetSize() ; ++pos)
{
// shift alignment
AlignmentElement alignElement = newAlignment.GetElement(pos);
alignElement.Shift( (int)shift );
// merge elements to only contain co-joined elements
GetElement(insertPos).SetIntersect(alignElement);
insertPos++;
}
}
void AlignmentPhrase::AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget)
{
list<size_t>::iterator iter;
for (iter = uniformAlignmentTarget.begin() ; iter != uniformAlignmentTarget.end() ; ++iter)
{
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
{
AlignmentElement &alignElement = GetElement(pos);
alignElement.Add(*iter);
}
}
}
std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase)
{
for (size_t pos = 0 ; pos < alignmentPhrase.GetSize() ; ++pos)
{
if (alignmentPhrase.Exists(pos))
{
if (pos) out << " ";
const AlignmentElement &alignElement = alignmentPhrase.GetElement(pos);
out << alignElement;
}
else{
stringstream strme;
strme << "No alignment at position " << pos;
UserMessage::Add(strme.str());
abort();
}
}
return out;
}
void AlignmentPhrase::print(std::ostream& out, size_t offset) const
{
for (size_t pos = 0 ; pos < GetSize() ; ++pos)
{
if (Exists(pos))
{
if (pos) out << " ";
out << pos+offset << "=";
const AlignmentElement &alignElement = GetElement(pos);
out << alignElement;
}
else{
stringstream strme;
strme << "No alignment at position " << pos;
UserMessage::Add(strme.str());
abort();
// out << pos+offset << "=";
}
}
}
TO_STRING_BODY(AlignmentPhrase);
}

View File

@ -1,108 +0,0 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <vector>
#include "AlignmentElement.h"
#include "Util.h"
namespace Moses
{
void EmptyAlignment(std::string &Align, size_t Size);
void UniformAlignment(std::string &Align, size_t fSize, size_t eSize);
class WordsRange;
class WordsBitmap;
/** the alignments of every word in a phrase: one heap-allocated
 *  AlignmentElement per position, or NULL where a position has none
 */
class AlignmentPhrase
{
  friend std::ostream& operator<<(std::ostream& out, const AlignmentPhrase &alignmentPhrase);

public:
  typedef std::vector<AlignmentElement*> CollectionType;

protected:
  CollectionType m_collection;

public:
  //! empty phrase
  AlignmentPhrase() {}
  //! phrase with size empty slots
  AlignmentPhrase(size_t size);

  /** copy constructor */
  AlignmentPhrase(const AlignmentPhrase &copy);
  AlignmentPhrase& operator=(const AlignmentPhrase&);

  /** destructor */
  ~AlignmentPhrase();

  /** compare with another alignment phrase starting at mergePosStart, after
   *  shifting the other phrase's points by shiftPos. Used to see whether a
   *  trans opt can be used to expand a hypo
   */
  bool IsCompatible(const AlignmentPhrase &compare, size_t mergePosStart, size_t shiftPos) const;

  //! add newAlignment at startPos onwards, with its points offset by shift
  void Add(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);

  /** merge newAlignment into this phrase at startPos onwards, with its points
   *  offset by shift, keeping the intersection of each alignment element
   */
  void Merge(const AlignmentPhrase &newAlignment, size_t shift, size_t startPos);

  //! offset the points of every element by shift
  void Shift(size_t shift);

  //! number of positions (including empty ones)
  size_t GetSize() const { return m_collection.size(); }

  //! direct mutable access to the underlying vector
  CollectionType &GetVector() { return m_collection; }

  //! append a copy of element
  void Add(const AlignmentElement &element)
  {
    AlignmentElement *clone = new AlignmentElement(element);
    m_collection.push_back(clone);
  }

  // add elements which didn't have alignments, so are set to uniform on the other side
  void AddUniformAlignmentElement(std::list<size_t> &uniformAlignmentTarget);

  //! element at pos; the slot must not be empty
  AlignmentElement &GetElement(size_t pos) { return *m_collection[pos]; }
  const AlignmentElement &GetElement(size_t pos) const { return *m_collection[pos]; }

  //! true when position pos holds an element
  bool Exists(size_t pos) const { return m_collection[pos] != NULL; }

  //! print every element as "<pos+offset>=<points>"
  void print(std::ostream& out, size_t offset=0) const;

  TO_STRING();
};
}

View File

@ -57,22 +57,27 @@ class HypothesisScoreOrdererNoDistortion
class HypothesisScoreOrdererWithDistortion
{
public:
static const WordsRange *transOptRange; // TODO. HACK!!
HypothesisScoreOrdererWithDistortion(const WordsRange* transOptRange) :
m_transOptRange(transOptRange) {}
const WordsRange* m_transOptRange;
bool operator()(const Hypothesis* hypoA, const Hypothesis* hypoB) const
{
assert (transOptRange != NULL);
assert (m_transOptRange != NULL);
const float weightDistortion = StaticData::Instance().GetWeightDistortion();
const DistortionScoreProducer *dsp = StaticData::Instance().GetDistortionScoreProducer();
const float distortionScoreA = dsp->CalculateDistortionScore(
*hypoA,
hypoA->GetCurrSourceWordsRange(),
*transOptRange,
*m_transOptRange,
hypoA->GetWordsBitmap().GetFirstGapPos()
);
const float distortionScoreB = dsp->CalculateDistortionScore(
*hypoB,
hypoB->GetCurrSourceWordsRange(),
*transOptRange,
*m_transOptRange,
hypoB->GetWordsBitmap().GetFirstGapPos()
);
@ -95,8 +100,6 @@ class HypothesisScoreOrdererWithDistortion
};
const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL;
////////////////////////////////////////////////////////////////////////////////
// BackwardsEdge Code
////////////////////////////////////////////////////////////////////////////////
@ -104,7 +107,8 @@ const WordsRange *HypothesisScoreOrdererWithDistortion::transOptRange = NULL;
BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore)
, const SquareMatrix &futureScore,
const InputType& itype)
: m_initialized(false)
, m_prevBitmapContainer(prevBitmapContainer)
, m_parent(parent)
@ -131,7 +135,6 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
}
const WordsRange &transOptRange = translations.Get(0)->GetSourceWordsRange();
const InputType *itype = StaticData::Instance().GetInput();
HypothesisSet::const_iterator iterHypo = m_prevBitmapContainer.GetHypotheses().begin();
HypothesisSet::const_iterator iterEnd = m_prevBitmapContainer.GetHypotheses().end();
@ -149,7 +152,7 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
}
else
{
int distortionDistance = itype->ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
int distortionDistance = itype.ComputeDistortionDistance(hypo.GetCurrSourceWordsRange()
, transOptRange);
if (distortionDistance <= maxDistortion)
@ -169,8 +172,8 @@ BackwardsEdge::BackwardsEdge(const BitmapContainer &prevBitmapContainer
assert(m_hypotheses[0]->GetTotalScore() >= m_hypotheses[1]->GetTotalScore());
}
HypothesisScoreOrdererWithDistortion::transOptRange = &transOptRange;
std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererWithDistortion());
HypothesisScoreOrdererWithDistortion orderer (&transOptRange);
std::sort(m_hypotheses.begin(), m_hypotheses.end(), orderer);
// std::sort(m_hypotheses.begin(), m_hypotheses.end(), HypothesisScoreOrdererNoDistortion());
}

View File

@ -185,7 +185,8 @@ class BackwardsEdge
BackwardsEdge(const BitmapContainer &prevBitmapContainer
, BitmapContainer &parent
, const TranslationOptionList &translations
, const SquareMatrix &futureScore);
, const SquareMatrix &futureScore,
const InputType& source);
~BackwardsEdge();
bool GetInitialized();

View File

@ -35,14 +35,24 @@ class DecodeGraph
{
protected:
std::list<const DecodeStep*> m_steps;
size_t m_position;
public:
/**
* position: The position of this graph within the decode sequence.
**/
DecodeGraph(size_t position): m_position(position) {}
//! iterators
typedef std::list<const DecodeStep*>::iterator iterator;
typedef std::list<const DecodeStep*>::const_iterator const_iterator;
const_iterator begin() const { return m_steps.begin(); }
const_iterator end() const { return m_steps.end(); }
size_t GetPosition() const
{
return m_position;
}
~DecodeGraph();
//! Add another decode step to the graph

View File

@ -29,14 +29,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
DecodeStepTranslation::DecodeStepTranslation(PhraseDictionary* dict, const DecodeStep* prev)
: DecodeStep(dict, prev)
: DecodeStep(dict, prev), m_phraseDictionary(dict)
{
}
const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
/*const PhraseDictionary &DecodeStepTranslation::GetPhraseDictionary() const
{
return *static_cast<const PhraseDictionary*>(m_ptr);
}
return *m_phraseDictionary;
}*/
TranslationOption *DecodeStepTranslation::MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const
{
@ -102,11 +102,10 @@ void DecodeStepTranslation::ProcessInitialTranslation(
,PartialTranslOptColl &outputPartialTranslOptColl
, size_t startPos, size_t endPos, bool adhereTableLimit) const
{
const PhraseDictionary &phraseDictionary = GetPhraseDictionary();
const size_t tableLimit = phraseDictionary.GetTableLimit();
const size_t tableLimit = m_phraseDictionary->GetTableLimit();
const WordsRange wordsRange(startPos, endPos);
const TargetPhraseCollection *phraseColl = phraseDictionary.GetTargetPhraseCollection(source,wordsRange);
const TargetPhraseCollection *phraseColl = m_phraseDictionary->GetTargetPhraseCollection(source,wordsRange);
if (phraseColl != NULL)
{

View File

@ -22,6 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
#include "DecodeStep.h"
#include "PhraseDictionary.h"
namespace Moses
{
@ -57,6 +58,7 @@ private:
This function runs IsCompatible() to ensure the two can be merged
*/
TranslationOption *MergeTranslation(const TranslationOption& oldTO, const TargetPhrase &targetPhrase) const;
PhraseDictionary* m_phraseDictionary;
};

View File

@ -51,11 +51,12 @@ std::string DistortionScoreProducer::GetScoreProducerWeightShortName() const
return "d";
}
float DistortionScoreProducer::CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
{
const int USE_OLD = 1;
if (USE_OLD) {
return - (float) StaticData::Instance().GetInput()->ComputeDistortionDistance(prev, curr);
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
}
// Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
@ -85,6 +86,7 @@ FFState* DistortionScoreProducer::Evaluate(
ScoreComponentCollection* out) const {
const DistortionState_traditional* prev = static_cast<const DistortionState_traditional*>(prev_state);
const float distortionScore = CalculateDistortionScore(
hypo,
prev->range,
hypo.GetCurrSourceWordsRange(),
prev->first_gap);

View File

@ -16,7 +16,8 @@ class DistortionScoreProducer : public StatefulFeatureFunction {
public:
DistortionScoreProducer(ScoreIndexManager &scoreIndexManager);
float CalculateDistortionScore(const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
float CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGapPosition) const;
size_t GetNumScoreComponents() const;
std::string GetScoreProducerDescription() const;

View File

@ -38,7 +38,10 @@ void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorTyp
ifstream inFile(filePath.c_str());
string line;
#ifdef WITH_THREADS
boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
#endif
while( !getline(inFile, line, '\n').eof())
{
vector<string> token = Tokenize( line );
@ -53,6 +56,9 @@ void FactorCollection::LoadVocab(FactorDirection direction, FactorType factorTyp
bool FactorCollection::Exists(FactorDirection direction, FactorType factorType, const string &factorString)
{
#ifdef WITH_THREADS
boost::shared_lock<boost::shared_mutex> lock(m_accessLock);
#endif
// find string id
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
@ -67,6 +73,10 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction
, FactorType factorType
, const string &factorString)
{
#ifdef WITH_THREADS
boost::upgrade_lock<boost::shared_mutex> lock(m_accessLock);
boost::upgrade_to_unique_lock<boost::shared_mutex> uniqueLock(lock);
#endif
// find string id
const string *ptrString=&(*m_factorStringCollection.insert(factorString).first);
pair<FactorSet::iterator, bool> ret = m_collection.insert( Factor(direction, factorType, ptrString, m_factorId) );

View File

@ -23,6 +23,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <set>
#include <string>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
#include "Factor.h"
namespace Moses
@ -47,6 +52,10 @@ class FactorCollection
protected:
static FactorCollection s_instance;
#ifdef WITH_THREADS
//reader-writer lock
boost::shared_mutex m_accessLock;
#endif
size_t m_factorId; /**< unique, contiguous ids, starting from 0, for each factor */
FactorSet m_collection; /**< collection of all factors */

View File

@ -109,8 +109,8 @@ inline FILE* fOpen(const char* fn,const char* m) {
if(FILE* f=fopen(fn,m))
return f;
else {
assert(false);
UserMessage::Add(std::string("ERROR: could not open file ") + fn + " with mode " + m + "\n");
assert(false);
return NULL;
}
}

View File

@ -34,12 +34,12 @@ public:
operator Ptr () {load();return t;}
const T& operator* () const {load();return *t;}
const Ptr operator->() const {load();return t;}
operator const Ptr () const {load();return t;}
Ptr operator->() const {load();return t;}
operator Ptr () const {load();return t;}
// direct access to pointer, use with care!
Ptr getPtr() {return t;}
const Ptr getPtr() const {return t;}
Ptr getPtr() const {return t;}
operator bool() const {return (f && pos!=InvalidOffT);}

View File

@ -36,6 +36,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "InputType.h"
#include "LMList.h"
#include "Manager.h"
#include "hash.h"
using namespace std;
@ -48,9 +49,8 @@ unsigned int Hypothesis::s_HypothesesCreated = 0;
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget)
: m_prevHypo(NULL)
, m_transOpt(NULL)
, m_targetPhrase(emptyTarget)
, m_sourcePhrase(0)
, m_sourceCompleted(source.GetSize())
@ -60,8 +60,10 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
, m_wordDeleted(false)
, m_ffStates(StaticData::Instance().GetScoreIndexManager().GetStatefulFeatureFunctions().size())
, m_arcList(NULL)
, m_id(0)
, m_alignPair(source.GetSize())
, m_transOpt(NULL)
, m_manager(manager)
, m_id(0)
{ // used for initial seeding of trans process
// initialize scores
//_hash_computed = false;
@ -78,7 +80,6 @@ Hypothesis::Hypothesis(InputType const& source, const TargetPhrase &emptyTarget)
Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
: m_prevHypo(&prevHypo)
, m_targetPhrase(transOpt.GetTargetPhrase())
, m_transOpt(&transOpt)
, m_sourcePhrase(transOpt.GetSourcePhrase())
, m_sourceCompleted (prevHypo.m_sourceCompleted )
, m_sourceInput (prevHypo.m_sourceInput)
@ -88,11 +89,12 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
, m_wordDeleted(false)
, m_totalScore(0.0f)
, m_futureScore(0.0f)
, m_ffStates(prevHypo.m_ffStates.size())
, m_scoreBreakdown (prevHypo.m_scoreBreakdown)
, m_ffStates(prevHypo.m_ffStates.size())
, m_arcList(NULL)
, m_transOpt(&transOpt)
, m_manager(prevHypo.GetManager())
, m_id(s_HypothesesCreated++)
, m_alignPair(prevHypo.m_alignPair)
{
// assert that we are not extending our hypothesis by retranslating something
// that this hypothesis has already translated!
@ -221,13 +223,13 @@ Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOpti
* return the subclass of Hypothesis most appropriate to the given target phrase
*/
Hypothesis* Hypothesis::Create(InputType const& m_source, const TargetPhrase &emptyTarget)
Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TargetPhrase &emptyTarget)
{
#ifdef USE_HYPO_POOL
Hypothesis *ptr = s_objectPool.getPtr();
return new(ptr) Hypothesis(m_source, emptyTarget);
return new(ptr) Hypothesis(manager, m_source, emptyTarget);
#else
return new Hypothesis(m_source, emptyTarget);
return new Hypothesis(manager, m_source, emptyTarget);
#endif
}
@ -301,7 +303,7 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
// TOTAL
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
/** Calculates the expected score of extending this hypothesis with the
@ -334,7 +336,7 @@ float Hypothesis::CalcExpectedScore( const SquareMatrix &futureScore ) {
// TOTAL
float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeEstimateScore( clock()-t ); }
return total;
}
@ -355,7 +357,7 @@ void Hypothesis::CalcRemainingScore()
// TOTAL
m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
IFVERBOSE(2) { StaticData::Instance().GetSentenceStats().AddTimeOtherScore( clock()-t ); }
IFVERBOSE(2) { m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t ); }
}
const Hypothesis* Hypothesis::GetPrevHypo()const{
@ -387,10 +389,7 @@ void Hypothesis::PrintHypothesis() const
TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
<< *m_sourcePhrase <<endl);
TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl); // <<" => translation cost "<<m_score[ScoreType::PhraseTrans];
if (PrintAlignmentInfo()){
TRACE_ERR( "\tsource-target word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Input) << endl); // <<" => source to target word-to-word alignment
TRACE_ERR( "\ttarget-source word alignment: "<< m_targetPhrase.GetAlignmentPair().GetAlignmentPhrase(Output) << endl); // <<" => target to source word-to-word alignment
}
if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
// TRACE_ERR( "\tdistance: "<<GetCurrSourceWordsRange().CalcDistortion(m_prevHypo->GetCurrSourceWordsRange())); // << " => distortion cost "<<(m_score[ScoreType::Distortion]*weightDistortion)<<endl;
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
@ -456,14 +455,7 @@ ostream& operator<<(ostream& out, const Hypothesis& hypothesis)
out << " " << hypothesis.GetScoreBreakdown();
// alignment
if (hypothesis.PrintAlignmentInfo()){
out << " [f2e:";
hypothesis.SourceAlignmentToStream(out);
out << "]";
out << " [e2f:";
hypothesis.TargetAlignmentToStream(out);
out << "]";
}
return out;
}

View File

@ -36,7 +36,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LexicalReordering.h"
#include "InputType.h"
#include "ObjectPool.h"
#include "AlignmentPair.h"
namespace Moses
{
@ -47,6 +46,7 @@ class TranslationOption;
class WordsRange;
class Hypothesis;
class FFState;
class Manager;
typedef std::vector<Hypothesis*> ArcList;
@ -82,14 +82,14 @@ protected:
std::vector<const FFState*> m_ffStates;
const Hypothesis *m_winningHypo;
ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
AlignmentPair m_alignPair;
const TranslationOption *m_transOpt;
Manager& m_manager;
int m_id; /*! numeric ID of this hypothesis, used for logging */
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
/*! used by initial seeding of the translation process */
Hypothesis(InputType const& source, const TargetPhrase &emptyTarget);
Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
/*! used when creating a new hypothesis using a translation option (phrase translation) */
Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt);
@ -104,15 +104,17 @@ public:
/** return the subclass of Hypothesis most appropriate to the given translation option */
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constraint);
static Hypothesis* Create(const WordsBitmap &initialCoverage);
static Hypothesis* Create(Manager& manager, const WordsBitmap &initialCoverage);
/** return the subclass of Hypothesis most appropriate to the given target phrase */
static Hypothesis* Create(InputType const& source, const TargetPhrase &emptyTarget);
static Hypothesis* Create(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget);
/** return the subclass of Hypothesis most appropriate to the given translation option */
Hypothesis* CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const;
void PrintHypothesis() const;
const InputType& GetInput() const {return m_sourceInput;}
/** return target phrase used to create this hypothesis */
// const Phrase &GetCurrTargetPhrase() const
@ -133,6 +135,11 @@ public:
{
return m_currTargetWordsRange;
}
//! Returns the Manager this hypothesis was created under (m_manager).
//! Used elsewhere in this class to reach the per-sentence statistics via GetManager().GetSentenceStats().
Manager& GetManager() const
{
return m_manager;
}
/** output length of the translation option used to create this hypothesis */
inline size_t GetCurrTargetLength() const
@ -223,37 +230,9 @@ public:
inline bool PrintAlignmentInfo() const{ return GetCurrTargetPhrase().PrintAlignmentInfo(); }
void SourceAlignmentToStream(std::ostream& out) const
{
if (m_prevHypo != NULL)
{
m_prevHypo->SourceAlignmentToStream(out);
AlignmentPhrase alignSourcePhrase=GetCurrTargetPhrase().GetAlignmentPair().GetAlignmentPhrase(Input);
alignSourcePhrase.Shift(m_currTargetWordsRange.GetStartPos());
out << " ";
/*
out << "\nGetCurrTargetPhrase(): " << GetCurrTargetPhrase();
out << "\nm_currTargetWordsRange: " << m_currTargetWordsRange << "->";
*/
alignSourcePhrase.print(out,m_currSourceWordsRange.GetStartPos());
}
}
void TargetAlignmentToStream(std::ostream& out) const
{
if (m_prevHypo != NULL)
{
m_prevHypo->TargetAlignmentToStream(out);
AlignmentPhrase alignTargetPhrase=GetCurrTargetPhrase().GetAlignmentPair().GetAlignmentPhrase(Output);
alignTargetPhrase.Shift(m_currSourceWordsRange.GetStartPos());
out << " ";
/*
out << "\nGetCurrTargetPhrase(): " << GetCurrTargetPhrase();
out << "\nm_currSourceWordsRange: " << m_currSourceWordsRange << "->";
*/
alignTargetPhrase.print(out,m_currTargetWordsRange.GetStartPos());
}
}
TO_STRING();
@ -283,11 +262,7 @@ public:
//! vector of what source words were aligned to each target
const AlignmentPair &GetAlignmentPair() const
{
return m_alignPair;
}
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
size_t GetNextStartPos(const TranslationOption &transOpt) const;

View File

@ -8,14 +8,19 @@
namespace Moses
{
class Manager;
class HypothesisStack
{
protected:
typedef std::set< Hypothesis*, HypothesisRecombinationOrderer > _HCType;
_HCType m_hypos; /**< contains hypotheses */
Manager& m_manager;
public:
HypothesisStack(Manager& manager): m_manager(manager) {}
typedef _HCType::iterator iterator;
typedef _HCType::const_iterator const_iterator;
//! iterators

View File

@ -26,12 +26,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Util.h"
#include "StaticData.h"
#include "Manager.h"
using namespace std;
namespace Moses
{
HypothesisStackCubePruning::HypothesisStackCubePruning()
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_bestScore = -std::numeric_limits<float>::infinity();
@ -85,7 +87,7 @@ bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
{
if (hypo->GetTotalScore() < m_worstScore)
{ // too bad for stack. don't bother adding hypo into collection
StaticData::Instance().GetSentenceStats().AddDiscarded();
m_manager.GetSentenceStats().AddDiscarded();
VERBOSE(3,"discarded, too bad for stack" << std::endl);
FREEHYPO(hypo);
return false;
@ -103,7 +105,7 @@ bool HypothesisStackCubePruning::AddPrune(Hypothesis *hypo)
Hypothesis *hypoExisting = *iterExisting;
assert(iterExisting != m_hypos.end());
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
// found existing hypo with same target ending.
// keep the best 1
@ -187,7 +189,7 @@ void HypothesisStackCubePruning::PruneToSize(size_t newSize)
{
iterator iterRemove = iter++;
Remove(iterRemove);
StaticData::Instance().GetSentenceStats().AddPruning();
m_manager.GetSentenceStats().AddPruning();
}
else
{
@ -273,7 +275,8 @@ void HypothesisStackCubePruning::SetBitmapAccessor(const WordsBitmap &newBitmap
BackwardsEdge *edge = new BackwardsEdge(bitmapContainer
, *bmContainer
, transOptList
, futureScore);
, futureScore,
m_manager.GetSource());
bmContainer->AddBackwardsEdge(edge);
}

View File

@ -33,6 +33,7 @@ namespace Moses
class BitmapContainer;
class TranslationOptionList;
class Manager;
typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
@ -60,7 +61,7 @@ protected:
void RemoveAll();
public:
HypothesisStackCubePruning();
HypothesisStackCubePruning(Manager& manager);
~HypothesisStackCubePruning()
{
RemoveAll();

View File

@ -26,12 +26,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Util.h"
#include "StaticData.h"
#include "Manager.h"
using namespace std;
namespace Moses
{
HypothesisStackNormal::HypothesisStackNormal()
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_bestScore = -std::numeric_limits<float>::infinity();
@ -96,7 +98,7 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
&& ! ( m_minHypoStackDiversity > 0
&& hypo->GetTotalScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) )
{
StaticData::Instance().GetSentenceStats().AddDiscarded();
m_manager.GetSentenceStats().AddDiscarded();
VERBOSE(3,"discarded, too bad for stack" << std::endl);
FREEHYPO(hypo);
return false;
@ -114,7 +116,7 @@ bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
Hypothesis *hypoExisting = *iterExisting;
assert(iterExisting != m_hypos.end());
StaticData::Instance().GetSentenceStats().AddRecombination(*hypo, **iterExisting);
m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
// found existing hypo with same target ending.
// keep the best 1
@ -211,7 +213,7 @@ void HypothesisStackNormal::PruneToSize(size_t newSize)
if (! included[i])
{
FREEHYPO( hypos[i] );
StaticData::Instance().GetSentenceStats().AddPruning();
m_manager.GetSentenceStats().AddPruning();
}
}
free(included);

View File

@ -69,7 +69,7 @@ public:
return GetWorstScoreForBitmap( coverage.GetID() );
}
HypothesisStackNormal();
HypothesisStackNormal(Manager& manager);
/** adds the hypo, but only if within thresholds (beamThr, stackSize).
* This function will recombine hypotheses silently! There is no record

View File

@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LanguageModel.h"
#include "TypeDef.h"
#include "Util.h"
#include "Manager.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "StaticData.h"
@ -183,7 +184,7 @@ FFState* LanguageModel::Evaluate(
res->lmstate = GetState(contextFactor);
}
out->PlusEquals(this, lmScore);
IFVERBOSE(2) { StaticData::Instance().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
IFVERBOSE(2) { hypo.GetManager().GetSentenceStats().AddTimeCalcLM( clock()-t ); }
return res;
}

View File

@ -206,9 +206,10 @@ LexicalReorderingTableTree::LexicalReorderingTableTree(
const std::vector<FactorType>& f_factors,
const std::vector<FactorType>& e_factors,
const std::vector<FactorType>& c_factors)
: LexicalReorderingTable(f_factors, e_factors, c_factors)
: LexicalReorderingTable(f_factors, e_factors, c_factors), m_UseCache(false), m_FilePath(filePath)
{
m_Table.Read(filePath+".binlexr");
m_Table.reset(new PrefixTreeMap());
m_Table->Read(m_FilePath+".binlexr");
}
LexicalReorderingTableTree::~LexicalReorderingTableTree(){
@ -240,7 +241,7 @@ Score LexicalReorderingTableTree::GetScore(const Phrase& f, const Phrase& e, con
//not in cache go to file...
Score score;
Candidates cands;
m_Table.GetCandidates(MakeTableKey(f,e), &cands);
m_Table->GetCandidates(MakeTableKey(f,e), &cands);
if(cands.empty()){
return Score();
}
@ -271,7 +272,7 @@ Score LexicalReorderingTableTree::auxFindScoreForContext(const Candidates& cands
*/
cvec.push_back(context.GetWord(i).GetString(m_FactorsC, false));
}
IPhrase c = m_Table.ConvertPhrase(cvec,TargetVocId);
IPhrase c = m_Table->ConvertPhrase(cvec,TargetVocId);
IPhrase sub_c;
IPhrase::iterator start = c.begin();
for(size_t j = 0; j <= context.GetSize(); ++j, ++start){
@ -302,6 +303,11 @@ void LexicalReorderingTableTree::InitializeForInput(const InputType& input){
// Cache(*s); ... this just takes up too much memory, we cache elsewhere
DisableCache();
}
if (!m_Table.get()) {
//load thread specific table.
m_Table.reset(new PrefixTreeMap());
m_Table->Read(m_FilePath+".binlexr");
}
};
bool LexicalReorderingTableTree::Create(std::istream& inFile,
@ -515,7 +521,7 @@ IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
*/
keyPart.push_back(f.GetWord(i).GetString(m_FactorsF, false));
}
auxAppend(key, m_Table.ConvertPhrase(keyPart, SourceVocId));
auxAppend(key, m_Table->ConvertPhrase(keyPart, SourceVocId));
keyPart.clear();
}
if(!m_FactorsE.empty()){
@ -529,7 +535,7 @@ IPhrase LexicalReorderingTableTree::MakeTableKey(const Phrase& f,
*/
keyPart.push_back(e.GetWord(i).GetString(m_FactorsE, false));
}
auxAppend(key, m_Table.ConvertPhrase(keyPart,TargetVocId));
auxAppend(key, m_Table->ConvertPhrase(keyPart,TargetVocId));
//keyPart.clear();
}
return key;
@ -547,20 +553,20 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
if(m_FactorsE.empty()){
//f is all of key...
Candidates cands;
m_Table.GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
m_Table->GetCandidates(MakeTableKey(f,Phrase(Output)),&cands);
m_Cache[MakeCacheKey(f,Phrase(Output))] = cands;
} else {
ObjectPool<PPimp> pool;
PPimp* pPos = m_Table.GetRoot();
PPimp* pPos = m_Table->GetRoot();
//1) goto subtree for f
for(int i = 0; i < f.GetSize() && 0 != pPos && pPos->isValid(); ++i){
/* old code
pPos = m_Table.Extend(pPos, auxClearString(f.GetWord(i).ToString(m_FactorsF)), SourceVocId);
*/
pPos = m_Table.Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
pPos = m_Table->Extend(pPos, f.GetWord(i).GetString(m_FactorsF, false), SourceVocId);
}
if(0 != pPos && pPos->isValid()){
pPos = m_Table.Extend(pPos, PrefixTreeMap::MagicWord);
pPos = m_Table->Extend(pPos, PrefixTreeMap::MagicWord);
}
if(0 == pPos || !pPos->isValid()){
return;
@ -574,9 +580,9 @@ void LexicalReorderingTableTree::auxCacheForSrcPhrase(const Phrase& f){
while(!stack.empty()){
if(stack.back().pos->isValid()){
LabelId w = stack.back().pos->ptr()->getKey(stack.back().pos->idx);
std::string next_path = stack.back().path + " " + m_Table.ConvertWord(w,TargetVocId);
std::string next_path = stack.back().path + " " + m_Table->ConvertWord(w,TargetVocId);
//cache this
m_Table.GetCandidates(*stack.back().pos,&cands);
m_Table->GetCandidates(*stack.back().pos,&cands);
if(!cands.empty()){
m_Cache[cache_key + auxClearString(next_path)] = cands;
}

View File

@ -6,6 +6,11 @@
#include <map>
#include <string>
#include <iostream>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
//moses dependencies:
#include "TypeDef.h"
#include "Phrase.h"
@ -106,7 +111,9 @@ class LexicalReorderingTableTree : public LexicalReorderingTable {
m_UseCache = false;
};
void ClearCache(){
m_Cache.clear();
if (m_UseCache) {
m_Cache.clear();
}
};
virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
@ -130,12 +137,17 @@ class LexicalReorderingTableTree : public LexicalReorderingTable {
private:
//typedef LexicalReorderingCand CandType;
typedef std::map< std::string, Candidates > CacheType;
typedef PrefixTreeMap TableType;
#ifdef WITH_THREADS
typedef boost::thread_specific_ptr<PrefixTreeMap> TableType;
#else
typedef std::auto_ptr<PrefixTreeMap> TableType;
#endif
static const int SourceVocId = 0;
static const int TargetVocId = 1;
bool m_UseCache;
std::string m_FilePath;
CacheType m_Cache;
TableType m_Table;
};

View File

@ -1,9 +1,6 @@
lib_LIBRARIES = libmoses.a
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES $(BOOST_CPPFLAGS)
libmoses_a_SOURCES = \
AlignmentElement.cpp \
AlignmentPhrase.cpp \
AlignmentPair.cpp \
BitmapContainer.cpp \
ConfusionNet.cpp \
DecodeGraph.cpp \

View File

@ -52,7 +52,7 @@ namespace Moses
Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm)
:m_source(source)
,m_transOptColl(source.CreateTranslationOptionCollection())
,m_search(Search::CreateSearch(source, searchAlgorithm, *m_transOptColl))
,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
,m_start(clock())
,interrupted_flag(0)
{
@ -82,23 +82,24 @@ void Manager::ProcessSentence()
{
// reset statistics
const StaticData &staticData = StaticData::Instance();
staticData.ResetSentenceStats(m_source);
ResetSentenceStats(m_source);
// collect translation options for this sentence
const vector <DecodeGraph*>
&decodeStepVL = staticData.GetDecodeStepVL();
vector <DecodeGraph*>
decodeStepVL = staticData.GetDecodeStepVL(m_source);
m_transOptColl->CreateTranslationOptions(decodeStepVL);
// some reporting on how long this took
clock_t gotOptions = clock();
float et = (gotOptions - m_start);
IFVERBOSE(2) { staticData.GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
IFVERBOSE(2) { GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start ); }
et /= (float)CLOCKS_PER_SEC;
VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
// search for best translation with the specified algorithm
m_search->ProcessSentence();
VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
RemoveAllInColl(decodeStepVL);
}
/**
@ -177,13 +178,13 @@ void Manager::CalcDecoderStatistics() const
const Hypothesis *hypo = GetBestHypothesis();
if (hypo != NULL)
{
StaticData::Instance().GetSentenceStats().CalcFinalStats(*hypo);
GetSentenceStats().CalcFinalStats(*hypo);
IFVERBOSE(2) {
if (hypo != NULL) {
string buff;
string buff2;
TRACE_ERR( "Source and Target Units:"
<< *StaticData::Instance().GetInput());
<< hypo->GetInput());
buff2.insert(0,"] ");
buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
buff2.insert(0,":");
@ -221,11 +222,11 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
<< "\ta=";
// phrase table scores
const std::vector<PhraseDictionary*> &phraseTables = staticData.GetPhraseDictionaries();
std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
const std::vector<PhraseDictionaryFeature*> &phraseTables = staticData.GetPhraseDictionaries();
std::vector<PhraseDictionaryFeature*>::const_iterator iterPhraseTable;
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable)
{
const PhraseDictionary *phraseTable = *iterPhraseTable;
const PhraseDictionaryFeature *phraseTable = *iterPhraseTable;
vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
outputWordGraphStream << scores[0];

View File

@ -104,11 +104,22 @@ public:
void SerializeSearchGraphPB(long translationId, std::ostream& outputStream) const;
#endif
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
//! Returns the input (m_source) this Manager holds.
const InputType& GetSource() const {return m_source;}
/***
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
*/
void CalcDecoderStatistics() const;
/**
 * Discard any statistics gathered so far and start a fresh SentenceStats
 * object for the given input. The previous object (if any) is destroyed.
 */
void ResetSentenceStats(const InputType& source)
{
  m_sentenceStats.reset(new SentenceStats(source));
}
//! Returns the statistics object held in m_sentenceStats.
//! NOTE(review): m_sentenceStats is dereferenced unconditionally; presumably
//! ResetSentenceStats() must have been called first -- confirm against callers.
SentenceStats& GetSentenceStats() const
{
return *m_sentenceStats;
}
std::auto_ptr<SentenceStats> m_sentenceStats;
};
}

View File

@ -179,16 +179,16 @@ public:
StringTgtCand::first_type const& factorStrings=cands[i].first;
StringTgtCand::second_type const& probVector=cands[i].second;
StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
//StringWordAlignmentCand::second_type const& swaVector=swacands[i].second;
//StringWordAlignmentCand::second_type const& twaVector=twacands[i].second;
std::vector<float> scoreVector(probVector.size());
std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
TransformScore);
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
FloorScore);
// CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,&src);
//CreateTargetPhrase(targetPhrase,factorStrings,scoreVector,swaVector,twaVector,&src);
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
tCands.push_back(targetPhrase);
}
@ -293,35 +293,14 @@ public:
for(size_t l=0;l<m_output.size();++l)
w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
}
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
targetPhrase.SetScore(m_obj->GetFeature(), scoreVector, m_weights, m_weightWP, *m_languageModels);
targetPhrase.SetSourcePhrase(srcPtr);
// targetPhrase.CreateAlignmentInfo("???", "???", 44);
}
void CreateTargetPhrase(TargetPhrase& targetPhrase,
StringTgtCand::first_type const& factorStrings,
StringTgtCand::second_type const& scoreVector,
StringWordAlignmentCand::second_type const& swaVector,
StringWordAlignmentCand::second_type const& twaVector,
Phrase const* srcPtr=0) const
{
FactorCollection &factorCollection = FactorCollection::Instance();
for(size_t k=0;k<factorStrings.size();++k)
{
std::vector<std::string> factors=TokenizeMultiCharSeparator(*factorStrings[k],StaticData::Instance().GetFactorDelimiter());
Word& w=targetPhrase.AddWord();
for(size_t l=0;l<m_output.size();++l)
w[m_output[l]]= factorCollection.AddFactor(Output, m_output[l], factors[l]);
}
targetPhrase.SetScore(m_obj, scoreVector, m_weights, m_weightWP, *m_languageModels);
targetPhrase.SetSourcePhrase(srcPtr);
targetPhrase.CreateAlignmentInfo(swaVector, twaVector);
}
TargetPhraseCollection* PruneTargetCandidates(std::vector<TargetPhrase> const & tCands,
std::vector<std::pair<float,size_t> >& costs) const

View File

@ -21,41 +21,125 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseDictionary.h"
#include "PhraseDictionaryTreeAdaptor.h"
#include "StaticData.h"
#include "InputType.h"
#include "TranslationOption.h"
namespace Moses
{
PhraseDictionary::PhraseDictionary(size_t numScoreComponent)
: Dictionary(numScoreComponent),m_tableLimit(0)
{
const_cast<ScoreIndexManager&>(StaticData::Instance().GetScoreIndexManager()).AddScoreProducer(this);
}
namespace Moses {
PhraseDictionary::~PhraseDictionary() {}
const TargetPhraseCollection *PhraseDictionary::
GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const
{
return GetTargetPhraseCollection(src.GetSubString(range));
return GetTargetPhraseCollection(src.GetSubString(range));
}
std::string PhraseDictionary::GetScoreProducerDescription() const
/**
 * Construct the feature, register it with the score index manager and, when
 * the phrase table is a text (non-binary) table, load it into memory now.
 *
 * A table is treated as binary when "<filePath>.binphr.idx" exists; in that
 * case nothing is loaded here and the tree dictionary is created on demand
 * by GetDictionary().
 *
 * \param numScoreComponent number of scores this feature contributes
 * \param numInputScores    number of input scores, passed on to the tree dictionary
 * \param input             input factor types handed to Load()
 * \param output            output factor types handed to Load()
 * \param filePath          phrase table path; ".gz" is appended if only the gzipped file exists
 * \param weight            weights handed to Load()
 * \param tableLimit        table limit handed to Load()
 */
PhraseDictionaryFeature::PhraseDictionaryFeature
  ( size_t numScoreComponent
  , unsigned numInputScores
  , const std::vector<FactorType> &input
  , const std::vector<FactorType> &output
  , const std::string &filePath
  , const std::vector<float> &weight
  , size_t tableLimit):
  m_numScoreComponent(numScoreComponent),
  m_numInputScores(numInputScores),
  m_input(input),
  m_output(output),
  m_filePath(filePath),
  m_weight(weight),
  m_tableLimit(tableLimit)
{
  const StaticData& staticData = StaticData::Instance();
  const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);
  //if we're using an in-memory phrase table, then load it now, otherwise wait
  if (!FileExists(filePath+".binphr.idx"))
  { // memory phrase table
    VERBOSE(2,"using standard phrase tables" << endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << endl);
    }
    if (staticData.GetInputType() != SentenceInput)
    {
      UserMessage::Add("Must use binary phrase table for this input type");
      assert(false);
    }
    PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this);
    // Load() must be called outside assert(): with NDEBUG defined the assert
    // argument is not evaluated and the table would never be loaded.
    const bool loaded = pdm->Load(m_input
                  , m_output
                  , m_filePath
                  , m_weight
                  , m_tableLimit
                  , staticData.GetAllLM()
                  , staticData.GetWeightWordPenalty());
    if (!loaded)
    {
      UserMessage::Add("Could not load phrase table " + m_filePath);
    }
    assert(loaded);
    m_memoryDictionary.reset(pdm);
  }
  //else: don't initialise the tree dictionary until it's required
}
/**
 * Return the dictionary backing this feature, initialised for \p source.
 *
 * If the in-memory dictionary was loaded by the constructor it is used.
 * Otherwise the tree dictionary is created and loaded the first time it is
 * requested through m_treeDictionary, and reused on later calls.
 * InitializeForInput(source) is invoked on the dictionary on every call.
 */
PhraseDictionary* PhraseDictionaryFeature::GetDictionary
(const InputType& source) {
  PhraseDictionary* dict = m_memoryDictionary.get();
  if (dict == NULL) {
    if (!m_treeDictionary.get()) {
      //load the tree dictionary for this thread
      const StaticData& staticData = StaticData::Instance();
      PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
      // Load() must be called outside assert(): with NDEBUG defined the
      // assert argument is not evaluated and the table would never be loaded.
      const bool loaded = pdta->Load(
                    m_input
                  , m_output
                  , m_filePath
                  , m_weight
                  , m_tableLimit
                  , staticData.GetAllLM()
                  , staticData.GetWeightWordPenalty());
      if (!loaded) {
        UserMessage::Add("Could not load binary phrase table " + m_filePath);
      }
      assert(loaded);
      m_treeDictionary.reset(pdta);
    }
    dict = m_treeDictionary.get();
  }
  dict->InitializeForInput(source);
  return dict;
}
//! Empty destructor body; no explicit cleanup is performed here.
PhraseDictionaryFeature::~PhraseDictionaryFeature() {}
//! Returns the fixed description string "PhraseModel" for this score producer.
std::string PhraseDictionaryFeature::GetScoreProducerDescription() const
{
return "PhraseModel";
}
size_t PhraseDictionary::GetNumScoreComponents() const
//! Returns the number of score components given at construction (m_numScoreComponent).
size_t PhraseDictionaryFeature::GetNumScoreComponents() const
{
return m_numScoreComponent;
}
size_t PhraseDictionary::GetNumInputScores() const { return 0;}
//! Returns the number of input scores given at construction (m_numInputScores).
size_t PhraseDictionaryFeature::GetNumInputScores() const
{
return m_numInputScores;
}
bool PhraseDictionary::ComputeValueInTranslationOption() const {
//! Always returns true for phrase dictionary features.
bool PhraseDictionaryFeature::ComputeValueInTranslationOption() const {
return true;
}
//! Returns the feature pointer this dictionary was constructed with (m_feature).
const PhraseDictionaryFeature* PhraseDictionary::GetFeature() const {
return m_feature;
}
}

View File

@ -26,6 +26,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <list>
#include <vector>
#include <string>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#endif
#include "Phrase.h"
#include "TargetPhrase.h"
#include "Dictionary.h"
@ -39,24 +44,58 @@ class StaticData;
class InputType;
class WordsRange;
/** abstract base class for phrase table classes
*/
class PhraseDictionary : public Dictionary, public StatelessFeatureFunction
class PhraseDictionaryFeature;
/**
* Abstract base class for phrase dictionaries (tables).
* Each dictionary keeps a pointer to the PhraseDictionaryFeature it was
* constructed with, available through GetFeature().
**/
class PhraseDictionary: public Dictionary {
public:
//! Stores the feature pointer and starts with a table limit of 0.
PhraseDictionary(size_t numScoreComponent, const PhraseDictionaryFeature* feature):
Dictionary(numScoreComponent), m_tableLimit(0), m_feature(feature) {}
//! table limit number.
size_t GetTableLimit() const { return m_tableLimit; }
//! Phrase dictionaries always report the Translate decode type.
DecodeType GetDecodeType() const { return Translate; }
//! The feature pointer passed to the constructor.
const PhraseDictionaryFeature* GetFeature() const;
/** set/change translation weights and recalc weighted score for each translation.
* TODO This may be redundant now we use ScoreCollection
*/
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
//! find the list of translations that can translate src. Only for phrase input
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
//! find the list of translations that can translate a portion of src. Used by confusion network decoding
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
//! Create entry for translation of source to targetPhrase
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
//! Per-input initialisation hook; must be implemented by every concrete dictionary.
virtual void InitializeForInput(InputType const& source) = 0;
protected:
size_t m_tableLimit; //!< table limit; set to 0 by the constructor
const PhraseDictionaryFeature* m_feature; //!< feature pointer given at construction
};
/**
* Represents a feature derived from a phrase table.
*/
class PhraseDictionaryFeature : public StatelessFeatureFunction
{
protected:
size_t m_tableLimit;
std::string m_filePath; // just for debugging purposes
public:
PhraseDictionary(size_t numScoreComponent);
virtual ~PhraseDictionary();
PhraseDictionaryFeature( size_t numScoreComponent
, unsigned numInputScores
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit);
virtual ~PhraseDictionaryFeature();
DecodeType GetDecodeType() const { return Translate; }
//! table limit number.
size_t GetTableLimit() const { return m_tableLimit; }
virtual bool ComputeValueInTranslationOption() const;
//! Overridden by load-on-demand phrase table classes to load data for each input
virtual void InitializeForInput(InputType const &/*source*/) {}
std::string GetScoreProducerDescription() const;
std::string GetScoreProducerWeightShortName() const
{
@ -66,21 +105,26 @@ class PhraseDictionary : public Dictionary, public StatelessFeatureFunction
size_t GetNumInputScores() const;
virtual bool ComputeValueInTranslationOption() const;
PhraseDictionary* GetDictionary(const InputType& source);
private:
size_t m_numScoreComponent;
unsigned m_numInputScores;
std::vector<FactorType> m_input;
std::vector<FactorType> m_output;
std::string m_filePath;
std::vector<float> m_weight;
size_t m_tableLimit;
//Only instantiate one of these
std::auto_ptr<PhraseDictionary> m_memoryDictionary;
#ifdef WITH_THREADS
boost::thread_specific_ptr<PhraseDictionary> m_treeDictionary;
#else
std::auto_ptr<PhraseDictionary> m_treeDictionary;
#endif
/** set/change translation weights and recalc weighted score for each translation.
* TODO This may be redundant now we use ScoreCollection
*/
virtual void SetWeightTransModel(const std::vector<float> &weightT)=0;
//! find list of translations that can translates src. Only for phrase input
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const=0;
//! find list of translations that can translates a portion of src. Used by confusion network decoding
virtual const TargetPhraseCollection *GetTargetPhraseCollection(InputType const& src,WordsRange const& range) const;
//! Create entry for translation of source to targetPhrase
virtual void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase)=0;
};
}

View File

@ -33,7 +33,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "StaticData.h"
#include "WordsRange.h"
#include "UserMessage.h"
#include "AlignmentPair.h"
using namespace std;
@ -50,7 +49,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
const StaticData &staticData = StaticData::Instance();
m_tableLimit = tableLimit;
m_filePath = filePath;
//factors
m_inputFactors = FactorMask(input);
@ -132,22 +130,13 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
targetPhrase.SetSourcePhrase(&sourcePhrase);
targetPhrase.CreateFromString( output, targetPhraseString, factorDelimiter);
// load alignment info only when present and relevant
if (staticData.UseAlignmentInfo()){
if (numElement==3){
stringstream strme;
strme << "You are using AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << line_num;
UserMessage::Add(strme.str());
return false;
}
targetPhrase.CreateAlignmentInfo(sourceAlignString, targetAlignString);
}
// component score, for n-best output
std::vector<float> scv(scoreVector.size());
std::transform(scoreVector.begin(),scoreVector.end(),scv.begin(),TransformScore);
std::transform(scv.begin(),scv.end(),scv.begin(),FloorScore);
targetPhrase.SetScore(this, scv, weight, weightWP, languageModels);
targetPhrase.SetScore(m_feature, scv, weight, weightWP, languageModels);
AddEquivPhrase(sourcePhrase, targetPhrase);

View File

@ -42,10 +42,8 @@ protected:
TargetPhraseCollection *CreateTargetPhraseCollection(const Phrase &source);
public:
PhraseDictionaryMemory(size_t numScoreComponent)
: MyBase(numScoreComponent)
{
}
PhraseDictionaryMemory(size_t numScoreComponent, PhraseDictionaryFeature* feature)
: PhraseDictionary(numScoreComponent,feature) {}
virtual ~PhraseDictionaryMemory();
bool Load(const std::vector<FactorType> &input
@ -62,6 +60,8 @@ public:
// for mert
void SetWeightTransModel(const std::vector<float> &weightT);
virtual void InitializeForInput(InputType const&)
{/* Don't do anything source specific here as this object is shared between threads.*/}
TO_STRING();

View File

@ -64,8 +64,9 @@ const PhraseDictionaryNode *PhraseDictionaryNode::GetChild(const Word &word) con
return NULL;
}
void PhraseDictionaryNode::SetWeightTransModel(const PhraseDictionaryMemory *phraseDictionary
, const std::vector<float> &weightT)
void PhraseDictionaryNode::SetWeightTransModel(
const PhraseDictionaryMemory *phraseDictionary,
const std::vector<float> &weightT)
{
// recursively set weights
NodeMap::iterator iterNodeMap;
@ -84,7 +85,7 @@ void PhraseDictionaryNode::SetWeightTransModel(const PhraseDictionaryMemory *phr
++iterTargetPhrase)
{
TargetPhrase &targetPhrase = **iterTargetPhrase;
targetPhrase.SetWeights(phraseDictionary, weightT);
targetPhrase.SetWeights(phraseDictionary->GetFeature(), weightT);
}
}

View File

@ -32,6 +32,7 @@ namespace Moses
{
class PhraseDictionaryMemory;
class PhraseDictionaryFeature;
/** One node of the PhraseDictionaryMemory structure
*/

View File

@ -115,18 +115,36 @@ PhraseDictionaryTree::PrefixPtr::operator bool() const
return imp && imp->isValid();
}
typedef LVoc<std::string> WordVoc;
#ifdef WITH_THREADS
// Serialises access to the vocabulary cache in ReadVoc(). It must be a single
// object shared by every call: a mutex declared as a plain local inside the
// function would be created afresh per call and protect nothing.
static boost::mutex s_readVocMutex;
#endif

/**
 * Return the vocabulary stored in the given file, reading it on first use.
 *
 * Vocabularies are cached per filename for the lifetime of the process, so
 * repeated calls with the same filename return the same WordVoc instance.
 * The returned object is shared between callers and is never freed by this
 * function; callers must not delete it.
 *
 * When built with WITH_THREADS the cache (including its first-use
 * construction) is accessed under s_readVocMutex.
 *
 * @param filename path of the vocabulary file, also used as the cache key
 * @return pointer to the cached vocabulary for filename
 */
static WordVoc* ReadVoc(const std::string& filename) {
#ifdef WITH_THREADS
  // Take the lock before touching the static cache so that its construction
  // on the very first call is covered as well.
  boost::mutex::scoped_lock lock(s_readVocMutex);
#endif
  static std::map<std::string,WordVoc*> vocs;
  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
  if (vi == vocs.end()) {
    WordVoc* voc = new WordVoc();
    voc->Read(filename);
    // Keep the iterator from the insertion to avoid a second lookup.
    vi = vocs.insert(std::make_pair(filename, voc)).first;
  }
  return vi->second;
}
struct PDTimp {
typedef PrefixTreeF<LabelId,OFF_T> PTF;
typedef FilePtr<PTF> CPT;
typedef std::vector<CPT> Data;
typedef LVoc<std::string> WordVoc;
Data data;
std::vector<OFF_T> srcOffsets;
FILE *os,*ot;
WordVoc sv,tv;
WordVoc* sv;
WordVoc* tv;
ObjectPool<PPimp> pPool;
// a comparison with the Boost MemPools might be useful
@ -189,7 +207,7 @@ struct PDTimp {
std::vector<std::string const*> vs;
vs.reserve(iphrase.size());
for(size_t j=0;j<iphrase.size();++j)
vs.push_back(&tv.symbol(iphrase[j]));
vs.push_back(&tv->symbol(iphrase[j]));
rv.push_back(StringTgtCand(vs,i->GetScores()));
}
}
@ -206,7 +224,7 @@ struct PDTimp {
std::vector<std::string const*> vs;
vs.reserve(iphrase.size());
for(size_t j=0;j<iphrase.size();++j)
vs.push_back(&tv.symbol(iphrase[j]));
vs.push_back(&tv->symbol(iphrase[j]));
rv.push_back(StringTgtCand(vs,i->GetScores()));
swa.push_back(StringWordAlignmentCand(vs,(i->GetSourceAlignment())));
twa.push_back(StringWordAlignmentCand(vs,(i->GetTargetAlignment())));
@ -223,7 +241,7 @@ struct PDTimp {
assert(p);
if(w.empty() || w==EPSILON) return p;
LabelId wi=sv.index(w);
LabelId wi=sv->index(w);
if(wi==InvalidLabelId) return PPtr(); // unknown word
else if(p.imp->isRoot())
@ -300,8 +318,10 @@ int PDTimp::Read(const std::string& fn)
for(size_t i=0;i<data.size();++i)
data[i]=CPT(os,srcOffsets[i]);
sv.Read(ifsv);
tv.Read(iftv);
sv = ReadVoc(ifsv);
tv = ReadVoc(iftv);
//sv.Read(ifsv);
//tv.Read(iftv);
TRACE_ERR("binary phrasefile loaded, default OFF_T: "<<PTF::getDefault()
<<"\n");
@ -320,7 +340,7 @@ void PDTimp::PrintTgtCand(const TgtCands& tcand,std::ostream& out) const
const IPhrase& iphr=tcand[i].GetPhrase();
out << i << " -- " << sc << " -- ";
for(size_t j=0;j<iphr.size();++j) out << tv.symbol(iphr[j])<<" ";
for(size_t j=0;j<iphr.size();++j) out << tv->symbol(iphr[j])<<" ";
out<< " -- ";
for (size_t j=0;j<srcAlign.size();j++) out << " " << srcAlign[j];
out << " -- ";
@ -370,7 +390,7 @@ GetTargetCandidates(const std::vector<std::string>& src,
IPhrase f(src.size());
for(size_t i=0;i<src.size();++i)
{
f[i]=imp->sv.index(src[i]);
f[i]=imp->sv->index(src[i]);
if(f[i]==InvalidLabelId) return;
}
@ -388,7 +408,7 @@ GetTargetCandidates(const std::vector<std::string>& src,
IPhrase f(src.size());
for(size_t i=0;i<src.size();++i)
{
f[i]=imp->sv.index(src[i]);
f[i]=imp->sv->index(src[i]);
if(f[i]==InvalidLabelId) return;
}
@ -405,7 +425,7 @@ PrintTargetCandidates(const std::vector<std::string>& src,
IPhrase f(src.size());
for(size_t i=0;i<src.size();++i)
{
f[i]=imp->sv.index(src[i]);
f[i]=imp->sv->index(src[i]);
if(f[i]==InvalidLabelId)
{
TRACE_ERR("the source phrase '"<<src<<"' contains an unknown word '"
@ -447,6 +467,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
std::vector<OFF_T> vo;
size_t lnc=0;
size_t numElement = NOT_FOUND; // 3=old format, 5=async format which include word alignment info
imp->sv = new WordVoc();
imp->tv = new WordVoc();
while(getline(inFile, line))
{
@ -490,30 +512,13 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
std::vector<std::string> wordVec = Tokenize(sourcePhraseString);
for (size_t i = 0 ; i < wordVec.size() ; ++i)
f.push_back(imp->sv.add(wordVec[i]));
f.push_back(imp->sv->add(wordVec[i]));
wordVec = Tokenize(targetPhraseString);
for (size_t i = 0 ; i < wordVec.size() ; ++i)
e.push_back(imp->tv.add(wordVec[i]));
e.push_back(imp->tv->add(wordVec[i]));
if (!PrintWordAlignment()){// word-to-word alignment are not used, create empty word-to-word alignment
EmptyAlignment(sourceAlignString, f.size());
EmptyAlignment(targetAlignString, e.size());
}
else if (numElement==3){
stringstream strme;
strme << "You are asking for AlignmentInfo, but this info not available in the Phrase Table. Only " <<numElement<<" fields on line " << lnc << " : " << line;
strme << endl << "Deleting files " << ofn << " and " << oft << "..." << endl;
if( remove( ofn.c_str() ) != 0 ) strme << "Error deleting file " << ofn;
else strme << "File " << ofn << " successfully deleted";
strme << endl;
if( remove( oft.c_str() ) != 0 ) strme << "Error deleting file " << oft;
else strme << "File " << oft << " successfully deleted";
strme << endl;
UserMessage::Add(strme.str());
exit(1);
}
//change "()" into "(-1)" for both source and target word-to-word alignments
std::string emtpyAlignStr="()";
@ -648,8 +653,8 @@ int PhraseDictionaryTree::Create(std::istream& inFile,const std::string& out)
fWriteVector(oi,vo);
fClose(oi);
imp->sv.Write(ofsv);
imp->tv.Write(oftv);
imp->sv->Write(ofsv);
imp->tv->Write(oftv);
return 1;
}

View File

@ -5,6 +5,11 @@
#include <string>
#include <vector>
#include <iostream>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
#endif
#include "TypeDef.h"
#include "Dictionary.h"

View File

@ -23,8 +23,9 @@ namespace Moses
*************************************************************/
PhraseDictionaryTreeAdaptor::
PhraseDictionaryTreeAdaptor(size_t numScoreComponent,unsigned numInputScores)
: MyBase(numScoreComponent),imp(new PDTAimp(this,numInputScores)) {}
PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature)
: PhraseDictionary(numScoreComponent,feature), imp(new PDTAimp(this,numInputScores)) {
}
PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
{
@ -32,21 +33,6 @@ PhraseDictionaryTreeAdaptor::~PhraseDictionaryTreeAdaptor()
delete imp;
}
void PhraseDictionaryTreeAdaptor::CleanUp()
{
imp->CleanUp();
MyBase::CleanUp();
}
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
{
// caching only required for confusion net
if(ConfusionNet const* cn=dynamic_cast<ConfusionNet const*>(&source))
imp->CacheSource(*cn);
//else if(Sentence const* s=dynamic_cast<Sentence const*>(&source))
// following removed by phi, not helpful
// imp->CacheSource(ConfusionNet(*s));
}
bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
, const std::vector<FactorType> &output
@ -54,8 +40,7 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP
)
, float weightWP)
{
if(m_numScoreComponent!=weight.size()) {
stringstream strme;
@ -64,7 +49,6 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
UserMessage::Add(strme.str());
return false;
}
m_filePath = filePath;
// set Dictionary members
m_inputFactors = FactorMask(input);
@ -79,6 +63,13 @@ bool PhraseDictionaryTreeAdaptor::Load(const std::vector<FactorType> &input
return true;
}
/**
 * Prepare the adaptor for a new input.
 *
 * Runs the implementation's CleanUp() first, then, if the input is a
 * ConfusionNet, hands it to the implementation's CacheSource().
 * Any other input type needs no caching.
 */
void PhraseDictionaryTreeAdaptor::InitializeForInput(InputType const& source)
{
  imp->CleanUp();

  // caching only required for confusion net
  ConfusionNet const* const confusionNet = dynamic_cast<ConfusionNet const*>(&source);
  if (!confusionNet) {
    return;
  }
  imp->CacheSource(*confusionNet);
}
TargetPhraseCollection const*
PhraseDictionaryTreeAdaptor::GetTargetPhraseCollection(Phrase const &src) const
{

View File

@ -27,7 +27,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
void operator=(const PhraseDictionaryTreeAdaptor&);
public:
PhraseDictionaryTreeAdaptor(size_t numScoreComponent,unsigned numInputScores);
PhraseDictionaryTreeAdaptor(size_t numScoreComponent, unsigned numInputScores, const PhraseDictionaryFeature* feature);
virtual ~PhraseDictionaryTreeAdaptor();
// enable/disable caching
@ -46,19 +46,14 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
, const std::vector<float> &weight
, size_t tableLimit
, const LMList &languageModels
, float weightWP
);
, float weightWP);
// get translation candidates for a given source phrase
// returns null pointer if nothing found
TargetPhraseCollection const* GetTargetPhraseCollection(Phrase const &src) const;
TargetPhraseCollection const* GetTargetPhraseCollection(InputType const& src,WordsRange const & srcRange) const;
// clean up temporary memory etc.
void CleanUp();
void InitializeForInput(InputType const& source);
// change model scaling factors
void SetWeightTransModel(const std::vector<float> &weightT);
@ -73,6 +68,7 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary {
}
size_t GetNumInputScores() const;
virtual void InitializeForInput(InputType const& source);
};

View File

@ -59,13 +59,28 @@ void PrefixTreeMap::FreeMemory() {
for(Data::iterator i = m_Data.begin(); i != m_Data.end(); ++i){
i->free();
}
for(size_t i = 0; i < m_Voc.size(); ++i){
/*for(size_t i = 0; i < m_Voc.size(); ++i){
delete m_Voc[i];
m_Voc[i] = 0;
}
}*/
m_PtrPool.reset();
}
#ifdef WITH_THREADS
// Serialises access to the vocabulary cache in ReadVoc(). It must be a single
// object shared by every call: a mutex declared as a plain local inside the
// function would be created afresh per call and protect nothing.
static boost::mutex s_readVocMutex;
#endif

/**
 * Return the vocabulary stored in the given file, reading it on first use.
 *
 * Vocabularies are cached per filename for the lifetime of the process, so
 * repeated calls with the same filename return the same WordVoc instance.
 * The returned object is shared between callers and is never freed by this
 * function; callers must not delete it.
 *
 * When built with WITH_THREADS the cache (including its first-use
 * construction) is accessed under s_readVocMutex.
 *
 * @param filename path of the vocabulary file, also used as the cache key
 * @return pointer to the cached vocabulary for filename
 */
static WordVoc* ReadVoc(const std::string& filename) {
#ifdef WITH_THREADS
  // Take the lock before touching the static cache so that its construction
  // on the very first call is covered as well.
  boost::mutex::scoped_lock lock(s_readVocMutex);
#endif
  static std::map<std::string,WordVoc*> vocs;
  std::map<std::string,WordVoc*>::iterator vi = vocs.find(filename);
  if (vi == vocs.end()) {
    WordVoc* voc = new WordVoc();
    voc->Read(filename);
    // Keep the iterator from the insertion to avoid a second lookup.
    vi = vocs.insert(std::make_pair(filename, voc)).first;
  }
  return vi->second;
}
int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
std::string ifs(fileNameStem + ".srctree"),
ift(fileNameStem + ".tgtdata"),
@ -77,7 +92,13 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
fReadVector(ii,srcOffsets);
fClose(ii);
if (m_FileSrc) {
fClose(m_FileSrc);
}
m_FileSrc = fOpen(ifs.c_str(),"rb");
if (m_FileTgt) {
fClose(m_FileTgt);
}
m_FileTgt = fOpen(ift.c_str(),"rb");
m_Data.resize(srcOffsets.size());
@ -99,8 +120,9 @@ int PrefixTreeMap::Read(const std::string& fileNameStem, int numVocs){
m_Voc.resize(numVocs);
for(int i = 0; i < numVocs; ++i){
sprintf(num, "%d", i);
m_Voc[i] = new WordVoc();
m_Voc[i]->Read(ifv + num);
//m_Voc[i] = new WordVoc();
//m_Voc[i]->Read(ifv + num);
m_Voc[i] = ReadVoc(ifv + num);
}
TRACE_ERR("binary file loaded, default OFF_T: "<< PTF::getDefault()<<"\n");

View File

@ -4,6 +4,12 @@
#include<vector>
#include<climits>
#include<iostream>
#include <map>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
#endif
#include "PrefixTree.h"
#include "File.h"

View File

@ -1,24 +1,22 @@
#include "Manager.h"
#include "SearchCubePruning.h"
#include "SearchNormal.h"
#include "UserMessage.h"
namespace Moses
{
Search::Search()
{
// long sentenceID = m_source.GetTranslationId();
// m_constraint = staticData.GetConstrainingPhrase(sentenceID);
}
Search *Search::CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
Search *Search::CreateSearch(Manager& manager, const InputType &source,
SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl)
{
switch(searchAlgorithm)
{
case Normal:
return new SearchNormal(source, transOptColl);
return new SearchNormal(manager,source, transOptColl);
case CubePruning:
return new SearchCubePruning(source, transOptColl);
return new SearchCubePruning(manager, source, transOptColl);
case CubeGrowing:
return NULL;
default:

View File

@ -12,6 +12,7 @@ class HypothesisStack;
class Hypothesis;
class InputType;
class TranslationOptionCollection;
class Manager;
class Search
{
@ -19,16 +20,18 @@ public:
virtual const std::vector < HypothesisStack* >& GetHypothesisStacks() const = 0;
virtual const Hypothesis *GetBestHypothesis() const = 0;
virtual void ProcessSentence() = 0;
Search();
Search(Manager& manager) : m_manager(manager) {}
virtual ~Search()
{}
// Factory
static Search *CreateSearch(const InputType &source, SearchAlgorithm searchAlgorithm, const TranslationOptionCollection &transOptColl);
static Search *CreateSearch(Manager& manager, const InputType &source, SearchAlgorithm searchAlgorithm,
const TranslationOptionCollection &transOptColl);
protected:
const Phrase *m_constraint;
Manager& m_manager;
};

View File

@ -1,4 +1,4 @@
#include "Manager.h"
#include "Util.h"
#include "SearchCubePruning.h"
#include "StaticData.h"
@ -41,8 +41,9 @@ class BitmapContainerOrderer
}
};
SearchCubePruning::SearchCubePruning(const InputType &source, const TranslationOptionCollection &transOptColl)
:m_source(source)
SearchCubePruning::SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
:Search(manager)
,m_source(source)
,m_hypoStackColl(source.GetSize() + 1)
,m_initialTargetPhrase(Output)
,m_start(clock())
@ -56,7 +57,7 @@ SearchCubePruning::SearchCubePruning(const InputType &source, const TranslationO
std::vector < HypothesisStackCubePruning >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
{
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning();
HypothesisStackCubePruning *sourceHypoColl = new HypothesisStackCubePruning(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
@ -78,7 +79,7 @@ void SearchCubePruning::ProcessSentence()
const StaticData &staticData = StaticData::Instance();
// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
HypothesisStackCubePruning &firstStack = *static_cast<HypothesisStackCubePruning*>(m_hypoStackColl.front());
firstStack.AddInitial(hypo);
@ -153,8 +154,8 @@ void SearchCubePruning::ProcessSentence()
PrintBitmapContainerGraph();
// some more logging
IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
VERBOSE(2, staticData.GetSentenceStats());
IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
VERBOSE(2, m_manager.GetSentenceStats());
}
void SearchCubePruning::CreateForwardTodos(HypothesisStackCubePruning &stack)

View File

@ -30,7 +30,7 @@ protected:
void PrintBitmapContainerGraph();
public:
SearchCubePruning(const InputType &source, const TranslationOptionCollection &transOptColl);
SearchCubePruning(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
~SearchCubePruning();
void ProcessSentence();

View File

@ -1,3 +1,4 @@
#include "Manager.h"
#include "Timer.h"
#include "SearchNormal.h"
@ -9,8 +10,9 @@ namespace Moses
* /param source input sentence
* /param transOptColl collection of translation options to be used for this sentence
*/
SearchNormal::SearchNormal(const InputType &source, const TranslationOptionCollection &transOptColl)
:m_source(source)
SearchNormal::SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
:Search(manager)
,m_source(source)
,m_hypoStackColl(source.GetSize() + 1)
,m_initialTargetPhrase(Output)
,m_start(clock())
@ -28,7 +30,7 @@ SearchNormal::SearchNormal(const InputType &source, const TranslationOptionColle
std::vector < HypothesisStackNormal >::iterator iterStack;
for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind)
{
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal();
HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
sourceHypoColl->SetMaxHypoStackSize(staticData.GetMaxHypoStackSize(),staticData.GetMinHypoStackDiversity());
sourceHypoColl->SetBeamWidth(staticData.GetBeamWidth());
@ -48,11 +50,11 @@ SearchNormal::~SearchNormal()
void SearchNormal::ProcessSentence()
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = staticData.GetSentenceStats();
SentenceStats &stats = m_manager.GetSentenceStats();
clock_t t=0; // used to track time for steps
// initial seed hypothesis: nothing translated, no words produced
Hypothesis *hypo = Hypothesis::Create(m_source, m_initialTargetPhrase);
Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
m_hypoStackColl[0]->AddPrune(hypo);
// go through each stack
@ -91,8 +93,8 @@ void SearchNormal::ProcessSentence()
}
// some more logging
IFVERBOSE(2) { staticData.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
VERBOSE(2, staticData.GetSentenceStats());
IFVERBOSE(2) { m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start ); }
VERBOSE(2, m_manager.GetSentenceStats());
}
@ -274,7 +276,7 @@ void SearchNormal::ExpandAllHypotheses(const Hypothesis &hypothesis, size_t star
void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
{
const StaticData &staticData = StaticData::Instance();
SentenceStats &stats = staticData.GetSentenceStats();
SentenceStats &stats = m_manager.GetSentenceStats();
clock_t t=0; // used to track time for steps
Hypothesis *newHypo;

View File

@ -10,6 +10,7 @@
namespace Moses
{
class Manager;
class InputType;
class TranslationOptionCollection;
@ -31,7 +32,7 @@ protected:
void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
public:
SearchNormal(const InputType &source, const TranslationOptionCollection &transOptColl);
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
~SearchNormal();
void ProcessSentence();

View File

@ -373,7 +373,6 @@ bool StaticData::LoadData(Parameter *parameter)
if (!LoadLanguageModels()) return false;
if (!LoadGenerationTables()) return false;
if (!LoadPhraseTables()) return false;
if (!LoadMapping()) return false;
if (!LoadGlobalLexicalModel()) return false;
m_scoreIndexManager.InitFeatureNames();
@ -418,12 +417,11 @@ StaticData::~StaticData()
RemoveAllInColl(m_phraseDictionary);
RemoveAllInColl(m_generationDictionary);
RemoveAllInColl(m_languageModel);
RemoveAllInColl(m_decodeStepVL);
RemoveAllInColl(m_reorderModels);
RemoveAllInColl(m_globalLexicalModels);
// delete trans opt
map<std::pair<const DecodeGraph*, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
map<std::pair<size_t, Phrase>, std::pair< TranslationOptionList*, clock_t > >::iterator iterCache;
for (iterCache = m_transOptCache.begin() ; iterCache != m_transOptCache.end() ; ++iterCache)
{
TranslationOptionList *transOptList = iterCache->second.first;
@ -847,47 +845,21 @@ bool StaticData::LoadPhraseTables()
IFVERBOSE(1)
PrintUserTime(string("Start loading PhraseTable ") + filePath);
VERBOSE(1,"filePath: " << filePath << endl);
if (!FileExists(filePath+".binphr.idx"))
{ // memory phrase table
VERBOSE(2,"using standard phrase tables" << endl);
if (!FileExists(filePath) && FileExists(filePath + ".gz")) {
filePath += ".gz";
VERBOSE(2,"Using gzipped file" << endl);
}
if (m_inputType != SentenceInput)
{
UserMessage::Add("Must use binary phrase table for this input type");
return false;
}
PhraseDictionaryMemory *pd=new PhraseDictionaryMemory(numScoreComponent);
if (!pd->Load(input
, output
, filePath
, weight
, maxTargetPhrase[index]
, GetAllLM()
, GetWeightWordPenalty()))
{
delete pd;
return false;
}
m_phraseDictionary.push_back(pd);
}
else
{ // binary phrase table
VERBOSE(1, "using binary phrase tables for idx "<<currDict<<"\n");
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent,(currDict==0 ? m_numInputScores : 0));
if (!pd->Load(input,output,filePath,weight,
maxTargetPhrase[index],
GetAllLM(),
GetWeightWordPenalty()))
{
delete pd;
return false;
}
m_phraseDictionary.push_back(pd);
}
PhraseDictionaryFeature* pdf = new PhraseDictionaryFeature(
numScoreComponent
, (currDict==0 ? m_numInputScores : 0)
, input
, output
, filePath
, weight
, maxTargetPhrase[index]);
m_phraseDictionary.push_back(pdf);
index++;
}
@ -898,8 +870,9 @@ bool StaticData::LoadPhraseTables()
return true;
}
bool StaticData::LoadMapping()
vector<DecodeGraph*> StaticData::GetDecodeStepVL(const InputType& source) const
{
vector<DecodeGraph*> decodeStepVL;
// mapping
const vector<string> &mappingVector = m_parameter->GetParam("mapping");
DecodeStep *prev = 0;
@ -932,7 +905,7 @@ bool StaticData::LoadMapping()
else
{
UserMessage::Add("Malformed mapping!");
return false;
assert(false);
}
DecodeStep* decodeStep = 0;
@ -944,9 +917,9 @@ bool StaticData::LoadMapping()
strme << "No phrase dictionary with index "
<< index << " available!";
UserMessage::Add(strme.str());
return false;
assert(false);
}
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index], prev);
decodeStep = new DecodeStepTranslation(m_phraseDictionary[index]->GetDictionary(source), prev);
break;
case Generate:
if(index>=m_generationDictionary.size())
@ -955,7 +928,7 @@ bool StaticData::LoadMapping()
strme << "No generation dictionary with index "
<< index << " available!";
UserMessage::Add(strme.str());
return false;
assert(false);
}
decodeStep = new DecodeStepGeneration(m_generationDictionary[index], prev);
break;
@ -964,22 +937,20 @@ bool StaticData::LoadMapping()
break;
}
assert(decodeStep);
if (m_decodeStepVL.size() < vectorList + 1)
if (decodeStepVL.size() < vectorList + 1)
{
m_decodeStepVL.push_back(new DecodeGraph());
decodeStepVL.push_back(new DecodeGraph(decodeStepVL.size()));
}
m_decodeStepVL[vectorList]->Add(decodeStep);
decodeStepVL[vectorList]->Add(decodeStep);
prev = decodeStep;
previousVectorList = vectorList;
}
return true;
return decodeStepVL;
}
void StaticData::CleanUpAfterSentenceProcessing() const
{
for(size_t i=0;i<m_phraseDictionary.size();++i)
m_phraseDictionary[i]->CleanUp();
for(size_t i=0;i<m_generationDictionary.size();++i)
m_generationDictionary[i]->CleanUp();
@ -997,10 +968,6 @@ void StaticData::CleanUpAfterSentenceProcessing() const
binary format is used) */
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in) const
{
m_input = &in;
for(size_t i=0;i<m_phraseDictionary.size();++i) {
m_phraseDictionary[i]->InitializeForInput(in);
}
for(size_t i=0;i<m_reorderModels.size();++i) {
m_reorderModels[i]->InitializeForInput(in);
}
@ -1031,9 +998,11 @@ void StaticData::SetWeightsForScoreProducer(const ScoreProducer* sp, const std::
const TranslationOptionList* StaticData::FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const
{
std::pair<const DecodeGraph*, Phrase> key(&decodeGraph, sourcePhrase);
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
#endif
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter
= m_transOptCache.find(key);
if (iter == m_transOptCache.end())
return NULL;
@ -1048,7 +1017,7 @@ void StaticData::ReduceTransOptCache() const
// find cutoff for last used time
priority_queue< clock_t > lastUsedTimes;
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter;
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iter;
iter = m_transOptCache.begin();
while( iter != m_transOptCache.end() )
{
@ -1065,7 +1034,7 @@ void StaticData::ReduceTransOptCache() const
{
if (iter->second.second < cutoffLastUsedTime)
{
std::map<std::pair<const DecodeGraph*, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iterRemove = iter++;
std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> >::iterator iterRemove = iter++;
delete iterRemove->second.first;
m_transOptCache.erase(iterRemove);
}
@ -1076,8 +1045,11 @@ void StaticData::ReduceTransOptCache() const
void StaticData::AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const
{
std::pair<const DecodeGraph*, Phrase> key(&decodeGraph, sourcePhrase);
std::pair<size_t, Phrase> key(decodeGraph.GetPosition(), sourcePhrase);
TranslationOptionList* storedTransOptList = new TranslationOptionList(transOptList);
#ifdef WITH_THREADS
boost::mutex::scoped_lock lock(m_transOptCacheMutex);
#endif
m_transOptCache[key] = make_pair( storedTransOptList, clock() );
ReduceTransOptCache();
}

View File

@ -25,6 +25,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <map>
#include <memory>
#ifdef WITH_THREADS
#include <boost/thread/mutex.hpp>
#endif
#include "TypeDef.h"
#include "ScoreIndexManager.h"
#include "FactorCollection.h"
@ -46,7 +51,7 @@ namespace Moses
class InputType;
class LexicalReordering;
class GlobalLexicalModel;
class PhraseDictionary;
class PhraseDictionaryFeature;
class GenerationDictionary;
class DistortionScoreProducer;
class WordPenaltyProducer;
@ -61,9 +66,8 @@ private:
protected:
std::map<long,Phrase> m_constraints;
std::vector<PhraseDictionary*> m_phraseDictionary;
std::vector<PhraseDictionaryFeature*> m_phraseDictionary;
std::vector<GenerationDictionary*> m_generationDictionary;
std::vector <DecodeGraph*> m_decodeStepVL;
Parameter *m_parameter;
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
LMList m_languageModel;
@ -129,7 +133,7 @@ protected:
bool m_PrintAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
mutable std::auto_ptr<SentenceStats> m_sentenceStats;
std::string m_factorDelimiter; //! by default, |, but it can be changed
size_t m_maxFactorIdx[2]; //! number of factors on source and target side
size_t m_maxNumFactors; //! max number of factors on both source and target sides
@ -144,10 +148,13 @@ protected:
size_t m_timeout_threshold; //! seconds after which time out is activated
bool m_useTransOptCache; //! flag indicating, if the persistent translation option cache should be used
mutable std::map<std::pair<const DecodeGraph*, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
mutable std::map<std::pair<size_t, Phrase>, pair<TranslationOptionList*,clock_t> > m_transOptCache; //! persistent translation option cache
size_t m_transOptCacheMaxSize; //! maximum size for persistent translation option cache
mutable const InputType* m_input; //! holds reference to current sentence
//FIXME: A single lock for the cache is not the most efficient choice. However, using a
//reader-writer lock for an LRU cache is tricky - how would readers record the last-used time?
#ifdef WITH_THREADS
mutable boost::mutex m_transOptCacheMutex;
#endif
bool m_isAlwaysCreateDirectTranslationOption;
//! constructor. only the 1 static variable can be created
@ -176,9 +183,9 @@ protected:
//! load all generation tables as specified in ini file
bool LoadGenerationTables();
//! load decoding steps
bool LoadMapping();
bool LoadLexicalReorderingModel();
bool LoadGlobalLexicalModel();
void ReduceTransOptCache() const;
public:
@ -228,10 +235,7 @@ public:
return m_outputFactorOrder;
}
const std::vector<DecodeGraph*> &GetDecodeStepVL() const
{
return m_decodeStepVL;
}
std::vector<DecodeGraph*> GetDecodeStepVL(const InputType& source) const;
inline bool GetSourceStartPosMattersForRecombination() const
{
@ -352,7 +356,7 @@ public:
{
return m_phraseDictionary.size();
}
const std::vector<PhraseDictionary*> &GetPhraseDictionaries() const
const std::vector<PhraseDictionaryFeature*> &GetPhraseDictionaries() const
{
return m_phraseDictionary;
}
@ -381,10 +385,7 @@ public:
{
return m_isDetailedTranslationReportingEnabled;
}
void ResetSentenceStats(const InputType& source) const
{
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
}
bool IsLabeledNBestList() const
{
return m_labeledNBestList;
@ -430,13 +431,9 @@ public:
InputTypeEnum GetInputType() const {return m_inputType;}
SearchAlgorithm GetSearchAlgorithm() const {return m_searchAlgorithm;}
size_t GetNumInputScores() const {return m_numInputScores;}
const InputType* GetInput() const { return m_input; }
void InitializeBeforeSentenceProcessing(InputType const&) const;
void CleanUpAfterSentenceProcessing() const;
SentenceStats& GetSentenceStats() const
{
return *m_sentenceStats;
}
const std::vector<float>& GetAllWeights() const
{
return m_allWeights;
@ -470,7 +467,7 @@ public:
bool GetUseTransOptCache() const { return m_useTransOptCache; }
void AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const;
void ReduceTransOptCache() const;
const TranslationOptionList* FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const;
};

View File

@ -62,10 +62,7 @@ void TargetPhrase::WriteToRulePB(hgmert::Rule* pb) const {
}
#endif
/** Set this phrase's alignment by delegating to m_alignmentPair.SetIdentityAlignment(). */
void TargetPhrase::SetAlignment()
{
m_alignmentPair.SetIdentityAlignment();
}
void TargetPhrase::SetScore(float score)
{
@ -204,142 +201,14 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
return clone;
}
// helper functions
/**
 * Parse a whitespace-separated alignment string such as "(0) (3) (1,2)" and
 * append one AlignmentElement per token through \p inserter.
 *
 * \param inserter          output iterator receiving the new elements
 * \param str               alignment string, one parenthesised group per word
 * \param phraseSize        expected number of groups (checked with assert)
 * \param otherPhraseSize   size handed to SetUniformAlignment() for empty elements
 * \param uniformAlignment  receives the positions that got a uniform alignment
 */
void AddAlignmentElement(AlignmentPhraseInserter &inserter
                         , const string &str
                         , size_t phraseSize
                         , size_t otherPhraseSize
                         , list<size_t> &uniformAlignment)
{
  // "(0) (3) (1,2)"  ->  "(0)" "(3)" "(1,2)"
  vector<string> groups = Tokenize(str);
  assert (groups.size() == phraseSize) ;

  // search/replace pairs applied to every group, in this order
  std::string emptyGroup = "()",  emptyGroupMarker = "(-1)";
  std::string openParen  = "(",   closeParen       = ")";
  std::string nothing    = "";

  size_t pos = 0;
  while (pos < groups.size())
  {
    // "()" becomes "(-1)", then every parenthesis is dropped: "(1,2)" -> "1,2"
    string indices = groups[pos];
    indices = Replace(indices, emptyGroup, emptyGroupMarker);
    indices = Replace(indices, openParen, nothing);
    indices = Replace(indices, closeParen, nothing);

    // "1,2" -> [1] [2]
    AlignmentElement *element = new AlignmentElement(Tokenize<AlignmentElementType>(indices, ","));

    if (element->GetSize() == 0)
    { // nothing parsed: fall back to a uniform alignment and remember where
      element->SetUniformAlignment(otherPhraseSize);
      uniformAlignment.push_back(pos);
    }

    **inserter = element;
    (*inserter)++;
    ++pos;
  }
}
// helper functions
/**
 * Append one AlignmentElement per entry of \p wa through \p inserter.
 * Each entry is a comma-separated index list such as "1,2"; unlike the
 * string overload, no parentheses are stripped here.
 *
 * \param inserter          output iterator receiving the new elements
 * \param wa                per-word alignment entries
 * \param phraseSize        expected number of entries (checked with assert)
 * \param otherPhraseSize   size handed to SetUniformAlignment() for empty elements
 * \param uniformAlignment  receives the positions that got a uniform alignment
 */
void AddAlignmentElement(AlignmentPhraseInserter &inserter
                         , const WordAlignments &wa
                         , size_t phraseSize
                         , size_t otherPhraseSize
                         , list<size_t> &uniformAlignment)
{
  assert (wa.size() == phraseSize) ;

  const size_t numEntries = wa.size();
  size_t pos = 0;
  while (pos < numEntries)
  {
    // "1,2" -> [1] [2]
    string entry = wa[pos];
    AlignmentElement *element = new AlignmentElement(Tokenize<AlignmentElementType>(entry, ","));

    if (element->GetSize() == 0)
    { // nothing parsed: fall back to a uniform alignment and remember where
      element->SetUniformAlignment(otherPhraseSize);
      uniformAlignment.push_back(pos);
    }

    **inserter = element;
    (*inserter)++;
    ++pos;
  }
}
void TargetPhrase::CreateAlignmentInfo(const WordAlignments &swa
, const WordAlignments &twa)
{
AlignmentPhraseInserter sourceInserter = m_alignmentPair.GetInserter(Input);
AlignmentPhraseInserter targetInserter = m_alignmentPair.GetInserter(Output);
list<size_t> uniformAlignmentSource, uniformAlignmentTarget;
if (!UseWordAlignment()){ //build uniform word-to-word alignment to fit the internal structure which requires their presence
std::string srcAlignStr,trgAlignStr;
UniformAlignment(srcAlignStr, m_sourcePhrase->GetSize(), GetSize());
UniformAlignment(trgAlignStr, GetSize(), m_sourcePhrase->GetSize());
CreateAlignmentInfo(srcAlignStr,trgAlignStr);
}
else{
AddAlignmentElement(sourceInserter
, swa
, m_sourcePhrase->GetSize()
, GetSize()
, uniformAlignmentSource);
AddAlignmentElement(targetInserter
, twa
, GetSize()
, m_sourcePhrase->GetSize()
, uniformAlignmentTarget);
}
// propergate uniform alignments to other side
// m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
// m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
}
void TargetPhrase::CreateAlignmentInfo(const string &sourceStr
, const string &targetStr)
{
AlignmentPhraseInserter sourceInserter = m_alignmentPair.GetInserter(Input);
AlignmentPhraseInserter targetInserter = m_alignmentPair.GetInserter(Output);
list<size_t> uniformAlignmentSource, uniformAlignmentTarget;
AddAlignmentElement(sourceInserter
, sourceStr
, m_sourcePhrase->GetSize()
, GetSize()
, uniformAlignmentSource);
AddAlignmentElement(targetInserter
, targetStr
, GetSize()
, m_sourcePhrase->GetSize()
, uniformAlignmentTarget);
// propergate uniform alignments to other side
// m_alignmentPair.GetAlignmentPhrase(Output).AddUniformAlignmentElement(uniformAlignmentSource);
// m_alignmentPair.GetAlignmentPhrase(Input).AddUniformAlignmentElement(uniformAlignmentTarget);
}
TO_STRING_BODY(TargetPhrase);
@ -347,8 +216,7 @@ std::ostream& operator<<(std::ostream& os, const TargetPhrase& tp)
{
os << static_cast<const Phrase&>(tp);
os << ", pC=" << tp.m_transScore << ", c=" << tp.m_fullScore;
if (tp.PrintAlignmentInfo())
os << ", " << tp.GetAlignmentPair();
return os;
}

View File

@ -25,7 +25,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Phrase.h"
#include "ScoreComponentCollection.h"
#include "AlignmentPair.h"
#if HAVE_CONFIG_H
#include "config.h"
#endif
@ -50,7 +49,6 @@ protected:
float m_transScore, m_ngramScore, m_fullScore;
//float m_ngramScore, m_fullScore;
ScoreComponentCollection m_scoreBreakdown;
AlignmentPair m_alignmentPair;
// in case of confusion net, ptr to source phrase
Phrase const* m_sourcePhrase;
@ -135,20 +133,9 @@ public:
{
return m_sourcePhrase;
}
AlignmentPair &GetAlignmentPair()
{
return m_alignmentPair;
}
const AlignmentPair &GetAlignmentPair() const
{
return m_alignmentPair;
}
/** Parse the alignment info portion of phrase table string to create alignment info */
void CreateAlignmentInfo(const std::string &sourceStr
, const std::string &targetStr);
void CreateAlignmentInfo(const WordAlignments &swa
, const WordAlignments &twa);
void UseWordAlignment(bool a){
wordalignflag=a;

View File

@ -30,7 +30,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "TypeDef.h"
#include "ScoreComponentCollection.h"
#include "AlignmentPair.h"
#include "StaticData.h"
namespace Moses

View File

@ -264,14 +264,14 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord,s
targetPhrase.CreateAlignmentInfo("(0)","(0)");
}
else
{
// drop source word. create blank trans opt
targetPhrase.SetAlignment();
//targetPhrase.SetAlignment();
}
transOpt = new TranslationOption(WordsRange(sourcePos, sourcePos + length - 1), targetPhrase, m_source, 0);

View File

@ -32,15 +32,15 @@ fi
echo "Calling $ACLOCAL..."
$ACLOCAL || die "aclocal failed"
$ACLOCAL -I m4 || die "aclocal failed"
echo "Calling $AUTOCONF..."
$AUTOCONF || die "autoconf failed"
$AUTOCONF || die "autoconf failed"
echo "Calling $AUTOMAKE..."
$AUTOMAKE || die "automake failed"
echo
echo "You should now be able to configure and build:"
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm]"
# Usage hint; every optional configure switch is shown in [brackets].
echo " ./configure [--with-srilm=/path/to/srilm] [--with-irstlm=/path/to/irstlm] [--with-randlm=/path/to/randlm] [--with-xmlrpc-c=/path/to/xmlrpc-c-config]"
echo " make -j 4"
echo

5
server/Makefile.am Normal file
View File

@ -0,0 +1,5 @@
# Builds mosesserver, the XML-RPC front end, against the static moses library.
bin_PROGRAMS = mosesserver
mosesserver_SOURCES = mosesserver.cpp
# Headers come from the source tree.
mosesserver_CPPFLAGS = -W -Wall -I$(top_srcdir)/moses/src $(XMLRPC_C_CPPFLAGS) $(BOOST_CPPFLAGS)
# libmoses.a is a build product, so it is found under the build tree; using
# $(top_srcdir) here only works when building inside the source directory.
mosesserver_LDADD = -L$(top_builddir)/moses/src -lmoses $(BOOST_LDFLAGS) $(XMLRPC_C_LIBS) $(BOOST_THREAD_LIB)
mosesserver_DEPENDENCIES = $(top_builddir)/moses/src/libmoses.a

23
server/client.perl Executable file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env perl
#
# Minimal example client for mosesserver.
#
# Usage: client.perl [URL]
#
# Calls the "translate" XML-RPC method with a struct holding the source
# sentence under "text", prints the translated text and, if the reply has an
# "align" member, the phrase alignment triples.
use strict;
use warnings;
use XMLRPC::Lite;

# The URL may be given as the first argument; the default is unchanged.
my $url = @ARGV ? $ARGV[0] : "http://localhost:9084/RPC2";
my $proxy = XMLRPC::Lite->proxy($url);

#my %param = ("text" => "das ist ein haus das ist ein haus das ist ein haus");
#my %param = ("text" => "je ne sais pas . ");
#my %param = ("text" => "actes pris en application des traités ce euratom dont la publication est obligatoire");
#my %param = ("text" => "actes pris en application des " );
#my %param = ("text" => "je ne sais pas . ", "align" => "true");
my %param = ("text" => "hello !");

# Keep the response object so that a server-side fault can be reported
# instead of being silently treated as an empty result.
my $response = $proxy->call("translate",\%param);
if ($response->fault) {
    die "translate failed: " . $response->faultstring . "\n";
}

my $result = $response->result;
print $result->{'text'} . "\n";
if ($result->{'align'}) {
    print "Phrase alignments: \n";
    my $aligns = $result->{'align'};
    # one line per phrase: target start, source start, source end
    foreach my $align (@$aligns) {
        print $align->{'tgt-start'} . "," . $align->{'src-start'} . ","
            . $align->{'src-end'} . "\n";
    }
}

163
server/mosesserver.cpp Normal file
View File

@ -0,0 +1,163 @@
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
#include "Hypothesis.h"
#include "Manager.h"
#include "StaticData.h"
using namespace Moses;
using namespace std;
/**
 * XML-RPC method "translate".
 *
 * Request:  one struct with a mandatory "text" member (the source sentence)
 *           and an optional "align" member.
 * Response: a struct with a "text" member (the translation) and, when
 *           "align" was present in the request, an "align" array of
 *           (tgt-start, src-start, src-end) structs in target order.
 */
class Translator : public xmlrpc_c::method {
public:
  Translator() {
    // signature and help strings are documentation -- the client
    // can query this information with a system.methodSignature and
    // system.methodHelp RPC.
    this->_signature = "S:S";
    this->_help = "Does translation";
  }

  typedef std::map<std::string, xmlrpc_c::value> params_t;

  /**
   * Translate the sentence in the request and store the reply in *retvalP.
   * Throws xmlrpc_c::fault (CODE_PARSE) when the "text" member is missing.
   */
  void
  execute(xmlrpc_c::paramList const& paramList,
          xmlrpc_c::value *   const  retvalP) {
    const params_t params = paramList.getStruct(0);
    paramList.verifyEnd(1);

    params_t::const_iterator si = params.find("text");
    if (si == params.end()) {
      throw xmlrpc_c::fault(
        "Missing source text",
        xmlrpc_c::fault::CODE_PARSE);
    }
    const string source(
      (xmlrpc_c::value_string(si->second)));
    cerr << "Input: " << source << endl;

    // Only the presence of "align" is checked; its value is not inspected.
    si = params.find("align");
    bool addAlignInfo = (si != params.end());

    const StaticData &staticData = StaticData::Instance();
    Sentence sentence(Input);
    const vector<FactorType> &inputFactorOrder =
      staticData.GetInputFactorOrder();
    stringstream in(source + "\n");
    sentence.Read(in,inputFactorOrder);

    Manager manager(sentence,staticData.GetSearchAlgorithm());
    manager.ProcessSentence();
    const Hypothesis* hypo = manager.GetBestHypothesis();

    // outputHypo() tolerates a NULL hypothesis, in which case the reply
    // carries an empty translation instead of crashing the server.
    vector<xmlrpc_c::value> alignInfo;
    stringstream out;
    outputHypo(out,hypo,addAlignInfo,alignInfo);

    map<string, xmlrpc_c::value> retData;
    pair<string, xmlrpc_c::value>
      text("text", xmlrpc_c::value_string(out.str()));
    cerr << "Output: " << out.str() << endl;
    if (addAlignInfo) {
      retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
    }
    retData.insert(text);

    *retvalP = xmlrpc_c::value_struct(retData);
  }

  /**
   * Write the surface form (factor 0, each word followed by a space) of the
   * hypothesis chain ending in \p hypo to \p out, earliest phrase first, and
   * optionally append one alignment struct per phrase to \p alignInfo.
   * A NULL \p hypo produces no output.
   */
  void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo) {
    // Stop on a missing hypothesis, or on the one without a predecessor,
    // which contributes no words.
    if (hypo == NULL || hypo->GetPrevHypo() == NULL) {
      return;
    }

    // Emit everything before this phrase first so the output is in target order.
    outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo);

    // Bind by reference: copying the whole phrase per step is unnecessary.
    const TargetPhrase &p = hypo->GetTargetPhrase();
    for (size_t pos = 0 ; pos < p.GetSize() ; pos++)
    {
      const Factor *factor = p.GetFactor(pos, 0);
      out << *factor << " ";
    }

    if (addAlignmentInfo) {
      /**
       * Add the alignment info to the array. This is in target order and consists of
       * (tgt-start, src-start, src-end) triples.
       **/
      map<string, xmlrpc_c::value> phraseAlignInfo;
      phraseAlignInfo["tgt-start"] = xmlrpc_c::value_int(hypo->GetCurrTargetWordsRange().GetStartPos());
      phraseAlignInfo["src-start"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetStartPos());
      phraseAlignInfo["src-end"] = xmlrpc_c::value_int(hypo->GetCurrSourceWordsRange().GetEndPos());
      alignInfo.push_back(xmlrpc_c::value_struct(phraseAlignInfo));
    }
  }
};
int main(int argc, char** argv) {
//Extract port and log, send other args to moses
char** mosesargv = new char*[argc+2];
int mosesargc = 0;
int port = 8080;
const char* logfile = "/dev/null";
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i],"--server-port")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-port" << endl;
exit(1);
} else {
port = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-log")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-log" << endl;
exit(1);
} else {
logfile = argv[i];
}
} else {
mosesargv[mosesargc] = new char[strlen(argv[i])+1];
strcpy(mosesargv[mosesargc],argv[i]);
++mosesargc;
}
}
Parameter* params = new Parameter();
if (!params->LoadParam(mosesargc,mosesargv)) {
params->Explain();
exit(1);
}
if (!StaticData::LoadDataStatic(params)) {
exit(1);
}
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new Translator);
myRegistry.addMethod("translate", translator);
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,
port, // TCP port on which to listen
logfile
);
cerr << "Listening on port " << port << endl;
myAbyssServer.run();
// xmlrpc_c::serverAbyss.run() never returns
assert(false);
return 0;
}