Simple program that illustrates how to access a phrase table on disk from an external program

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3063 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
sanmarf 2010-04-07 10:25:50 +00:00
parent e894097edf
commit d30212f19d
11 changed files with 6582 additions and 0 deletions

1
misc/pmoses/AUTHORS Normal file
View File

@ -0,0 +1 @@
Felipe Sánchez-Martínez -- Universitat d'Alacant (Spain)

1
misc/pmoses/COPYING Symbolic link
View File

@ -0,0 +1 @@
/usr/share/automake-1.10/COPYING

0
misc/pmoses/ChangeLog Normal file
View File

1
misc/pmoses/INSTALL Symbolic link
View File

@ -0,0 +1 @@
/usr/share/automake-1.10/INSTALL

8
misc/pmoses/Makefile.am Normal file
View File

@ -0,0 +1,8 @@
# author: Felipe Sánchez Martínez
# Automake input describing how the pmoses example program is built.
# Program to build and install.
bin_PROGRAMS = pmoses
# Extra file to ship in the distribution tarball.
EXTRA_DIST = autogen.sh
# The program consists of a single source file.
pmoses_SOURCES = pmoses.cc
# Link against zlib.
pmoses_LDADD = -lz

0
misc/pmoses/NEWS Normal file
View File

4
misc/pmoses/README Normal file
View File

@ -0,0 +1,4 @@
Simple program that illustrates how to access a phrase table on disk from an
external program.

47
misc/pmoses/autogen.sh Executable file
View File

@ -0,0 +1,47 @@
#! /bin/sh
# $Id: autogen.sh,v 1.1.1.1 2006/10/23 16:21:52 sanmarf Exp $
#
# Copyright (c) 2002 Daniel Elstner <daniel.elstner@gmx.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License VERSION 2 as
# published by the Free Software Foundation. You are not allowed to
# use any other version of the license; unless you got the explicit
# permission from the author to do so.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Directory part of the path this script was invoked with (empty when the
# script was called without any slash in its name).
dir=`echo "$0" | sed 's,[^/]*$,,'`
test "x${dir}" = "x" && dir='.'

# The autotools below work on the current directory, so refuse to run from
# anywhere but the directory that contains this script.
if test "x`cd "${dir}" 2>/dev/null && pwd`" != "x`pwd`"
then
    echo "This script must be executed directly from the source directory."
    exit 1
fi

# Remove stale files left behind by a previous run.
rm -f config.cache acconfig.h

# Regenerate the build system. The chain stops at the first failing tool; in
# that case abort with a non-zero status instead of claiming success below.
echo "- aclocal." && \
aclocal && \
echo "- autoconf." && \
autoconf && \
echo "- autoheader." && \
autoheader && \
echo "- automake." && \
automake --add-missing --gnu || {
    echo "Error: generating the build system failed." >&2
    exit 1
}

echo
echo "You should now be able to configure and build:"
echo " $ ./configure --with-srilm=/path/to/srilm --with-moses=/path/to/moses"
echo " $ make"
echo

6276
misc/pmoses/configure vendored Executable file

File diff suppressed because it is too large Load Diff

116
misc/pmoses/configure.ac Normal file
View File

@ -0,0 +1,116 @@
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
# Minimum Autoconf version needed to process this file.
AC_PREREQ(2.52)
# Package name, version and bug-report address.
AC_INIT([pmoses], [0.1.0], [fsanchez@dlsi.ua.es])
# Results of the checks below are written as preprocessor defines to config.h.
AC_CONFIG_HEADERS([config.h])
# Release versioning
# The three components below are combined into the API, release and full
# version strings, which are exported to the generated files with AC_SUBST.
GENERIC_MAJOR_VERSION=0
GENERIC_MINOR_VERSION=1
GENERIC_MICRO_VERSION=0
GENERIC_API_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
AC_SUBST(GENERIC_API_VERSION)
PACKAGE=pmoses
GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION
GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
AC_SUBST(GENERIC_RELEASE)
AC_SUBST(GENERIC_VERSION)
VERSION=$GENERIC_VERSION
# Old-style Automake initialisation taking the package name and version
# computed above.
# NOTE(review): the third argument presumably keeps PACKAGE and VERSION out of
# config.h; confirm against the Automake manual.
AM_INIT_AUTOMAKE($PACKAGE, $VERSION, no-define)
# Checks for programs.
# Choose optimisation flags from the machine hardware name. uname is looked up
# through PATH because it is not installed as /bin/uname on every system; if it
# cannot be run, ARCH stays empty and the generic flags are used.
AC_MSG_CHECKING([Compilation architecture: PPC, i686, Other])
ARCH=$(uname -m 2>/dev/null)
if test "x$ARCH" = xppc
then
  AC_MSG_RESULT([PowerPC])
  CFLAGS="$CFLAGS -Wall -ansi -mpowerpc -maltivec -fno-pic -fomit-frame-pointer"
  CXXFLAGS="$CXXFLAGS -Wall -ansi -mpowerpc -maltivec -fno-pic -fomit-frame-pointer"
elif test "x$ARCH" = xi686
then
  AC_MSG_RESULT([i686])
  CFLAGS="$CFLAGS -Wall -ansi -march=i686 -O3 -fno-pic -fomit-frame-pointer"
  CXXFLAGS="$CXXFLAGS -Wall -ansi -march=i686 -O3 -fno-pic -fomit-frame-pointer"
else
  AC_MSG_RESULT([Other])
  CFLAGS="$CFLAGS -Wall -ansi -O3"
  CXXFLAGS="$CXXFLAGS -Wall -ansi -O3"
fi
AC_PROG_CXX
AM_SANITY_CHECK
AC_LANG_CPLUSPLUS
# The two options below replace the flags chosen above. Their action is also
# run for the --disable form of the option, where enableval is no, so that
# case is filtered out explicitly.
AC_ARG_ENABLE(debug,
[ --enable-debug Enable "-g -Wall" compiler options],
[if test "x$enableval" != xno
 then
   CXXFLAGS="-g -Wall"; CFLAGS="-g -Wall"
 fi])
AC_ARG_ENABLE(profile,
[ --enable-profile Enable "-pg -g -Wall" compiler options],
[if test "x$enableval" != xno
 then
   CXXFLAGS="-pg -g -Wall"; CFLAGS="-pg -g -Wall"; LDFLAGS="-pg"
 fi])
# SRI LM toolkit. Only used when a path is given with --with-srilm=PATH.
AC_ARG_WITH(srilm,
[AC_HELP_STRING([--with-srilm=PATH], [(required) path to the SRI LM toolkit])],
[with_srilm=$withval],
[with_srilm=no]
)
if test "x$with_srilm" != 'xno'
then
  CXXFLAGS="$CXXFLAGS -I${with_srilm}/include"
  AC_CHECK_HEADER(Ngram.h,
  [AC_DEFINE([HAVE_SRILM], [], [flag for SRILM])],
  [AC_MSG_ERROR([Cannot find SRILM!])])
  LIB_SRILM="-loolm -ldstruct -lmisc"
  # SRILM keeps its libraries in a per-platform subdirectory named after the
  # output of its machine-type script.
  MY_ARCH=`${with_srilm}/sbin/machine-type`
  LDFLAGS="$LDFLAGS -L${with_srilm}/lib/${MY_ARCH}"
  LIBS="$LIBS $LIB_SRILM"
  FMTLIBS="$FMTLIBS liboolm.a libdstruct.a libmisc.a"
fi
# Every AM_CONDITIONAL has to be run exactly once on each configure run, so it
# is evaluated here, outside of the shell conditional.
AM_CONDITIONAL([SRI_LM], [test "x$with_srilm" != 'xno'])

# Moses decoder. Declaring the option makes it show up in --help and gives
# with_moses a defined value when the option is not used: yes means that the
# headers and the library are looked up in the default search paths only.
AC_ARG_WITH(moses,
[AC_HELP_STRING([--with-moses=PATH], [(required) path to the Moses source tree])],
[with_moses=$withval],
[with_moses=yes]
)
if test "x$with_moses" != 'xno'
then
  if test "x$with_moses" != 'xyes'
  then
    # Only add search paths when a real path was given.
    CXXFLAGS="$CXXFLAGS -I${with_moses}/moses/src"
    LDFLAGS="$LDFLAGS -L${with_moses}/moses/src"
  fi
  AC_CHECK_HEADER(PhraseDictionaryTreeAdaptor.h,
  [AC_DEFINE([HAVE_MOSES], [], [flag for MOSES])],
  [AC_MSG_ERROR([Cannot find MOSES!])])
  LIB_MOSES="-lmoses"
  # libmoses goes first so that the libraries it depends on follow it on the
  # link line.
  LIBS="$LIB_MOSES $LIBS"
  FMTLIBS="$FMTLIBS libmoses.a"
fi
AM_CONDITIONAL([MOSES], [test "x$with_moses" != 'xno'])
# Checks for header files.
AC_HEADER_STDC
# Records in config.h whether getopt.h is available.
AC_CHECK_HEADERS([getopt.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_HEADER_STDBOOL
AC_C_CONST
AC_TYPE_SIZE_T
# The wider list of declarations below is disabled; only getopt_long is probed.
#AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt_long])
AC_CHECK_DECLS([getopt_long])
# Records in config.h whether setlocale is available.
AC_CHECK_FUNCS([setlocale])
# Generate the Makefile.
AC_OUTPUT([Makefile])

128
misc/pmoses/pmoses.cc Normal file
View File

@ -0,0 +1,128 @@
/*
* Copyright (C) 2009 Felipe Sánchez-Martínez
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "TypeDef.h"
#include "PhraseDictionaryTreeAdaptor.h"
#include "Phrase.h"
#include "TargetPhraseCollection.h"
#include "LMList.h"
#include "ScoreComponentCollection.h"
using namespace std;
using namespace Moses;
// Normalizes the blanks of a string: removes the spaces at the beginning and
// at the end, and collapses every run of consecutive spaces in between into a
// single space. Only the space character ' ' is considered; tabs and other
// whitespace are left untouched.
//
// The string is rebuilt in a single pass instead of erasing characters one by
// one, which would shift the rest of the string on every removal.
std::string trim(std::string str) {
  std::string result;
  result.reserve(str.length());

  // True when at least one space has been seen since the last character that
  // was copied; the space is only emitted if another character follows.
  bool pendingSpace = false;

  for (std::string::size_type i = 0; i < str.length(); ++i) {
    if (str[i] == ' ') {
      // Spaces before the first copied character are leading ones: drop them.
      pendingSpace = !result.empty();
    } else {
      if (pendingSpace) {
        result += ' ';
        pendingSpace = false;
      }
      result += str[i];
    }
  }
  return result;
}
int main (int argc, char *argv[]) {
vector<FactorType> input, output;
vector<float> weight;
int numScoreComponent=5;
int numInputScores=0;
int tableLimit=0;
int weightWP=0;
LMList lmList;
input.push_back(0);
output.push_back(0);
weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
weight.push_back(0);
if (argc<3) {
cerr<<"Error: Wrong number of parameters."<<endl;
cerr<<"Sintax: "<<argv[0]<<" /path/to/phrase/table source phrase"<<endl;
exit(EXIT_FAILURE);
}
string filePath=argv[1];
string source_str="";
for(unsigned i=2; i<argc; i++) {
if (source_str.length()>0) source_str+=" ";
source_str+=argv[i];
}
cerr<<"numScoreComponent: "<<numScoreComponent<<endl;
cerr<<"numInputScores: "<<numInputScores<<endl;
PhraseDictionaryTreeAdaptor *pd=new PhraseDictionaryTreeAdaptor(numScoreComponent, numInputScores);
cerr<<"Table limit: "<<tableLimit<<endl;
cerr<<"WeightWordPenalty: "<<weightWP<<endl;
cerr<<"Source phrase: ___"<<source_str<<"___"<<endl;
if (!pd->Load(input, output, filePath, weight, tableLimit, lmList, weightWP)) {
delete pd;
return false;
}
cerr<<"-------------------------------------------------"<<endl;
FactorDirection direction;
Phrase phrase(direction);
phrase.CreateFromString(input, source_str, "|");
TargetPhraseCollection *tpc = (TargetPhraseCollection*) pd->GetTargetPhraseCollection(phrase);
if (tpc == NULL)
cerr<<"Not found."<<endl;
else {
TargetPhraseCollection::iterator iterTargetPhrase;
for (iterTargetPhrase = tpc->begin(); iterTargetPhrase != tpc->end(); ++iterTargetPhrase) {
//cerr<<(*(*iterTargetPhrase))<<endl;
stringstream strs;
strs<<static_cast<const Phrase&>(*(*iterTargetPhrase));
cerr<<source_str<<" => ___"<<trim(strs.str())<<"___ ";
ScoreComponentCollection scc = (*iterTargetPhrase)->GetScoreBreakdown();
cerr<<"Scores: ";
for(unsigned i=0; i<scc.size(); i++) {
cerr<<scc[i]<<" ";
}
cerr<<endl;
}
}
cerr<<"-------------------------------------------------"<<endl;
}