- print statistics for confusion nets

- small bug fixes


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@316 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
zens 2006-07-26 20:33:33 +00:00
parent 1ae613e79c
commit 4022ae95bc
7 changed files with 68 additions and 17 deletions

View File

@ -73,6 +73,18 @@ int main(int argc, char* argv[])
{
timer.start("Starting...");
std::cerr
<<"============================================================================\n"
<<"starting "<<argv[0]<<" (build on "<<__DATE__<<")\n"
<<"============================================================================\n"
<<"\n"
<<"the command line was: \n";
for(int i=0;i<argc;++i) std::cerr<<argv[i]<<" ";
std::cerr
<<"\n"
<<"============================================================================\n";
StaticData staticData;
if (!staticData.LoadParameters(argc, argv))

View File

@ -70,21 +70,21 @@ DIST_SOURCES = $(am__moses_SOURCES_DIST)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run aclocal-1.9
ACLOCAL = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run aclocal-1.9
AMDEP_FALSE = #
AMDEP_TRUE =
AMTAR = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run tar
AUTOCONF = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run autoconf
AUTOHEADER = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run autoheader
AUTOMAKE = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run automake-1.9
AMTAR = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run tar
AUTOCONF = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run autoconf
AUTOHEADER = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run autoheader
AUTOMAKE = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run automake-1.9
AWK = gawk
BUILD_MYSQL_SUPPORT_FALSE =
BUILD_MYSQL_SUPPORT_TRUE = #
CPPFLAGS = -I/home/ws06/cdyer/boost-stage -I/home/ws06/cdyer/boost-stage/include -I/home/ws06/cdyer/cvs-moses-tip/moses-cmd/../moses/src -I/home/ws06/cdyer/srilm/include
CPPFLAGS = -I/home/ws06/cdyer/boost-stage -I/home/ws06/cdyer/boost-stage/include -I/home/ws06/rzens/workspace/Jul26/moses/src -I/home/ws06/cdyer/srilm/include
CXX = g++
CXXCPP = g++ -E
CXXDEPMODE = depmode=gcc3
CXXFLAGS = -g -O2
CXXFLAGS = -O3 -DNDEBUG
CYGPATH_W = echo
DEFS = -DHAVE_CONFIG_H
DEPDIR = .deps
@ -101,11 +101,11 @@ INTERNAL_LM_FALSE =
INTERNAL_LM_TRUE = #
IRST_LM_FALSE =
IRST_LM_TRUE = #
LDFLAGS = -static -L/home/ws06/cdyer/boost-stage/lib -L/home/ws06/cdyer/boost-stage/stage/lib -L/home/ws06/cdyer/cvs-moses-tip/moses-cmd/../moses/src -L/home/ws06/cdyer/srilm/lib/i686
LDFLAGS = -static -L/home/ws06/cdyer/boost-stage/lib -L/home/ws06/cdyer/boost-stage/stage/lib -L/home/ws06/rzens/workspace/Jul26/moses/src -L/home/ws06/cdyer/srilm/lib/i686
LIBOBJS =
LIBS = -lmoses -loolm -ldstruct -lmisc -lboost_iostreams-gcc-mt -lboost_filesystem-gcc-mt -lboost_thread-gcc-mt -lz
LTLIBOBJS =
MAKEINFO = ${SHELL} /home/ws06/cdyer/cvs-moses-tip/moses-cmd/missing --run makeinfo
MAKEINFO = ${SHELL} /home/ws06/rzens/workspace/Jul26/moses-cmd/missing --run makeinfo
MYSQLCLIENT_CPPFLAGS =
MYSQLCLIENT_LDFLAGS =
MYSQLCLIENT_LIBS =
@ -141,7 +141,7 @@ exec_prefix = ${prefix}
host_alias =
includedir = ${prefix}/include
infodir = ${prefix}/info
install_sh = /home/ws06/cdyer/cvs-moses-tip/moses-cmd/install-sh
install_sh = /home/ws06/rzens/workspace/Jul26/moses-cmd/install-sh
libdir = ${exec_prefix}/lib
libexecdir = ${exec_prefix}/libexec
localstatedir = ${prefix}/var

View File

@ -10,8 +10,46 @@
#include "PhraseDictionaryTreeAdaptor.h"
#include "TranslationOptionCollectionConfusionNet.h"
// Counters describing the confusion nets seen so far.
// The summary is written to std::cerr when the object is destroyed.
struct CNStats {
  // created/destr: nets constructed / destructed
  // read:          nets passed to collect()
  // colls:         sum of GetSize() over all collected nets
  // words:         sum of the per-column size() over all collected nets
  unsigned created,destr,read,colls,words;

  CNStats() : created(0),destr(0),read(0),colls(0),words(0) {}

  // Dumps the summary (if any net was created) on destruction.
  ~CNStats() {print(std::cerr);}

  // Record the construction of one net.
  void createOne() {++created;}

  // Record the destruction of one net.
  void destroyOne() {++destr;}

  // Add one net to the totals: one more read net, its number of
  // columns, and the number of entries in each of its columns.
  void collect(const ConfusionNet& cn)
  {
    ++read;
    colls+=cn.GetSize();
    for(size_t i=0;i<cn.GetSize();++i)
      words+=cn[i].size();
  }

  // Write the summary to `out`. Nothing is written when no net was
  // ever created.
  void print(std::ostream& out) const
  {
    if(created==0) return;

    // Nets may have been created without any being collected; guard the
    // denominators so the averages read 0 instead of nan.
    const double avgWordsPerColumn = colls>0 ? words/(1.0*colls) : 0.0;
    const double avgColsPerSent    = read>0  ? colls/(1.0*read)  : 0.0;

    out<<"confusion net statistics:\n"
      " created:\t"<<created<<"\n"
      " destroyed:\t"<<destr<<"\n"
      " succ. read:\t"<<read<<"\n"
      " columns:\t"<<colls<<"\n"
      " words:\t"<<words<<"\n"
      " avg. word/column:\t"<<avgWordsPerColumn<<"\n"
      " avg. cols/sent:\t"<<avgColsPerSent<<"\n"
      "\n\n";
  }
};
// Shared counter instance updated by the ConfusionNet constructor,
// destructor and Read() in this file; its own destructor prints the summary.
CNStats stats;
// Stores p as the factor collection of this net; the variant with a
// non-empty body additionally counts the construction via stats.createOne().
ConfusionNet::ConfusionNet(FactorCollection* p)
: InputType(),m_factorCollection(p) {}
: InputType(),m_factorCollection(p) {stats.createOne();}
// Counts the destruction of this net in the shared statistics object.
ConfusionNet::~ConfusionNet()
{
  stats.destroyOne();
}
void ConfusionNet::SetFactorCollection(FactorCollection *p)
{
@ -38,7 +76,9 @@ int ConfusionNet::Read(std::istream& in,
FactorCollection &factorCollection)
{
SetFactorCollection(&factorCollection);
return ReadF(in,factorOrder,0);
int rv=ReadF(in,factorOrder,0);
if(rv) stats.collect(*this);
return rv;
}

View File

@ -18,6 +18,7 @@ class ConfusionNet : public InputType {
FactorCollection *m_factorCollection;
public:
ConfusionNet(FactorCollection* p=0);
~ConfusionNet();
void SetFactorCollection(FactorCollection*);

View File

@ -64,6 +64,6 @@ protected:
public:
LmId GetLmID( const std::string &str ) const;
virtual float GetValue(const vector<const Factor*> &contextFactor) const;
virtual float GetValue(const std::vector<const Factor*> &contextFactor) const;
};

View File

@ -135,7 +135,7 @@ bool Parameter::FilesExist(const string &paramName, size_t tokenizeIndex,std::ve
for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter)
{
StringVec vec = Tokenize(*iter);
if (tokenizeIndex > vec.size())
if (tokenizeIndex >= vec.size())
{
stringstream errorMsg("");
errorMsg << "Expected " << tokenizeIndex << " tokens per"

View File

@ -392,8 +392,6 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
// unknown word, add to target, and add as poss trans
// float weightWP = m_staticData.GetWeightWordPenalty();
// const FactorArray &sourceWord = m_source.GetFactorArray(sourcePos);
size_t isDigit = 0;
if (dropUnknown)
{