moved class InputType to InputType.h;

added more detailed statistics-keeping to SentenceStats;
added custom malloc/realloc (see Util.h);
some commenting


git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@666 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
eherbst 2006-08-11 21:04:38 +00:00
parent c36cc65611
commit 5cb683cd7e
57 changed files with 469 additions and 319 deletions

View File

@ -4,7 +4,7 @@
<cdtproject id="org.eclipse.cdt.managedbuilder.core.managedMake">
<extension id="org.eclipse.cdt.managedbuilder.core.ManagedBuildManager" point="org.eclipse.cdt.core.ScannerInfoProvider"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.domsourceindexer" point="org.eclipse.cdt.core.CIndexer"/>
<extension id="org.eclipse.cdt.core.nullindexer" point="org.eclipse.cdt.core.CIndexer"/>
<data>
<item id="org.eclipse.cdt.core.pathentry">
<pathentry kind="src" path=""/>

View File

@ -1,2 +1,6 @@
configure
.cvsignore
autom4te.cache
aclocal.m4
Debug*
Release*

View File

@ -135,7 +135,7 @@ int main(int argc, const char **argv)
double logPr=0,PP=0,PPwp=0,Pr;
int bos=ng.dict->encode(ng.dict->BoS());
lmt.init_prcache();
lmt.init_probcache();
while(inptxt >> ng){
// reset ngram at begin of sentence

View File

@ -18,29 +18,10 @@
******************************************************************************/
/*
IrstLM: IRST Language Model Toolkit
Copyright (C) 2006 Marcello Federico, ITC-irst Trento, Italy
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef MF_DICTIONARY_H
#define MF_DICTIONARY_H
#include <string.h>
#include <cstring>
#include <iostream>
#define MAX_WORD 100
@ -54,7 +35,6 @@
#define DICT_INITSIZE 100000
#endif
//Begin of sentence symbol
#ifndef BOS_
#define BOS_ "<s>"
@ -66,12 +46,11 @@
#define EOS_ "</s>"
#endif
//End of sentence symbol
//Out-Of-Vocabulary symbol
#ifndef OOV_
#define OOV_ "_unk_"
#endif
typedef struct{
char *word;
int code;
@ -93,7 +72,7 @@ class dictionary{
char ifl; //!< increment flag
int dubv; //!< dictionary size upper bound
int in_oov_lex; //!< flag
int oov_lex_code; //< dictionary
int oov_lex_code; //!< dictionary
char* oov_str; //!< oov string
public:
@ -150,8 +129,6 @@ class dictionary{
int oovfreq=(int)(oovrate * totfreq());
std::cerr << "setting OOV rate to: " << oovrate << " -- freq= " << oovfreq << std::endl;
return freq(oovcode(),oovfreq);
return 1;
}

View File

@ -19,7 +19,7 @@
******************************************************************************/
#include <iostream>
#include <assert.h>
#include <cassert>
#include "mempool.h"
#include "htable.h"
@ -91,7 +91,7 @@ char *htable::search(char *item, HT_ACTION action)
||
action == HT_FIND /* not found, search only */
||
(q = (entry *)memory->alloc())
(q = (entry *)memory->allocate())
==
NULL /* not found, no room */
)
@ -136,13 +136,13 @@ char *htable::scan(HT_ACTION action){
void htable::map(ostream& co,int cols){
entry *p;
char* img=new char[cols+1];
img[cols]='\0';
memset(img,'.',cols);
co << "htable memory map: . (0 items), - (<5), # (>5)\n";
for (int i=0; i<size;i++)
@ -186,7 +186,6 @@ htable::~htable()
delete memory;
}
address htable::HashStr(char *key)
{
char *Key=(htype==STRPTR? *(char **)key:key);
@ -253,7 +252,6 @@ address htable::HashInt(char *key)
return h;
}
int htable::CompStr(char *key1, char *key2)
{
assert(key1 && key2);
@ -329,10 +327,3 @@ htable *ht=new htable(1000/5);
ht2->map();
}
*/

View File

@ -19,11 +19,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <assert.h>
#include <cassert>
#include "math.h"
#include "mempool.h"
#include "htable.h"
@ -31,7 +28,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "dictionary.h"
#include "n_gram.h"
#include "lmtable.h"
using namespace std;
inline void error(char* message){

View File

@ -23,7 +23,7 @@
// Copyright Marcello Federico, ITC-irst, 1998
#include <iostream>
#include <assert.h>
#include <cassert>
#include "mempool.h"
using namespace std;
@ -73,7 +73,7 @@ mempool::mempool(int is, int bs){
}
char * mempool::alloc(){
char * mempool::allocate(){
char *ptr;
@ -116,7 +116,7 @@ char * mempool::alloc(){
}
int mempool::free(char* addr){
int mempool::freemem(char* addr){
// do not check if it belongs to this pool !!
/*
@ -389,13 +389,13 @@ storage::~storage(){
}
char *storage::alloc(int size){
char *storage::allocate(int size){
if (size<=setsize){
if (!poolset[size]){
poolset[size]=new mempool(size,poolsize/size);
}
return poolset[size]->alloc();
return poolset[size]->allocate();
}
else{
@ -412,7 +412,7 @@ char *storage::alloc(int size){
char *storage::realloc(char *oldptr,int oldsize,int newsize){
char *storage::reallocate(char *oldptr,int oldsize,int newsize){
char *newptr;
@ -422,7 +422,7 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
if (newsize<=setsize){
if (!poolset[newsize])
poolset[newsize]=new mempool(newsize,poolsize/newsize);
newptr=poolset[newsize]->alloc();
newptr=poolset[newsize]->allocate();
memset((char*)newptr,0,newsize);
}
else
@ -430,11 +430,11 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
if (oldptr && oldsize){
memcpy(newptr,oldptr,oldsize);
poolset[oldsize]->free(oldptr);
poolset[oldsize]->freemem(oldptr);
}
}
else{
newptr=(char *)std::realloc(oldptr,newsize);
newptr=(char *)realloc(oldptr,newsize);
if (newptr==oldptr)
cerr << "r\b";
else
@ -450,7 +450,7 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
}
int storage::free(char *addr,int size){
int storage::freemem(char *addr,int size){
/*
while(size<=setsize){
@ -463,7 +463,7 @@ int storage::free(char *addr,int size){
if (size>setsize)
return free(addr),1;
else{
poolset[size] && poolset[size]->free(addr);
poolset[size] && poolset[size]->freemem(addr);
}
return 1;
}

View File

@ -74,10 +74,10 @@ class mempool{
void map(std::ostream& co);
//! Allocates a single memory entry
char *alloc();
char *allocate();
//! Frees a single memory entry
int free(char* addr);
int freemem(char* addr);
//! Prints statistics about this mempool
void stat();
@ -153,29 +153,20 @@ class storage{
//! Destroys storage
~storage();
/* names of below functions have been changed so as not to interfere with macros for malloc/realloc/etc -- EVH */
//! Allocates memory
char *alloc(int size);
char *allocate(int size);
//! Realloc memory
char *realloc(char *oldptr,int oldsize,int newsize);
char *reallocate(char *oldptr,int oldsize,int newsize);
//! Frees memory of an entry
int free(char *addr,int size=0);
int freemem(char *addr,int size=0);
//! Prints statistics about storage
void stat();
};
#endif

View File

@ -19,7 +19,7 @@
******************************************************************************/
#include <iomanip>
#include <assert.h>
#include <cassert>
#include "mempool.h"
#include "htable.h"
#include "dictionary.h"

View File

@ -19,11 +19,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
******************************************************************************/
#include <iostream>
#include <fstream>
#include <stdexcept>
#include <assert.h>
#include <cassert>
#include "math.h"
#include "mempool.h"
#include "htable.h"
@ -74,7 +71,7 @@ char* ngramcache::get(const int* ngp,char* info){
int ngramcache::add(const int* ngp,const char* info){
char* entry=mp->alloc();
char* entry=mp->allocate();
memcpy(entry,(char*) ngp,sizeof(int) * ngsize);
memcpy(entry + ngsize * sizeof(int),(char *)info,infosize);
char *found=ht->search((char *)entry,HT_ENTER);

View File

@ -1,4 +1,10 @@
Makefile
stamp-h1
config.status
.cvsignore
config.h
autom4te.cache
.cdtproject
gmon.out
Debug*
Release*
gmon.out

View File

@ -67,7 +67,9 @@ InputType*IOCommandLine::GetInput(InputType* in)
return InputOutput::GetInput(in,std::cin,m_inputFactorOrder, m_factorCollection);
}
// help fn
/***
* print surface factor only for the given phrase
*/
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
assert(outputFactorOrder.size() > 0);
@ -99,17 +101,16 @@ void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<
if ( hypo != NULL)
{
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSourceSpan, reportAllFactors);
OutputSurface(out, hypo->GetPhrase(), outputFactorOrder, reportAllFactors);
OutputSurface(out, hypo->GetTargetPhrase(), outputFactorOrder, reportAllFactors);
if (reportSourceSpan == true
&& hypo->GetPhrase().GetSize() > 0) {
&& hypo->GetTargetPhrase().GetSize() > 0) {
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
}
}
}
void IOCommandLine::Backtrack(const Hypothesis *hypo){
if (hypo->GetPrevHypo() != NULL) {
@ -207,4 +208,3 @@ void IOCommandLine::SetNBest(const LatticePathList &nBestList, long translationI
m_nBestFile<<std::flush;
}

View File

@ -52,6 +52,11 @@ protected:
const FactorMask &m_inputFactorUsed;
FactorCollection &m_factorCollection;
std::ofstream m_nBestFile;
/***
* if false, print all factors for best hypotheses (useful for error analysis)
*/
bool m_printSurfaceOnly;
public:
IOCommandLine(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
@ -82,5 +87,4 @@ inline Sentence *GetInput(std::istream &inputStream
#endif
}
#endif

View File

@ -138,6 +138,8 @@ int main(int argc, char* argv[])
if (staticData.IsDetailedTranslationReportingEnabled()) {
TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
}
manager.CalcDecoderStatistics(staticData);
staticData.CleanUpAfterSentenceProcessing();
}
@ -198,4 +200,3 @@ InputOutput *GetInputOutput(StaticData &staticData)
return inputOutput;
}

View File

@ -1,5 +1,9 @@
// $Id$
/*
* also see moses/SentenceStats
*/
#ifndef _TRANSLATION_ANALYSIS_H_
#define _TRANSLATION_ANALYSIS_H_

View File

@ -1,3 +1,12 @@
.cdtproject
.cvsignore
autom4te.cache
stamp-h1
Makefile.in
config.h
config.status
Makefile
Debug*
Release*
.deps*
.deps
configure

View File

@ -5,13 +5,12 @@
#include <vector>
#include <iostream>
#include "Word.h"
#include "Input.h"
#include "InputType.h"
class FactorCollection;
class TranslationOptionCollection;
class Sentence;
class ConfusionNet : public InputType {
public:
typedef std::vector<std::pair<Word,float> > Column;
@ -40,8 +39,8 @@ class ConfusionNet : public InputType {
int Read(std::istream& in,const std::vector<FactorType>& factorOrder, FactorCollection &factorCollection);
Phrase GetSubString(const WordsRange&) const;
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //not well defined yet
Phrase GetSubString(const WordsRange&) const; //TODO not defined
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const; //TODO not defined
const FactorArray& GetFactorArray(size_t pos) const;

View File

@ -13,7 +13,7 @@ using namespace std;
* later for computing the score.
*
* default type is Msd, meaning will distinguish between monotone, swap, discontinuous rather than
* just monotone/non monotone.
* just monotone/non-monotone.
*/
int DistortionOrientation::GetOrientation(const Hypothesis *curr_hypothesis, int direction, int type)
{
@ -24,8 +24,8 @@ int DistortionOrientation::GetOrientation(const Hypothesis *curr_hypothesis, int
size_t curr_source_start = currSourceRange.GetStartPos();
size_t curr_source_end = currSourceRange.GetEndPos();
size_t curr_target_end = currTargetRange.GetEndPos();
size_t prev_source_start = NULL;
size_t prev_source_end = NULL;
size_t prev_source_start = 0;
size_t prev_source_end = 0;
if(prevHypo!=NULL){
//don't look for attributes of the previous hypothesis if there is no previous hypothesis.
const WordsRange &prevSourceRange = prevHypo->GetCurrSourceWordsRange();

View File

@ -18,6 +18,7 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <iostream>
#include <limits>
@ -30,13 +31,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "SquareMatrix.h"
#include "LexicalReordering.h"
#include "StaticData.h"
#include "Input.h"
#include "InputType.h"
#include "LMList.h"
#include "hash.h"
using namespace std;
unsigned int Hypothesis::s_numNodes = 0;
unsigned int Hypothesis::s_HypothesesCreated = 0;
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
@ -100,7 +100,7 @@ Hypothesis::~Hypothesis()
m_arcList->clear();
delete m_arcList;
m_arcList = 0;
m_arcList = NULL;
}
}
@ -325,7 +325,7 @@ void Hypothesis::CalcScore(const StaticData& staticData, const SquareMatrix &fut
//LEXICAL REORDERING COST
std::vector<LexicalReordering*> m_reorderModels = staticData.GetReorderModels();
for(int i = 0; i < m_reorderModels.size(); i++)
for(unsigned int i = 0; i < m_reorderModels.size(); i++)
{
m_scoreBreakdown.PlusEquals(m_reorderModels[i], m_reorderModels[i]->CalcScore(this));
}
@ -405,7 +405,6 @@ void Hypothesis::PrintHypothesis(const InputType &source, float /*weightDistorti
TRACE_ERR("\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
//PrintLMScores();
}
TO_STRING_BODY(Hypothesis)

View File

@ -33,7 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LanguageModelSingleFactor.h"
#include "ScoreComponentCollection.h"
#include "LexicalReordering.h"
#include "Input.h"
#include "InputType.h"
#include "ObjectPool.h"
class SquareMatrix;
@ -95,8 +95,8 @@ public:
return s_objectPool;
}
static unsigned int s_HypothesesCreated; /**< statistics: how many hypotheses were created in total */
static unsigned int s_numNodes; /**< statistics: how many hypotheses were created in total */
static unsigned int s_HypothesesCreated; // Statistics: how many hypotheses were created in total
int m_id; /**< numeric ID of this hypothesis, used for logging */
/** used by initial seeding of the translation process */
@ -108,7 +108,6 @@ public:
/** return the subclass of Hypothesis most appropriate to the given translation option */
static Hypothesis* Create(const Hypothesis &prevHypo, const TranslationOption &transOpt);
/** return the subclass of Hypothesis most appropriate to the given target phrase */
static Hypothesis* Create(const WordsBitmap &initialCoverage);
/** return the subclass of Hypothesis most appropriate to the given target phrase */
@ -127,20 +126,19 @@ public:
void PrintHypothesis( const InputType &source, float weightDistortion, float weightWordPenalty) const;
/** returns target phrase used to create this hypothesis */
/** return target phrase used to create this hypothesis */
const Phrase &GetTargetPhrase() const
{
// hands back a const reference to the m_targetPhrase member (no copy is made here)
return m_targetPhrase;
}
// void PrintLMScores(const LMList &lmListInitial, const LMList &lmListEnd) const;
/** returns input positions covered by the translation option (phrasal translation) used to create this hypothesis */
/** return input positions covered by the translation option (phrasal translation) used to create this hypothesis */
inline const WordsRange &GetCurrSourceWordsRange() const
{
// hands back a const reference to the m_currSourceWordsRange member (no copy is made here)
return m_currSourceWordsRange;
}
/** returns output word positions of the translation option (phrasal translation) used to create this hypothesis */
inline const WordsRange &GetCurrTargetWordsRange() const
{
return m_currTargetWordsRange;
@ -166,11 +164,7 @@ public:
{
return m_currTargetWordsRange.GetEndPos() + 1;
}
/** same as GetTargetPhrase() */
inline const Phrase &GetPhrase() const
{
return m_targetPhrase;
}
inline const InputType &GetSourcePhrase() const
{
return m_sourceInput;
@ -231,7 +225,7 @@ public:
{
m_prevHypo->ToStream(out);
}
out << GetPhrase();
out << GetTargetPhrase();
}
TO_STRING;

View File

@ -58,7 +58,7 @@ void HypothesisCollection::Add(Hypothesis *hypo)
m_worstScore = m_bestScore + m_beamThreshold;
}
// Prune only of stack is twice as big as needed (lazy pruning)
// Prune only if stack is twice as big as needed (lazy pruning)
if (m_hypos.size() > 2*m_maxHypoStackSize-10)
{
PruneToSize(m_maxHypoStackSize);
@ -86,7 +86,7 @@ void HypothesisCollection::AddPrune(Hypothesis *hypo)
return;
}
StaticData::Instance()->GetSentenceStats().numRecombinations++;
StaticData::Instance()->GetSentenceStats().AddRecombination(*hypo, **iter);
// found existing hypo with same target ending.
// keep the best 1
@ -144,7 +144,7 @@ void HypothesisCollection::PruneToSize(size_t newSize)
// cerr << endl;
++iter;
}
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
// pop the top newSize scores (and ignore them, these are the scores of hyps that will remain)
// ensure to never pop beyond heap size
@ -168,7 +168,7 @@ void HypothesisCollection::PruneToSize(size_t newSize)
{
iterator iterRemove = iter++;
Remove(iterRemove);
StaticData::Instance()->GetSentenceStats().numPruned++;
StaticData::Instance()->GetSentenceStats().AddPruning();
}
else
{
@ -179,6 +179,7 @@ void HypothesisCollection::PruneToSize(size_t newSize)
// set the worstScore, so that newly generated hypotheses will not be added if worse than the worst in the stack
m_worstScore = scoreThreshold;
// cerr << "Heap contains " << bestScores.size() << " items" << endl;
}
}

View File

@ -1,6 +1,6 @@
// $Id$
#include "InputOutput.h"
#include "Input.h"
#include "InputType.h"
/** Default constructor: initialises sentenceId to 0. */
InputOutput::InputOutput()
	: sentenceId(0)
{
}

View File

@ -1,5 +1,4 @@
#include "Input.h"
#include "InputType.h"
/**
 * Construct an input object.
 * \param translationId value stored in m_translationId
 */
InputType::InputType(long translationId)
	: m_translationId(translationId)
{
}
/** Destructor; the body is intentionally empty. */
InputType::~InputType()
{
}

View File

@ -41,7 +41,7 @@ protected:
virtual TranslationOptionCollection* CreateTranslationOptionCollection() const=0;
virtual Phrase GetSubString(const WordsRange&) const =0;
virtual std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const=0;
// virtual std::string GetStringRep(const WordsRange&) const=0;
virtual const FactorArray& GetFactorArray(size_t pos) const=0;
TO_STRING;

View File

@ -19,17 +19,14 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <assert.h>
#include <cassert>
#include <limits>
#include <iostream>
#include <fstream>
#include "dictionary.h"
#include "n_gram.h"
#include "lmtable.h"
#include "LanguageModel_IRST.h"
#include "TypeDef.h"
#include "Util.h"
@ -139,8 +136,8 @@ float LanguageModel_IRST::GetValue(const vector<FactorArrayWrapper> &contextFact
size_t count = contextFactor.size();
m_lmtb_ng->size=0;
if (count< (m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
if (count< m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
if (count< (size_t)(m_lmtb_size-1)) m_lmtb_ng->pushc(m_lmtb_sentenceEnd);
if (count< (size_t)m_lmtb_size) m_lmtb_ng->pushc(m_lmtb_sentenceStart);
for (size_t i = 0 ; i < count ; i++)
{

View File

@ -71,5 +71,4 @@ public:
const void CleanUpAfterSentenceProcessing();
const void InitializeBeforeSentenceProcessing();
};
};

View File

@ -19,11 +19,10 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <assert.h>
#include <cassert>
#include <limits>
#include <iostream>
#include <fstream>
#include "Ngram.h"
#include "Vocab.h"
@ -32,7 +31,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "FactorCollection.h"
#include "Phrase.h"
using namespace std;
LanguageModel_SRI::LanguageModel_SRI(bool registerScore)

View File

@ -2,14 +2,13 @@
#include <iostream>
#include <limits>
#include <assert.h>
#include <cassert>
#include <vector>
#include "LexicalReordering.h"
#include "InputFileStream.h"
#include "DistortionOrientation.h"
#include "StaticData.h"
using namespace std;
/*
@ -21,7 +20,7 @@ LexicalReordering::LexicalReordering(const std::string &filename,
int orientation, int direction,
int condition, const std::vector<float>& weights,
vector<FactorType> input, vector<FactorType> output) :
m_orientation(orientation), m_condition(condition), m_filename(filename), m_numberscores(weights.size()), m_sourceFactors(input), m_targetFactors(output)
m_orientation(orientation), m_condition(condition), m_numberscores(weights.size()), m_filename(filename), m_sourceFactors(input), m_targetFactors(output)
{
//add score producer
const_cast<ScoreIndexManager&>(StaticData::Instance()->GetScoreIndexManager()).AddScoreProducer(this);
@ -113,7 +112,7 @@ std::vector<float> LexicalReordering::CalcScore(Hypothesis *hypothesis)
{
std::vector<float> score(m_numberscores, 0);
vector<float> val;
for(int i=0; i < m_direction.size(); i++)
for(unsigned int i=0; i < m_direction.size(); i++)
{
int direction = m_direction[i];
int orientation = DistortionOrientation::GetOrientation(hypothesis, direction);

View File

@ -67,8 +67,8 @@ private:
// different numbers of probabilities for different ranges of
// orientation variable
static const int MSD_NUM_PROBS = 6;
static const int MONO_NUM_PROBS = 4;
static const unsigned int MSD_NUM_PROBS = 6;
static const unsigned int MONO_NUM_PROBS = 4;
int m_orientation; // msd or monotone
std::vector<int> m_direction; // contains forward, backward, or both (bidirectional)

View File

@ -16,7 +16,7 @@ libmoses_a_SOURCES = \
hash.cpp \
Hypothesis.cpp \
HypothesisCollection.cpp \
Input.cpp \
InputType.cpp \
InputFileStream.cpp \
InputOutput.cpp \
LMList.cpp \

View File

@ -55,12 +55,12 @@ Manager::~Manager()
}
/**
* This is the main decoder loop that translates a sentence by expanding
* Main decoder loop that translates a sentence by expanding
* hypotheses stack by stack, until the end of the sentence.
*/
void Manager::ProcessSentence()
{
m_staticData.GetSentenceStats().ZeroAll();
m_staticData.ResetSentenceStats(m_source);
list < DecodeStep* > &decodeStepList = m_staticData.GetDecodeStepList();
// create list of all possible translations
// this is only valid if:
@ -97,7 +97,7 @@ void Manager::ProcessSentence()
// some logging
if (m_staticData.GetVerboseLevel() > 0) {
//OutputHypoStack();
OutputHypoStackSize();
OutputHypoStackSize();
}
}
@ -146,7 +146,7 @@ void Manager::ProcessOneHypothesis(const Hypothesis &hypothesis)
// if there are reordering limits, make sure it is not violated
// the coverage bitmap is handy here (and the position of the first gap)
const WordsBitmap hypoBitmap = hypothesis.GetWordsBitmap();
const size_t hypoWordCount = hypoBitmap.GetWordsCount()
const size_t hypoWordCount = hypoBitmap.GetNumWordsCovered()
, hypoFirstGapPos = hypoBitmap.GetFirstGapPos()
, sourceSize = m_source.GetSize();
@ -226,7 +226,7 @@ void Manager::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOp
}
// add to hypothesis stack
size_t wordsTranslated = newHypo->GetWordsBitmap().GetWordsCount();
size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
m_hypoStack[wordsTranslated].AddPrune(newHypo);
}
@ -241,7 +241,7 @@ const Hypothesis *Manager::GetBestHypothesis() const
}
/**
* Logging of hypotheses stack sizes
* Logging of hypothesis stack sizes
*/
void Manager::OutputHypoStackSize()
{
@ -255,7 +255,7 @@ void Manager::OutputHypoStackSize()
}
/**
* Logging of hypotheses stack contents
* Logging of hypothesis stack contents
* \param stack number of stack to be reported, report all stacks if 0
*/
void Manager::OutputHypoStack(int stack)
@ -321,3 +321,8 @@ void Manager::CalcNBest(size_t count, LatticePathList &ret) const
}
}
}
void Manager::CalcDecoderStatistics(const StaticData& staticData) const
{
staticData.GetSentenceStats().CalcFinalStats(*GetBestHypothesis());
}

View File

@ -23,7 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <list>
#include "Input.h"
#include "InputType.h"
#include "Hypothesis.h"
#include "StaticData.h"
#include "TranslationOption.h"
@ -76,7 +76,7 @@ protected:
InputType const& m_source; /**< source sentence to be translated */
std::vector < HypothesisCollection > m_hypoStack; /**< stacks to store hypothesis (partial translations) */
// no of elements = no of words in source + 1
// no of elements = no of words in source + 1
StaticData &m_staticData; /**< holds various kinds of constants, counters, and global data structures */
TranslationOptionCollection &m_possibleTranslations; /**< pre-computed list of translation options for the phrases in this sentence */
TargetPhrase m_initialTargetPhrase; /**< used to seed 1st hypo */
@ -96,5 +96,9 @@ public:
void ProcessSentence();
const Hypothesis *GetBestHypothesis() const;
void CalcNBest(size_t count, LatticePathList &ret) const;
/***
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
*/
void CalcDecoderStatistics(const StaticData& staticData) const;
};

View File

@ -11,6 +11,7 @@
#include <string>
#include <iostream>
#include <iterator>
#include "Util.h" //malloc() replacement
// template class for pool of objects
// - useful if many small objects are frequently created and destroyed

View File

@ -32,7 +32,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
/** defines allowed parameters */
/** define allowed parameters */
Parameter::Parameter()
{
AddParam("config", "f", "location of the configuration file");
@ -78,7 +78,7 @@ PARAM_VEC &Parameter::AddParam(const string &paramName, const string &descriptio
return m_setting[paramName];
}
/** initializes a parameter (including abbreviation), sub of constructor */
/** initialize a parameter (including abbreviation), sub of constructor */
PARAM_VEC &Parameter::AddParam(const string &paramName, const string &abbrevName, const string &description)
{
m_valid[paramName] = true;
@ -88,7 +88,7 @@ PARAM_VEC &Parameter::AddParam(const string &paramName, const string &abbrevName
return m_setting[paramName];
}
/** prints descriptions of all parameters */
/** print descriptions of all parameters */
void Parameter::Explain() {
cerr << "Usage:" << endl;
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
@ -103,7 +103,7 @@ void Parameter::Explain() {
}
}
/** checks if an item on the command line is a switch or a value
/** check whether an item on the command line is a switch or a value
* \param token token on the command line to be checked **/
bool Parameter::isOption(const char* token) {
@ -185,7 +185,7 @@ bool Parameter::LoadParam(int argc, char* argv[])
return Validate() && noErrorFlag;
}
/** check if parameter settings make sense */
/** check that parameter settings make sense */
bool Parameter::Validate()
{
bool noErrorFlag = true;
@ -240,7 +240,7 @@ bool Parameter::Validate()
return noErrorFlag;
}
/** checks if a file exist */
/** check whether a file exists */
bool Parameter::FilesExist(const string &paramName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
{
using namespace boost::filesystem;
@ -280,13 +280,12 @@ bool Parameter::FilesExist(const string &paramName, size_t tokenizeIndex,std::ve
errorMsg << "File " << pathStr << " does not exist";
UserMessage::Add(errorMsg.str());
return false;
}
}
}
return true;
}
/** looks for a switch in arg, updates parameter */
/** look for a switch in arg, update parameter */
// TODO arg parsing like this does not belong in the library, it belongs
// in moses-cmd
string Parameter::FindParam(const string &paramSwitch, int argc, char* argv[])

View File

@ -26,7 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "memory.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "Util.h"
#include "Util.h" //malloc() replacement
using namespace std;
@ -39,7 +39,7 @@ Phrase::Phrase(const Phrase &copy)
,m_memPoolIndex(copy.m_memPoolIndex)
{
assert(m_memPoolIndex<s_memPool.size() && s_memPool[m_memPoolIndex]);
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
memcpy(m_factorArray, copy.m_factorArray, m_phraseSize * sizeof(FactorArray));
}
@ -59,7 +59,7 @@ Phrase& Phrase::operator=(const Phrase& x)
m_arraySize=x.m_arraySize;
m_memPoolIndex=x.m_memPoolIndex;
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
memcpy(m_factorArray, x.m_factorArray, m_phraseSize * sizeof(FactorArray));
}
return *this;
@ -73,7 +73,7 @@ Phrase::Phrase(FactorDirection direction)
, m_memPoolIndex(0)
{
assert(m_memPoolIndex<s_memPool.size());
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
}
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
@ -82,7 +82,7 @@ Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWord
{
m_memPoolIndex = (m_phraseSize + ARRAY_SIZE_INCR - 1) / ARRAY_SIZE_INCR - 1;
m_arraySize = (m_memPoolIndex + 1) * ARRAY_SIZE_INCR;
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->alloc();
m_factorArray = (FactorArray*) s_memPool[m_memPoolIndex]->allocate();
for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
{
@ -101,7 +101,7 @@ Phrase::~Phrase()
{
// RZ:
// will segFault if Phrase was default constructed and AddWord was never called
// not sure if this is really the intended behaviour
// TODO not sure if this is really the intended behaviour
// assertion failure is better than segFault, but if(m_factorArray) might be more appropriate
//assert(m_factorArray);
if(m_factorArray)
@ -176,7 +176,7 @@ FactorArray &Phrase::AddWord()
{
if ((m_phraseSize+1) % ARRAY_SIZE_INCR == 0)
{ // need to expand array
FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->alloc();
FactorArray *newArray = (FactorArray*) s_memPool[m_memPoolIndex+1]->allocate();
memcpy(newArray, m_factorArray, m_phraseSize * sizeof(FactorArray));
s_memPool[m_memPoolIndex]->free((char*)m_factorArray);

View File

@ -40,7 +40,7 @@ class Phrase
FactorDirection m_direction;
size_t m_phraseSize, //number of words
m_arraySize,
m_memPoolIndex;
m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
FactorArray *m_factorArray;
public:
@ -109,7 +109,7 @@ public:
Phrase GetSubString(const WordsRange &wordsRange) const;
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
void push_back(Word const& w) {Word::Copy(AddWord(),w.GetFactorArray());}

View File

@ -1,6 +1,6 @@
#include "PhraseDictionaryBase.h"
#include "StaticData.h"
#include "Input.h"
//#include "InputType.h"
PhraseDictionaryBase::PhraseDictionaryBase(size_t noScoreComponent)
: Dictionary(noScoreComponent),m_maxTargetPhrase(0)
@ -25,4 +25,3 @@ unsigned int PhraseDictionaryBase::GetNumScoreComponents() const
{
return this->GetNoScoreComponents();
}

View File

@ -7,7 +7,7 @@
#include "Phrase.h"
#include "FactorCollection.h"
#include "InputFileStream.h"
#include "Input.h"
#include "InputType.h"
#include "ConfusionNet.h"
#include "Sentence.h"
#include "StaticData.h"

View File

@ -1,5 +1,3 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -19,4 +17,10 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "PhraseReference.h"
using std::ostream;
/***
 * write the subphrase designated by a PhraseReference (see GetSubphrase()) to a stream
 */
ostream& operator << (ostream& out, const PhraseReference& phrase)
{
	out << phrase.GetSubphrase();
	return out;
}

View File

@ -0,0 +1,48 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef MOSES_PHRASE_REFERENCE_H
#define MOSES_PHRASE_REFERENCE_H
#include <iostream>
#include "InputType.h"
#include "WordsRange.h"
/***
 * A non-owning reference to a subphrase: a pointer to the full input plus the
 * WordsRange selecting part of it. The parent input may be memory-managed
 * separately; only its address is stored here, so it has to stay alive for as
 * long as this reference is used.
 */
class PhraseReference
{
public:
	/** empty reference: no parent input, range (0, 0) */
	PhraseReference()
		: fullPhrase(NULL)
		, range(0, 0)
	{
	}

	/** refer to the words of phrase selected by r */
	PhraseReference(const InputType& phrase, const WordsRange& r)
		: fullPhrase(&phrase)
		, range(r)
	{
	}

	/**
	 * the whole input this reference points into
	 * NOTE(review): the stored pointer is dereferenced unchecked, so this must
	 * not be called on a default-constructed reference
	 */
	const InputType& GetFullPhrase() const
	{
		return *fullPhrase;
	}

	/** the referenced words, returned by value from InputType::GetSubString() */
	Phrase GetSubphrase() const
	{
		return fullPhrase->GetSubString(range);
	}

protected:
	const InputType* fullPhrase; //not owned; NULL after default construction
	WordsRange range;            //part of *fullPhrase covered by this reference
};
std::ostream& operator << (std::ostream& out, const PhraseReference& phrase);
#endif //MOSES_PHRASE_REFERENCE_H

View File

@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
#include <numeric>
#include <assert.h>
#include <cassert>
#include "ScoreProducer.h"
#include "ScoreIndexManager.h"

View File

@ -25,7 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include "Word.h"
#include "Phrase.h"
#include "Input.h"
#include "InputType.h"
class WordsRangs;
class PhraseDictionaryBase;

View File

@ -0,0 +1,43 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
using std::cout;
using std::endl;
#include "SentenceStats.h"
/***
* to be called after decoding a sentence
*/
void SentenceStats::CalcFinalStats(const Hypothesis& bestHypo)
{
//deleted words
AddDeletedWords(bestHypo);
//inserted words--not implemented yet 8/1 TODO
}
void SentenceStats::AddDeletedWords(const Hypothesis& hypo)
{
//don't check either a null pointer or the empty initial hypothesis (if we were given the empty hypo, the null check will save us)
if(hypo.GetPrevHypo() != NULL && hypo.GetPrevHypo()->GetCurrSourceWordsRange().GetWordsCount() > 0) AddDeletedWords(*hypo.GetPrevHypo());
if(hypo.GetCurrTargetWordsRange().GetWordsCount() == 0)
{
m_deletedWords.push_back(PhraseReference(hypo.GetSourcePhrase(), hypo.GetCurrSourceWordsRange()));
}
}

View File

@ -1,21 +1,107 @@
#ifndef _SENTENCE_STATS_H_
#define _SENTENCE_STATS_H_
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <string>
#include <vector>
#include "Phrase.h"
#include "Hypothesis.h"
#include "TypeDef.h" //FactorArray
#include "InputType.h"
#include "Util.h" //Join()
#include "PhraseReference.h"
struct SentenceStats
struct recombinationInfo
{
SentenceStats() : numRecombinations(0), numPruned(0) {};
unsigned int numRecombinations;
unsigned int numPruned;
recombinationInfo() {} //for std::vector
recombinationInfo(unsigned int srcWords, float gProb, float bProb) : numSourceWords(srcWords), betterProb(gProb), worseProb(bProb) {}
unsigned int numSourceWords;
float betterProb, worseProb;
};
void ZeroAll() { numRecombinations = 0; numPruned = 0; }
/***
 * stats relating to decoder operation on a given sentence
 *
 * Intended use, per the member comments below: construct (or Initialize()) before
 * decoding a sentence, report events with AddRecombination()/AddPruning(), then
 * call CalcFinalStats() once decoding is done.
 */
class SentenceStats
{
public:
	/***
	 * to be called before decoding a sentence
	 */
	SentenceStats(const InputType& source) {Initialize(source);}

	/***
	 * zero the pruning counter, empty all recorded lists and store source.GetSize()
	 * as the total number of source words
	 */
	void Initialize(const InputType& source)
	{
		m_numHyposPruned = 0;
		m_totalSourceWords = source.GetSize();
		m_recombinationInfos.clear();
		m_deletedWords.clear();
		m_insertedWords.clear();
	}

	/***
	 * to be called after decoding a sentence
	 */
	void CalcFinalStats(const Hypothesis& bestHypo);

	//NOTE: reads the static counter Hypothesis::s_HypothesesCreated, which Initialize() does not modify
	unsigned int GetTotalHypos() const {return Hypothesis::s_HypothesesCreated;}
	//one recombinationInfo is stored per AddRecombination() call
	unsigned int GetNumHyposRecombined() const {return m_recombinationInfos.size();}
	unsigned int GetNumHyposPruned() const {return m_numHyposPruned;}
	unsigned int GetTotalSourceWords() const {return m_totalSourceWords;}
	//number of entries in the deleted/inserted lists (an entry of the deleted list is a phrase reference)
	unsigned int GetNumWordsDeleted() const {return m_deletedWords.size();}
	unsigned int GetNumWordsInserted() const {return m_insertedWords.size();}
	const std::vector<PhraseReference>& GetDeletedWords() const {return m_deletedWords;}
	const std::vector<std::string>& GetInsertedWords() const {return m_insertedWords;}

	/***
	 * record one recombination: the number of source words covered by the worse
	 * hypothesis, followed by the total scores of the better and the worse hypothesis
	 */
	void AddRecombination(const Hypothesis& worseHypo, const Hypothesis& betterHypo)
	{
		m_recombinationInfos.push_back(recombinationInfo(worseHypo.GetWordsBitmap().GetNumWordsCovered(),
		                                                 betterHypo.GetTotalScore(), worseHypo.GetTotalScore()));
	}

	/***
	 * count one pruned hypothesis
	 */
	void AddPruning() {m_numHyposPruned++;}

protected:
	/***
	 * auxiliary to CalcFinalStats()
	 *
	 * (declared without the "SentenceStats::" prefix: a qualified name is not
	 * allowed on a member declaration inside its own class)
	 */
	void AddDeletedWords(const Hypothesis& hypo);

	//hypotheses
	std::vector<recombinationInfo> m_recombinationInfos;
	unsigned int m_numHyposPruned;

	//words
	unsigned int m_totalSourceWords;
	std::vector<PhraseReference> m_deletedWords; //count deleted words/phrases in the final hypothesis
	std::vector<std::string> m_insertedWords; //count inserted words in the final hypothesis
};
inline std::ostream& operator<<(std::ostream& os, const SentenceStats& ss)
{
return os << "number of hypotheses recombined=" << ss.numRecombinations << std::endl
<< " \" \" pruned=" << ss.numPruned << std::endl;
return os << "total hypotheses generated = " << ss.GetTotalHypos() << std::endl
<< " number recombined = " << ss.GetNumHyposRecombined() << std::endl
<< " number pruned = " << ss.GetNumHyposPruned() << std::endl
<< " total source words = " << ss.GetTotalSourceWords() << std::endl
<< " words deleted = " << ss.GetNumWordsDeleted() << " (" << Join(" ", ss.GetDeletedWords()) << ")" << std::endl
<< " words inserted = " << ss.GetNumWordsInserted() << " (" << Join(" ", ss.GetInsertedWords()) << ")" << std::endl;
}
#endif

View File

@ -23,7 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <iostream>
#include "TypeDef.h"
#include "Util.h"
#include "Util.h" //malloc() replacement
class SquareMatrix
{

View File

@ -58,7 +58,7 @@ StaticData::StaticData()
{
s_instance = this;
// mempory pools
// memory pools
Phrase::InitializeMemPool();
}
@ -70,7 +70,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
return false;
}
// input type has to specified BEFORE loading the phrase tables!
// input type has to be specified BEFORE loading the phrase tables!
if(m_parameter.GetParam("inputtype").size())
m_inputType=Scan<int>(m_parameter.GetParam("inputtype")[0]);
TRACE_ERR("input type is: "<<m_inputType<<" (0==default: text input, else confusion net format)\n");
@ -105,14 +105,12 @@ bool StaticData::LoadParameters(int argc, char* argv[])
m_verboseLevel = 0;
}
// printing source phrase spans
if (m_parameter.GetParam("report-source-span").size() > 0)
m_reportSourceSpan = Scan<bool>(m_parameter.GetParam("report-source-span")[0]);
else
m_reportSourceSpan = false;
// print all factors of output translations
if (m_parameter.GetParam("report-all-factors").size() > 0)
m_reportAllFactors = Scan<bool>(m_parameter.GetParam("report-all-factors")[0]);
@ -123,9 +121,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
//TODO: CHANGE
std::vector<float> distortionWeights = Scan<float>(m_parameter.GetParam("weight-d"));
//input-factors
//input factors
const vector<string> &inputFactorVector = m_parameter.GetParam("input-factors");
for(size_t i=0; i<inputFactorVector.size(); i++)
{
@ -138,7 +134,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
abort();
}
//output-factors
//output factors
const vector<string> &outputFactorVector = m_parameter.GetParam("output-factors");
for(size_t i=0; i<outputFactorVector.size(); i++)
{
@ -170,7 +166,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
if (lrFileVector.size() > 0)
{
//TODO: starting to be set up for more than one distortion model; not quite
for(int i=0; i< lrFileVector.size(); i++ )
for(unsigned int i=0; i< lrFileVector.size(); i++ )
{
vector<string> token = Tokenize(lrFileVector[i]);
//characteristics of the phrase table
@ -179,7 +175,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
std::string filePath= token[2];
//get the weights for the lex reorderer
TRACE_ERR("weights-lex")
//TODO: THIS WEIGHT GETTING IS WHAT STILL NEEDS TO CHANGE TO SUPPORT MULTIPLE LEXICAL REORDERERS
for(size_t i=1; i<distortionWeights.size(); i++)
@ -331,7 +327,7 @@ bool StaticData::LoadParameters(int argc, char* argv[])
if (oldFormat) {
std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
" [WARNING] config file contains old style generation config format.\n"
" Only the first feature value will be ready. Please use the 4-format\n"
" Only the first feature value will be read. Please use the 4-format\n"
" form (similar to the phrase table spec) to specify the # of features.\n"
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
}
@ -620,8 +616,6 @@ void StaticData::CleanUpAfterSentenceProcessing()
LanguageModel &languageModel = **iterLM;
languageModel.CleanUpAfterSentenceProcessing();
}
}
void StaticData::InitializeBeforeSentenceProcessing(InputType const& in)

View File

@ -54,7 +54,6 @@ protected:
std::list < DecodeStep* > m_decodeStepList;
Parameter m_parameter;
std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
// boost::shared_ptr<UnknownWordHandler> m_unknownWordHandler; //defaults to NULL; pointer allows polymorphism
LMList m_languageModel;
std::vector<float> m_lexWeights;
ScoreIndexManager m_scoreIndexManager;
@ -98,13 +97,10 @@ protected:
WordPenaltyProducer *m_wpProducer;
bool m_reportSourceSpan;
bool m_reportAllFactors;
mutable SentenceStats m_sentenceStats;
bool m_useDistortionFutureCosts;
bool m_isDetailedTranslationReportingEnabled;
mutable boost::shared_ptr<SentenceStats> m_sentenceStats;
public:
StaticData();
@ -127,11 +123,6 @@ public:
// what the hell?
void LoadPhraseTables();
void LoadMapping();
/* void SetUnknownWordHandler(boost::shared_ptr<UnknownWordHandler> unknownWordHandler)
{
m_unknownWordHandler = unknownWordHandler;
}
*/
const PARAM_VEC &GetParam(const std::string &paramName)
{
return m_parameter.GetParam(paramName);
@ -259,6 +250,10 @@ public:
{
return m_isDetailedTranslationReportingEnabled;
}
/** point m_sentenceStats at a freshly constructed SentenceStats for source */
void ResetSentenceStats(const InputType& source) const
{
	//callable on a const StaticData because m_sentenceStats is declared mutable
	m_sentenceStats.reset(new SentenceStats(source));
}
// for mert
size_t GetNBestSize() const
@ -281,7 +276,7 @@ public:
void CleanUpAfterSentenceProcessing();
SentenceStats& GetSentenceStats() const
{
return m_sentenceStats;
return *m_sentenceStats;
}
const std::vector<float>& GetAllWeights() const
{
@ -292,4 +287,3 @@ public:
bool UseDistortionFutureCosts() const {return m_useDistortionFutureCosts;}
};

View File

@ -14,11 +14,14 @@ class Timer
bool running;
time_t start_time;
//TODO in seconds?
double elapsed_time();
public:
// 'running' is initially false. A timer needs to be explicitly started
// using 'start' or 'restart'
/***
* 'running' is initially false. A timer needs to be explicitly started
* using 'start' or 'restart'
*/
Timer() : running(false), start_time(0) { }
void start(const char* msg = 0);
@ -26,25 +29,25 @@ class Timer
// void stop(const char* msg = 0);
void check(const char* msg = 0);
}; // class timer
//===========================================================================
// Return the total time that the timer has been in the "running"
// state since it was first "started" or last "restarted". For
// "short" time periods (less than an hour), the actual cpu time
// used is reported instead of the elapsed time.
};
/***
 * Return the time that has passed since the timer was "started", i.e. the
 * difference between the recorded start time and the current time as given
 * by difftime() on time() values. This is elapsed wall-clock time in seconds;
 * cpu time is not measured.
 */
inline double Timer::elapsed_time()
{
time_t now;
time(&now);
return difftime(now, start_time);
} // timer::elapsed_time
//===========================================================================
// Start a timer. If it is already running, let it continue running.
// Print an optional message.
}
/***
* Start a timer. If it is already running, let it continue running.
* Print an optional message.
*/
inline void Timer::start(const char* msg)
{
// Print an optional message, something like "Starting timer t";
@ -58,11 +61,11 @@ inline void Timer::start(const char* msg)
// Set the start time;
time(&start_time);
}
} // timer::start
//===========================================================================
// Turn the timer off and start it again from 0. Print an optional message.
/***
* Turn the timer off and start it again from 0. Print an optional message.
*/
/*
inline void Timer::restart(const char* msg)
{
@ -76,12 +79,12 @@ inline void Timer::restart(const char* msg)
acc_time = 0;
start_clock = clock();
start_time = time(0);
} // timer::restart
}
*/
//===========================================================================
// Stop the timer and print an optional message.
/***
* Stop the timer and print an optional message.
*/
/*
inline void Timer::stop(const char* msg)
{
@ -92,37 +95,30 @@ inline void Timer::stop(const char* msg)
if (running) acc_time += elapsed_time();
running = false;
} // timer::stop
}
*/
//===========================================================================
// Print out an optional message followed by the current timer timing.
/***
* Print out an optional message followed by the current timer timing.
*/
inline void Timer::check(const char* msg)
{
// Print an optional message, something like "Checking timer t";
if (msg) TRACE_ERR(msg << " : ");
TRACE_ERR("[" << std::setiosflags(std::ios::fixed) << std::setprecision(2)
<< (running ? elapsed_time() : 0) << "] seconds\n");
} // timer::check
//===========================================================================
// Allow timers to be printed to ostreams using the syntax 'os << t'
// for an ostream 'os' and a timer 't'. For example, "cout << t" will
// print out the total amount of time 't' has been "running".
TRACE_ERR("[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
}
/***
* Allow timers to be printed to ostreams using the syntax 'os << t'
* for an ostream 'os' and a timer 't'. For example, "cout << t" will
* print out the total amount of time 't' has been "running".
*/
inline std::ostream& operator<<(std::ostream& os, Timer& t)
{
#ifdef TRACE_ENABLE
os << std::setprecision(2) << std::setiosflags(std::ios::fixed)
<< (t.running ? t.elapsed_time() : 0);
#ifdef TRACE_ENABLE
os << std::setprecision(2) << std::setiosflags(std::ios::fixed) << (t.running ? t.elapsed_time() : 0);
#endif
return os;
}
//===========================================================================
#endif // TIMER_H

View File

@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
,m_sourceWordsRange (wordsRange)
@ -38,11 +38,13 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetP
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
}
// used to create trans opt from unknown word
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
: m_targetPhrase(targetPhrase)
,m_sourceWordsRange (wordsRange)
,m_futureScore(0)
{ // used to create trans opt from unknown word
{
}
void TranslationOption::MergeNewFeatures(const Phrase& phrase, const ScoreComponentCollection2& score, const std::vector<FactorType>& featuresToAdd)
@ -69,8 +71,6 @@ bool TranslationOption::IsCompatible(const Phrase& phrase, const std::vector<Fac
}
}
bool TranslationOption::Overlap(const Hypothesis &hypothesis) const
{
const WordsBitmap &bitmap = hypothesis.GetWordsBitmap();
@ -104,4 +104,3 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
<< possibleTranslation.GetScoreBreakdown();
return out;
}

View File

@ -25,9 +25,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LanguageModel.h"
#include "PhraseDictionary.h"
#include "FactorCollection.h"
#include "Input.h"
#include "InputType.h"
#include "Util.h"
#include "StaticData.h"
using namespace std;
@ -307,7 +306,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const FactorArray &sourc
size_t isDigit = 0;
if (StaticData::Instance()->GetDropUnknown())
{
const Factor *f = sourceWord[0]; // ??? hack. shouldn't know which factor is surface
const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
std::string s = f->ToString();
isDigit = s.find_first_of("0123456789");
if (isDigit == string::npos)

View File

@ -82,8 +82,8 @@ const size_t NUM_FACTORS = MAX_NUM_FACTORS;
enum FactorDirection
{
Input = 0
,Output = 1
Input,
Output
};
enum DecodeType
@ -95,7 +95,7 @@ enum DecodeType
namespace LexReorderType
{
enum LexReorderType
enum LexReorderType //TODO explain values
{
Backward
,Forward
@ -109,14 +109,12 @@ namespace DistortionOrientationType
{
enum DistortionOrientationOptions
{
Monotone
,Msd
Monotone, //distinguish only between monotone and non-monotone as possible orientations
Msd //further separate non-monotone into swapped and discontinuous
};
// Possible values for orientation.
enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC };
};
enum ORIENTATIONS { MONO, NON_MONO, SWAP, DISC }; //TODO explain values
}
enum IOMethod
{

View File

@ -101,4 +101,19 @@ bool Scan<bool>(const std::string &input)
return (lc == "yes" || lc == "y" || lc == "true" || lc == "1");
}
#undef malloc
#undef realloc
/***
 * malloc() wrapper that reports a failed allocation before returning.
 * numBytes: size of the request in bytes
 * returns: exactly what malloc() returned; NULL is passed through, so callers
 *          still have to check the result
 */
void* xmalloc(unsigned int numBytes)
{
	void* ptr = malloc(numBytes);
	//a zero-byte request may legitimately yield NULL, so only real failures are reported;
	//the message goes to std::cerr (like TRACE_ERR) so it cannot mix into data written to std::cout
	if(ptr == NULL && numBytes != 0)
		std::cerr << "[FYI] xmalloc(): malloc returns null on request for " << numBytes << " bytes" << std::endl;
	return ptr;
}
/***
 * realloc() wrapper that reports a failed reallocation before returning.
 * ptr:      block previously obtained from malloc()/realloc(), or NULL
 * numBytes: requested new size in bytes
 * returns: exactly what realloc() returned; NULL is passed through, so callers
 *          still have to check the result
 */
void* xrealloc(void* ptr, unsigned int numBytes)
{
	void* rptr = realloc(ptr, numBytes);
	//a zero-byte request may legitimately yield NULL, so only real failures are reported;
	//the message goes to std::cerr (like TRACE_ERR) so it cannot mix into data written to std::cout
	if(rptr == NULL && numBytes != 0)
		std::cerr << "[FYI] xrealloc(): realloc returns null on request for " << numBytes << " bytes" << std::endl;
	return rptr;
}

View File

@ -27,7 +27,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include <vector>
#include <cmath>
#include <assert.h>
#ifdef TRACE_ENABLE
#define TRACE_ERR(str) { std::cerr << str; }
@ -134,7 +133,19 @@ inline std::vector<std::string> TokenizeMultiCharSeparator(
tokens.push_back(str.substr(pos, nextPos - pos));
return tokens;
}
/***
 * Concatenate the elements of items into one string, writing delimiter between
 * consecutive elements (never before the first or after the last).
 * An empty vector yields the empty string.
 *
 * pre: T can be inserted into an ostream
 */
template <typename T>
std::string Join(const std::string& delimiter, const std::vector<T>& items)
{
	if(items.empty()) return "";

	std::ostringstream outstr;
	outstr << items[0];
	//use the caller's delimiter between elements rather than a fixed " "
	for(size_t i = 1; i < items.size(); i++) outstr << delimiter << items[i];
	return outstr.str();
}
// transform prob to natural log score
@ -180,7 +191,7 @@ inline float UntransformSRIScore(float logNScore)
inline float FloorSRIScore(float sriScore)
{
return (std::max)(sriScore , LOWEST_SCORE);
return std::max(sriScore, LOWEST_SCORE);
}
inline float CalcTranslationScore(const std::vector<float> &scoreVector,
@ -223,5 +234,10 @@ std::string GetMD5Hash(const std::string &filePath);
/***
 * Drop spare capacity from a container by swapping it with a fresh copy of
 * itself, then assert that capacity and size agree.
 */
template<typename T>
inline void ShrinkToFit(T& v)
{
	const bool hasSlack = v.capacity() > v.size();
	if(hasSlack)
	{
		//the copy is destroyed at the end of this scope, taking the old storage with it
		T trimmed(v);
		trimmed.swap(v);
	}
	assert(v.capacity() == v.size());
}
/***
* include checks for null return value, and helpful print statements
*/
void* xmalloc(unsigned int numBytes);
void* xrealloc(void* ptr, unsigned int numBytes);
#define malloc(x) xmalloc(x)
#define realloc(x, n) xrealloc(x, n)

View File

@ -29,9 +29,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Util.h"
#include "FactorArrayWrapper.h"
class Phrase;
/***
* hold a set of factors for a single word
*
* TODO either replace all uses of FactorArray with Word or vice versa; don't only use the wrapper in half of cases!
*/
class Word : public FactorArrayWrapper
{
friend std::ostream& operator<<(std::ostream&, const Word&);
@ -40,7 +44,9 @@ protected:
FactorArray m_factorArray;
public:
// deep copy
/**
* deep copy
*/
Word(const Word &copy);
Word(const FactorArray &factorArray);
Word();
@ -63,7 +69,8 @@ public:
TO_STRING;
// static functions
/* static functions */
// FactorArray
static void Copy(FactorArray &target, const FactorArray &source);
static void Initialize(FactorArray &factorArray);

View File

@ -28,12 +28,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cmath>
#include "TypeDef.h"
#include "WordsRange.h"
#include "Util.h" //malloc() replacement
class WordsBitmap
{
friend std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap);
protected:
const size_t m_size;
const size_t m_size; //number of words in sentence
bool *m_bitmap;
// ticks of words that have been done;
@ -69,7 +70,7 @@ public:
free(m_bitmap);
}
size_t GetWordsCount() const
size_t GetNumWordsCovered() const
{
size_t count = 0;
for (size_t pos = 0 ; pos < m_size ; pos++)
@ -121,7 +122,7 @@ public:
}
bool IsComplete() const
{
return GetSize() == GetWordsCount();
return GetSize() == GetNumWordsCovered();
}
bool Overlap(const WordsRange &compare) const
@ -158,13 +159,11 @@ public:
return std::memcmp(m_bitmap, compare.m_bitmap, thisSize);
}
int GetFutureCosts(int lastPos) const ;
TO_STRING;
};
// friend
inline std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitmap)
{
@ -174,4 +173,3 @@ inline std::ostream& operator<<(std::ostream& out, const WordsBitmap& wordsBitma
}
return out;
}

View File

@ -23,7 +23,7 @@
// Copyright Marcello Federico, ITC-irst, 1998
#include <iostream>
#include <assert.h>
#include <cassert>
#include "mempool.h"
#include "TypeDef.h"
#include "Util.h"
@ -75,7 +75,7 @@ mempool::mempool(int is, int bs){
}
char * mempool::alloc(){
char * mempool::allocate(){
char *ptr;
@ -392,14 +392,13 @@ storage::~storage(){
delete [] poolset;
}
char *storage::alloc(int size){
char *storage::allocate(int size){
if (size<=setsize){
if (!poolset[size]){
poolset[size]=new mempool(size,poolsize/size);
}
return poolset[size]->alloc();
return poolset[size]->allocate();
}
else{
@ -414,9 +413,7 @@ char *storage::alloc(int size){
}
}
char *storage::realloc(char *oldptr,int oldsize,int newsize){
char *storage::reallocate(char *oldptr,int oldsize,int newsize){
char *newptr;
@ -426,7 +423,7 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
if (newsize<=setsize){
if (!poolset[newsize])
poolset[newsize]=new mempool(newsize,poolsize/newsize);
newptr=poolset[newsize]->alloc();
newptr=poolset[newsize]->allocate();
memset((char*)newptr,0,newsize);
}
else
@ -438,7 +435,7 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
}
}
else{
newptr=(char *)std::realloc(oldptr,newsize);
newptr=(char *)realloc(oldptr,newsize);
if (newptr==oldptr)
cerr << "r\b";
else
@ -450,10 +447,8 @@ char *storage::realloc(char *oldptr,int oldsize,int newsize){
}
return newptr;
}
int storage::free(char *addr,int size){
/*

View File

@ -27,7 +27,6 @@
#ifndef NULL
const int NULL=0;
//#define NULL=0;
#endif
#include <iostream> // std::ostream
@ -49,7 +48,7 @@ class memnode{
//! Memory pool
/*! A memory pool is composed of:
- a linked list of block_num memory blocks
- a linked list of blocknum memory blocks
- each block might contain up to block_size items
- each item is made of exactly item_size bytes
*/
@ -74,7 +73,7 @@ class mempool{
void map(std::ostream& co);
//! Allocates a single memory entry
char *alloc();
char *allocate();
//! Frees a single memory entry
int free(char* addr);
@ -123,14 +122,12 @@ class strstack{
void stat();
int used(){return memory;};
int used(){return memory;}
int wasted(){return waste;};
int wasted(){return waste;}
};
//! Manages multiple memory pools
//! Manage multiple memory pools
/*!
This class permits to manage memory pools
@ -139,7 +136,6 @@ class strstack{
- items larger than the limit are allocated with new
*/
class storage{
mempool **poolset; //!< array of memory pools
int setsize; //!< number of memory pools/maximum elem size
@ -155,10 +151,10 @@ class storage{
~storage();
//! Allocates memory
char *alloc(int size);
char *allocate(int size);
//! Realloc memory
char *realloc(char *oldptr,int oldsize,int newsize);
char *reallocate(char *oldptr,int oldsize,int newsize);
//! Frees memory of an entry
int free(char *addr,int size=0);
@ -167,15 +163,4 @@ class storage{
void stat();
};
#endif