From 45588167bdf3259b0f8ad34827f1a7837f4f9fac Mon Sep 17 00:00:00 2001 From: zens Date: Fri, 14 Jul 2006 02:06:09 +0000 Subject: [PATCH] almost no modification to existing system, only additions: - prefix tree structured phrase table - binary phrase table format with on-demand loading git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@101 1f5c12ca-751b-0410-a591-d2e778427230 --- moses/src/CountedPointer.h | 68 ++++++ moses/src/FactorCollection.cpp | 29 +-- moses/src/File.h | 68 ++++++ moses/src/FilePtr.h | 56 +++++ moses/src/PhraseDictionaryTree.cpp | 347 +++++++++++++++++++++++++++++ moses/src/PhraseDictionaryTree.h | 47 ++++ moses/src/PrefixTree.h | 281 +++++++++++++++++++++++ moses/src/StaticData.cpp | 6 +- moses/src/TypeDef.h | 7 +- moses/src/Util.h | 6 + 10 files changed, 884 insertions(+), 31 deletions(-) create mode 100644 moses/src/CountedPointer.h create mode 100644 moses/src/File.h create mode 100644 moses/src/FilePtr.h create mode 100644 moses/src/PhraseDictionaryTree.cpp create mode 100644 moses/src/PhraseDictionaryTree.h create mode 100644 moses/src/PrefixTree.h diff --git a/moses/src/CountedPointer.h b/moses/src/CountedPointer.h new file mode 100644 index 000000000..9273d5a1a --- /dev/null +++ b/moses/src/CountedPointer.h @@ -0,0 +1,68 @@ +// $Id$ + +#ifndef COUNTEDPOINTER_H_ +#define COUNTEDPOINTER_H_ + +// see http://ootips.org/yonat/4dev/counted_ptr.h +template class CountedPointer +{ +public: + explicit CountedPointer(T *p = 0) : pointerInfo_(0) + { + if (p) + pointerInfo_ = new PointerAndCounter(p); + } + CountedPointer(const CountedPointer &p) { acquire(p.pointerInfo_); }; + ~CountedPointer() { release(); }; + CountedPointer &operator=(const CountedPointer &p) + { + if (this != &p) { + release(); + acquire(p.pointerInfo_); + } + return *this; + } + CountedPointer &operator=(T *p) + { + release(); + pointerInfo_ = new PointerAndCounter(p); + return *this; + } + + operator bool() const { return pointerInfo_; }; + bool hasP() const { return pointerInfo_->pointer != 0; }; + + T& operator*() const { return *pointerInfo_->pointer; }; + T* operator->() const { return pointerInfo_->pointer; }; + bool unique() const { return (!pointerInfo_ || pointerInfo_->counter == 1); }; + void destroy() { release(); }; + + //void operator delete(void *p) { release(); }; + +private: + struct PointerAndCounter + { + T *pointer; + unsigned counter; + PointerAndCounter(T* p = 0, unsigned c = 1) : pointer(p), counter(c) {}; + } *pointerInfo_; + + void acquire(PointerAndCounter *c) + { + pointerInfo_ = c; + if (pointerInfo_) + (pointerInfo_->counter)++; + } + + void release() + { + if (pointerInfo_) { + if (--(pointerInfo_->counter) == 0) { + delete pointerInfo_->pointer; + delete pointerInfo_; + } + pointerInfo_ = 0; + } + } +}; +#endif diff --git a/moses/src/FactorCollection.cpp b/moses/src/FactorCollection.cpp index 151a39550..18e386ccc 100755 --- a/moses/src/FactorCollection.cpp +++ b/moses/src/FactorCollection.cpp @@ -53,32 +53,9 @@ const Factor *FactorCollection::AddFactor(FactorDirection direction , LmId lmId) { // find string id - const string *ptr; - StringSet::const_iterator iterString = m_factorStringCollection.find(factorString); - if (iterString == m_factorStringCollection.end()) - { - const pair< StringSet::iterator, bool > &pairRet = m_factorStringCollection.insert(factorString); - const string &str = *pairRet.first; - ptr = &str; - } - else - { - const string &str = *iterString; - ptr = &str; - } - - Factor findFactor(direction, factorType, ptr, lmId); - FactorSet::const_iterator iter = m_collection.find(findFactor); - - if (iter == m_collection.end()) - { // new factor - pair< FactorSet::iterator, bool > pairRet = m_collection.insert(findFactor); - return &(*pairRet.first); - } - else - { - return &(*iter); - } + const string *ptr=&(*m_factorStringCollection.insert(factorString).first); +// Factor findFactor(direction, factorType, ptr, lmId); + return &(*m_collection.insert(Factor(direction, factorType, ptr, lmId)).first); } const Factor *FactorCollection::AddFactor(FactorDirection direction diff --git a/moses/src/File.h b/moses/src/File.h new file mode 100644 index 000000000..fac915f55 --- /dev/null +++ b/moses/src/File.h @@ -0,0 +1,68 @@ +/* ---------------------------------------------------------------- */ +/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */ +/* Richard Zens */ +/* ---------------------------------------------------------------- */ +#ifndef FILE_H_ +#define FILE_H_ +#include +#include + +static const off_t InvalidOffT=-1; + +// WARNING: +// these functions work only for bitwise read/write-able types + +template inline size_t fWrite(FILE* f,const T& t) { + if(fwrite(&t,sizeof(t),1,f)!=1) { + std::cerr<<"ERROR:: fwrite!\n";abort();} + return sizeof(t); +} + +template inline void fRead(FILE* f,T& t) { + if(fread(&t,sizeof(t),1,f)!=1) {std::cerr<<"ERROR: fread!\n";abort();} +} + +template inline size_t fWrite(FILE* f,const T* b,const T* e) { + unsigned s=e-b;size_t rv=fWrite(f,s); + if(fwrite(b,sizeof(T),s,f)!=s) {std::cerr<<"ERROR: fwrite!\n";abort();} + return rv+sizeof(T)*s; +} + +template inline size_t fWrite(FILE* f,const T b,const T e) { + unsigned s=std::distance(b,e);size_t rv=fWrite(f,s); + if(fwrite(&(*b),sizeof(T),s,f)!=s) {std::cerr<<"ERROR: fwrite!\n";abort();} + return rv+sizeof(T)*s; +} + +template inline size_t fWriteVector(FILE* f,const C& v) { + unsigned s=v.size(); + size_t rv=fWrite(f,s); + if(fwrite(&v[0],sizeof(typename C::value_type),s,f)!=s) {std::cerr<<"ERROR: fwrite!\n";abort();} + return rv+sizeof(typename C::value_type)*s; +} + +template inline void fReadVector(FILE* f, C& v) { + unsigned s;fRead(f,s);v.resize(s); + unsigned r=fread(&(*v.begin()),sizeof(typename C::value_type),s,f); + if(r!=s) { + std::cerr<<"ERROR: freadVec! "< class FilePtr { +public: +#ifdef USECPFP + typedef CountedPointer Ptr; +#else + typedef T* Ptr; +#endif + +private: + FILE* f; + off_t pos; + mutable Ptr t; +public: + FilePtr(FILE* f_=0,off_t p=0) : f(f_),pos(p),t(0) {} + ~FilePtr() {} + + void set(FILE* f_,off_t p) {f=f_;pos=p;} + + void free() { +#ifdef USECPFP + t.destroy(); +#else + delete t; t=0; +#endif + } + + T& operator* () {load();return *t;} + Ptr operator->() {load();return t;} + operator Ptr () {load();return t;} + + const T& operator* () const {load();return *t;} + const Ptr operator->() const {load();return t;} + operator const Ptr () const {load();return t;} + + Ptr getPtr() {return t;} + const Ptr getPtr() const {return t;} + + operator bool() const {return (f && pos!=InvalidOffT);} + + void load() const { + if(t) return; + if(f && pos!=InvalidOffT) {fSeek(f,pos); t=new T(f);} + } +}; +#endif diff --git a/moses/src/PhraseDictionaryTree.cpp b/moses/src/PhraseDictionaryTree.cpp new file mode 100644 index 000000000..58274d875 --- /dev/null +++ b/moses/src/PhraseDictionaryTree.cpp @@ -0,0 +1,347 @@ +#include "PhraseDictionaryTree.h" +#include +#include +#include +#include +#include + +#include "PrefixTree.h" +#include "File.h" +#include "FactorCollection.h" + + template + std::ostream& operator<<(std::ostream& out,const std::vector& x) { + out<::const_iterator iend=x.end(); + for(typename std::vector::const_iterator i=x.begin();i!=iend;++i) out<<*i<<' '; + return out; + } + + + +typedef unsigned LabelId; +LabelId InvalidLabelId=std::numeric_limits::max(); +LabelId Epsilon=InvalidLabelId-1; + +typedef std::vector IPhrase; +typedef std::vector Scores; + +typedef PrefixTreeF PTF; + +template > +class LVoc { + typedef A Key; + typedef B M; + typedef std::vector V; + M m; + V data; +public: + LVoc() {} + + bool isKnown(const Key& k) const {return m.find(k)!=m.end();} + LabelId index(const Key& k) const { + typename M::const_iterator i=m.find(k); + return i!=m.end()? i->second : InvalidLabelId;} + LabelId add(const Key& k) { + std::pair p=m.insert(std::make_pair(k,data.size())); + if(p.second) data.push_back(k); + assert(p.first->second>=0 && static_cast(p.first->second)second; + } + const Key& symbol(LabelId i) const { + assert(i>=0);assert(static_cast(i)=0;--i) + out<>i>>k) { + if(i>=data.size()) data.resize(i+1); + data[i]=k; + m[k]=i; + } + } + } +}; + + +class TgtCand { + IPhrase e; + Scores sc; +public: + TgtCand() {} + TgtCand(const IPhrase& a,const Scores& b) : e(a),sc(b) {} + TgtCand(FILE* f) {readBin(f);} + + const IPhrase& GetPhrase() const {return e;} + const Scores& GetScores() const {return sc;} + + void writeBin(FILE* f) const { + fWriteVector(f,e);fWriteVector(f,sc);} + void readBin(FILE* f) {fReadVector(f,e);fReadVector(f,sc);} +}; + + +class TgtCands : public std::vector { + typedef std::vector MyBase; +public: + TgtCands() : MyBase() {} + + void writeBin(FILE* f) const { + unsigned s=size();fWrite(f,s);for(size_t i=0;ioperator [](i).writeBin(f); + } + void readBin(FILE* f) { + unsigned s;fRead(f,s);resize(s);for(size_t i=0;ioperator [](i).readBin(f); + } + +}; + + + +struct PDTimp { + typedef PrefixTreeF PTF; + typedef FilePtr CPT; + typedef std::vector Data; + typedef LVoc WordVoc; + + Data data; + std::vector srcOffsets; + + FILE *os,*ot; + WordVoc sv,tv; + + FactorCollection *m_factorCollection; + FactorType m_factorType; + + PDTimp() : os(0),ot(0),m_factorCollection(0),m_factorType(Surface) {} + ~PDTimp() {if(os) fclose(os);if(ot) fclose(ot);} + + int ReadBinary(const std::string& fn) { + std::string ifs(fn+".binphr.srctree"), + ift(fn+".binphr.tgtdata"), + ifi(fn+".binphr.idx"), + ifsv(fn+".binphr.srcvoc"), + iftv(fn+".binphr.tgtvoc"); + + FILE *ii=fOpen(ifi.c_str(),"rb"); + fReadVector(ii,srcOffsets); + fclose(ii); + + os=fOpen(ifs.c_str(),"rb"); + ot=fOpen(ift.c_str(),"rb"); + + // std::cerr<<"the load offsets are "<=data.size()) return InvalidOffT; + if(data[f[0]]) return data[f[0]]->find(f); else return InvalidOffT; + } + + void GetTargetCandidates(const IPhrase& f,TgtCands& tgtCands) + { + off_t tCandOffset=FindOffT(f); + if(tCandOffset==InvalidOffT) return; + fSeek(ot,tCandOffset); + tgtCands.readBin(ot); + } + +}; + + +PhraseDictionaryTree::PhraseDictionaryTree(size_t noScoreComponent,FactorCollection *fc,FactorType ft) + : Dictionary(noScoreComponent),imp(new PDTimp) + { + imp->m_factorCollection=fc; + imp->m_factorType=ft; + } + +PhraseDictionaryTree::~PhraseDictionaryTree() {delete imp;} + +void PhraseDictionaryTree::GetTargetCandidates(const std::vector& src,std::vector& rv) const +{ + IPhrase f(src.size()); + for(size_t i=0;isv.index(src[i]->GetString()); + if(f[i]==InvalidLabelId) return; + } + + TgtCands tgtCands; + imp->GetTargetCandidates(f,tgtCands); + + for(size_t i=0;i vf; + vf.reserve(iphrase.size()); + for(size_t j=0;jm_factorCollection->AddFactor(Output,imp->m_factorType,imp->tv.symbol(iphrase[j]))); + rv.push_back(FactorTgtCand(vf,tgtCands[i].GetScores())); + } +} + +void PhraseDictionaryTree::PrintTargetCandidates(const std::vector& src,std::ostream& out) const +{ + IPhrase f(src.size()); + for(size_t i=0;isv.index(src[i]); + if(f[i]==InvalidLabelId) return; + } + + TgtCands tcand; + imp->GetTargetCandidates(f,tcand); + + out<<"there are "<tv.symbol(iphr[j])<<" "; + out<<'\n'; + } +} + + + // for mert +void PhraseDictionaryTree::SetWeightTransModel(const std::vector &) {} + +int PhraseDictionaryTree::CreateBinaryFileFromAsciiPhraseTable(std::istream& inFile,const std::string& out) { + std::string line; + size_t count = 0; + + std::string ofn(out+".binphr.srctree"), + oft(out+".binphr.tgtdata"), + ofi(out+".binphr.idx"), + ofsv(out+".binphr.srcvoc"), + oftv(out+".binphr.tgtvoc"); + + FILE *os=fOpen(ofn.c_str(),"wb"), + *ot=fOpen(oft.c_str(),"wb"); + + typedef PrefixTreeSA PSA; + PSA *psa=new PSA;PSA::setDefault(InvalidOffT); + + LabelId currFirstWord=InvalidLabelId; + IPhrase currF; + TgtCands tgtCands; + std::vector vo; + size_t lnc=0; + while(getline(inFile, line)) { + ++lnc; + std::istringstream is(line);std::string w; + IPhrase f,e;Scores sc; + + while(is>>w && w!="|||") f.push_back(imp->sv.add(w)); + while(is>>w && w!="|||") e.push_back(imp->tv.add(w)); + while(is>>w && w!="|||") sc.push_back(atof(w.c_str())); + + + if(f.empty()) { + std::cerr<<"WARNING: empty source phrase in line '"<insert(f); + if(d==InvalidOffT) d=fTell(ot); + else { + std::cerr<<"ERROR: source phrase already inserted (A)!\nline: '"<=vo.size()) vo.resize(currFirstWord+1,InvalidOffT); + vo[currFirstWord]=fTell(os); + pf.create(*psa,os); + // clear + delete psa;psa=new PSA; + currFirstWord=f[0]; + } + + // insert src phrase in prefix tree + assert(psa); + PSA::Data& d=psa->insert(f); + if(d==InvalidOffT) d=fTell(ot); + else { + std::cerr<<"ERROR: source phrase already inserted (B)!\nline: '"<=vo.size()) vo.resize(currFirstWord+1,InvalidOffT); + vo[currFirstWord]=fTell(os); + pf.create(*psa,os); + delete psa;psa=0; + + fclose(os); + fclose(ot); + + std::vector inv; + for(size_t i=0;isv.Write(ofsv); + imp->tv.Write(oftv); + + return 1; +} + + +int PhraseDictionaryTree::ReadBinary(const std::string& fn) { + std::cerr<<"size of off_t "<ReadBinary(fn); +} diff --git a/moses/src/PhraseDictionaryTree.h b/moses/src/PhraseDictionaryTree.h new file mode 100644 index 000000000..cd64feee8 --- /dev/null +++ b/moses/src/PhraseDictionaryTree.h @@ -0,0 +1,47 @@ +#ifndef PHRASEDICTIONARYTREE_H_ +#define PHRASEDICTIONARYTREE_H_ +#include +#include +#include +#include "TypeDef.h" +#include "Dictionary.h" +#include "PhraseDictionary.h" + +class Phrase; +class PDTimp; +class FactorCollection; + + +typedef std::pair,std::vector > FactorTgtCand; + + +class PhraseDictionaryTree : public Dictionary { + PDTimp *imp; //implementation +public: + PhraseDictionaryTree(size_t noScoreComponent,FactorCollection* factorCollection=0,FactorType factorType=Surface); + virtual ~PhraseDictionaryTree(); + + DecodeType GetDecodeType() const + { + return Translate; + } + + int CreateBinaryFileFromAsciiPhraseTable(std::istream& In,const std::string& OutputFileNamePrefix); + int ReadBinary(const std::string& FileNamePrefix); + + + size_t GetSize() const + { + return 0; + } + +// const TargetPhraseCollection *FindEquivPhrase(const Phrase &source) const; + + void GetTargetCandidates(const std::vector& src,std::vector& rv) const; + void PrintTargetCandidates(const std::vector& src,std::ostream& out) const; + + // for mert + void SetWeightTransModel(const std::vector &weightT); + +}; +#endif /*PHRASEDICTIONARYTREE_H_*/ diff --git a/moses/src/PrefixTree.h b/moses/src/PrefixTree.h new file mode 100644 index 000000000..b9db696a2 --- /dev/null +++ b/moses/src/PrefixTree.h @@ -0,0 +1,281 @@ + +/* ---------------------------------------------------------------- */ +/* Copyright 2005 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */ +/* Richard Zens */ +/* ---------------------------------------------------------------- */ +#ifndef PREFIXTREE_H_ +#define PREFIXTREE_H_ +#include +#include +#include +#include +#include "Util.h" +#include "FilePtr.h" +#include "File.h" +#ifdef DEBUG +#include "CountObjects.h" +#endif + +template +class PrefixTreeSA { +public: + typedef T Key; + typedef D Data; + + typedef PrefixTreeSA Self; + typedef std::vector VT; + typedef std::vector VP; + typedef std::vector VD; + + VT keys; + VP ptr; + VD data; + + static Data def; + +public: + PrefixTreeSA() {} + + ~PrefixTreeSA() {for(size_t i=0;i Data& insert(fwiter b,fwiter e) { + typename VT::iterator i=std::lower_bound(keys.begin(),keys.end(),*b); + typename VT::iterator kb=keys.begin(); + size_t pos=std::distance(kb,i); + + if(i==keys.end() || *i!=*b) { + keys.insert(i,*b); + data.insert(data.begin()+pos,def); + ptr.insert(ptr.begin()+pos,0); + } + if(++b!=e) { + if(!ptr[pos]) ptr[pos]=new Self; + return ptr[pos]->insert(b,e); + } + else return data[pos]; + } + // insert container + template Data& insert(const cont& c) { + return insert(c.begin(),c.end());} + + size_t size() const {return keys.size();} + const Key& getKey(size_t i) const {return keys[i];} + const Data& getData(size_t i) const {return data[i];} + const Self* getPtr(size_t i) const {return ptr[i];} + + size_t findKey(const Key& k) const { + typename VT::const_iterator i=std::lower_bound(keys.begin(),keys.end(),k); + if(i==keys.end() || *i!=k) return keys.size(); + return std::distance(keys.begin(),i); + } + + // find sequence + template const Data* findPtr(fwiter b,fwiter e) const { + size_t pos=findKey(*b); + if(pos==keys.size()) return 0; + if(++b==e) return &data[pos]; + if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0; + } + // find container + template const Data* findPtr(const cont& c) const { + return findPtr(c.begin(),c.end());} + + + // find sequence + template const Data& find(fwiter b,fwiter e) const { + if(const Data* p=findPtr(b,e)) return *p; else return def; + } + + // find container + template const Data& find(const cont& c) const { + return find(c.begin(),c.end());} + + void shrink() { + ShrinkToFit(keys); ShrinkToFit(ptr); ShrinkToFit(data);} + +}; +template D PrefixTreeSA::def; + +///////////////////////////////////////////////////////////////////////////// + +template +class PrefixTreeF { +public: + typedef T Key; + typedef D Data; +private: + typedef PrefixTreeF Self; +public: + typedef FilePtr Ptr; +private: + typedef std::vector VK; + typedef std::vector VD; + typedef std::vector VP; + + VK keys; + VD data; + VP ptr; + + static Data def; + + off_t startPos; + FILE* f; +public: +#ifdef DEBUG + DECLAREMEMSTAT(Self); +#endif + + PrefixTreeF(FILE* f_=0) : f(f_) {if(f) read();} + + ~PrefixTreeF() {free();} + + void read() { + startPos=fTell(f); + fReadVector(f,keys); + fReadVector(f,data); + ptr.clear();ptr.resize(keys.size()); + for(size_t i=0;ifree();} + + void reserve(size_t s) { + keys.reserve(s);data.reserve(s);ptr.reserve(s);} + + template + void changeData(fwiter b,fwiter e,const Data& d) { + typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b); + if(i==keys.end() || *i!=*b) { + std::cerr<<"ERROR: key not found in changeData!\n"; return;} + typename VK::const_iterator kb=keys.begin(); + size_t pos=std::distance(kb,i); + if(++b==e) { + off_t p=startPos+keys.size()*sizeof(Key)+2*sizeof(unsigned)+pos*sizeof(Data); + std::cerr<<"elem found at pos "<changeData(b,e,d); else { + std::cerr<<"ERROR: seg not found!in changeData\n"; + } + } + + + void create(const PrefixTreeSA& psa,const std::string& fname) { + FILE* f=fOpen(fname.c_str(),"wb"); + create(psa,f); + fclose(f); + } + + void create(const PrefixTreeSA& psa,FILE* f,int verbose=0) { + setDefault(psa.getDefault()); + + typedef std::pair*,off_t> P; + typedef std::deque

Next; + + Next next; + + next.push_back(P(&psa,fTell(f))); + bool isFirst=1; + size_t ns=1; + while(next.size()) { + if(verbose && next.size()>ns) { + std::cerr<<"stack size in PF create: "<& p=*pp.first; + off_t pos=pp.second; + next.pop_back(); + + if(!isFirst) { + off_t curr=fTell(f); + fSeek(f,pos); + fWrite(f,curr); + fSeek(f,curr); + } else isFirst=0; + + size_t s=0; + s+=fWriteVector(f,p.keys); + s+=fWriteVector(f,p.data); + + for(size_t i=0;i const Data* findPtr(fwiter b,fwiter e) const { + typename VK::const_iterator i=std::lower_bound(keys.begin(),keys.end(),*b); + if(i==keys.end() || *i!=*b) return 0; + size_t pos=std::distance(keys.begin(),i); + if(++b==e) return &data[pos]; + if(ptr[pos]) return ptr[pos]->findPtr(b,e); else return 0; + } + // find container + template const Data* findPtr(const cont& c) const { + return findPtr(c.begin(),c.end());} + + + // find sequence + template const Data& find(fwiter b,fwiter e) const { + if(const Data* p=findPtr(b,e)) return *p; else return def;} //return (p?*p:def);} + + // find container + template const Data& find(const cont& c) const { + return find(c.begin(),c.end());} + + + + static void setDefault(const Data& d) {def=d;} + static const Data& getDefault() {return def;} + + + void print(std::ostream& out,const std::string s="") const { + + out<print(out,s+" "); + } + + +}; +template D PrefixTreeF::def; +#ifdef DEBUG +template MemoryStatsPrinter< PrefixTreeF > PrefixTreeF::memStat("PrefixTreeF",0); +#endif +#endif diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp index 37b1d6553..e9cfbdf01 100755 --- a/moses/src/StaticData.cpp +++ b/moses/src/StaticData.cpp @@ -27,8 +27,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #include "FactorCollection.h" #include "HypothesisCollection.h" #include "Timer.h" - +#include "PhraseDictionaryTree.h" #include "boost/filesystem/operations.hpp" // boost::filesystem::exists +#include "InputFileStream.h" using namespace std; @@ -335,7 +336,6 @@ void StaticData::LoadPhraseTables(bool filter weight[currScore] = weightAll[totalPrevNoScoreComponent + currScore]; } totalPrevNoScoreComponent += noScoreComponent; - string phraseTableHash = GetMD5Hash(filePath); string hashFilePath = GetCachePath() + PROJECT_NAME + "--" @@ -374,7 +374,7 @@ void StaticData::LoadPhraseTables(bool filter , inputPhraseList , this->GetLanguageModel(Initial) , this->GetWeightWordPenalty()); - + timer.check("Finished loading PhraseTable"); } } diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h index f880ea159..425dd1655 100755 --- a/moses/src/TypeDef.h +++ b/moses/src/TypeDef.h @@ -58,12 +58,15 @@ const size_t DEFAULT_VERBOSE_LEVEL = 1; #ifdef LM_SRI typedef unsigned int LmId; -#endif +#else #ifdef LM_INTERNAL class NGramNode; typedef const NGramNode* LmId; +#else +// if nothing is defined: +typedef unsigned int LmId; +#endif #endif - // enums. // must be 0, 1, 2, ..., unless otherwise stated diff --git a/moses/src/Util.h b/moses/src/Util.h index d9295927e..8730cadbc 100644 --- a/moses/src/Util.h +++ b/moses/src/Util.h @@ -188,3 +188,9 @@ void RemoveAllInColl(COLL &coll) std::string GetTempFolder(); void CreateTempFile(std::ofstream &fileStream, std::string &filePath); std::string GetMD5Hash(const std::string &filePath); + +template inline void ShrinkToFit(T& v) { + if(v.capacity()>v.size()) T(v).swap(v);assert(v.capacity()==v.size());} + + +