Removal of 'using namespace ...' from several header files.

This commit is contained in:
Ulrich Germann 2015-07-02 01:32:34 +01:00
parent 515862ee1c
commit e94921dc44
51 changed files with 432 additions and 408 deletions

View File

@ -21,6 +21,7 @@ mingw/MosesGUI/icons_rc.py
mingw/MosesGUI/Ui_credits.py
mingw/MosesGUI/Ui_mainWindow.py
moses/TranslationModel/UG
moses/server
phrase-extract/pcfg-common
phrase-extract/syntax-common
randlm

View File

@ -108,7 +108,7 @@ external-lib z ;
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
#requirements += <library>dl ;
requirements += <cxxflags>-std=c++0x ;
#requirements += <cxxflags>-std=c++0x ;
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
if [ option.get "full-tcmalloc" : : "yes" ] {

View File

@ -27,7 +27,7 @@ BaseManager::GetSource() const
return m_source;
}
const ttasksptr&
const ttasksptr
BaseManager::GetTtask() const {
return m_ttask.lock();
}

View File

@ -50,7 +50,7 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr& GetTtask() const;
const ttasksptr GetTtask() const;
virtual void Decode() = 0;
// outputs

View File

@ -1115,8 +1115,7 @@ void StaticData::LoadSparseWeightsFromConfig()
}
std::map<std::string, std::vector<float> > weights = m_parameter->GetAllWeights();
std::map<std::string, std::vector<float> >::iterator iter;
// for (auto iter = weights.begin(); iter != weights.end(); ++iter) {
std::map<std::string, std::vector<float> >::iterator iter;
for (iter = weights.begin(); iter != weights.end(); ++iter) {
// this indicates that it is sparse feature
if (featureNames.find(iter->first) == featureNames.end()) {

View File

@ -85,24 +85,25 @@ int main(int argc, char* argv[])
++k;
size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1;
vector<uchar> caln;
// cout << sid << " " << B.docname(sid) << endl;
std::vector<unsigned char> caln;
// cout << sid << " " << B.docname(sid) << std::endl;
if (!B.find_trg_phr_bounds(sid, off, off+m.size(),
s1,s2,e1,e2,po_fwd,po_bwd,
&caln, NULL, &m == &m2))
{
// cout << "alignment failure" << endl;
// cout << "alignment failure" << std::endl;
}
cout << sid << " " << B.docname(sid)
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
<< "\n";
write_sentence(*B.T1, sid, *B.V1, cout); cout << "\n";
write_sentence(*B.T2, sid, *B.V2, cout); cout << "\n";
std::cout << sid << " " << B.docname(sid)
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
<< "\n";
write_sentence(*B.T1, sid, *B.V1, std::cout); std::cout << "\n";
write_sentence(*B.T2, sid, *B.V2, std::cout); std::cout << "\n";
B.write_yawat_alignment(sid,
m1.size() ? &m1 : NULL,
m2.size() ? &m2 : NULL, cout);
cout << endl;
m2.size() ? &m2 : NULL, std::cout);
std::cout << std::endl;
}
}
@ -141,9 +142,9 @@ interpret_args(int ac, char* av[])
po::notify(vm);
if (vm.count("help"))
{
cout << "\nusage:\n\t" << av[0]
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << endl;
cout << o << endl;
std::cout << "\nusage:\n\t" << av[0]
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << std::endl;
std::cout << o << std::endl;
exit(0);
}
}

View File

@ -3,10 +3,10 @@
#ifndef __Pickler
#define __Pickler
#include<iostream>
#include<string>
#include<vector>
#include<map>
#include <iostream>
#include <string>
#include <vector>
#include <map>
#include "tpt_typedefs.h"
#include "num_read_write.h"
#include <cassert>
@ -20,7 +20,7 @@ namespace ugdiss
/**
* The following functions write and read data in a compact binary
* representation. Write and read errors can be checked directly
* on the ostream object after the function call, so no return value is
* on the std::ostream object after the function call, so no return value is
* necessary.*/
void binwrite(std::ostream& out, char data);
void binwrite(std::ostream& out, unsigned char data);
@ -165,7 +165,7 @@ namespace ugdiss
binread(in,k);
binread(in,v);
data[k] = v;
// cerr << "* " << i << " " << k << " " << v << endl;
// cerr << "* " << i << " " << k << " " << v << std::endl;
}
}

View File

@ -12,7 +12,7 @@
#include "tpt_typedefs.h"
// #include <stdint.h>
#include <cassert>
using namespace std;
// // using namespace std;
#ifndef uchar
#endif
@ -29,7 +29,7 @@ namespace ugdiss
{
// void tightwritex(iostream& out, size_t data, bool flag);
void
tightwrite(std::ostream& out, ::uint64_t data, bool flag);
tightwrite(std::ostream& out, uint64_t data, bool flag);
filepos_type
tightread(std::istream& in, std::ios::pos_type stop);
@ -91,7 +91,7 @@ namespace ugdiss
tightread4(char const* start, char const* stop, uint32_t& dest);
char const*
tightread8(char const* start, char const* stop, ::uint64_t& dest);
tightread8(char const* start, char const* stop, uint64_t& dest);
template<typename numType>
char const*
@ -102,13 +102,13 @@ namespace ugdiss
if (sizeof(numType)==4)
return tightread4(start,stop,reinterpret_cast<uint32_t&>(dest));
else if (sizeof(numType)==8)
return tightread8(start,stop,reinterpret_cast<typename ::uint64_t&>(dest));
return tightread8(start,stop,reinterpret_cast<uint64_t&>(dest));
assert(0);
return NULL;
}
// char const*
// tightread(char const* start, char const* stop, ::uint64_t& dest);
// tightread(char const* start, char const* stop, uint64_t& dest);
// char const*
// tightread(char const* start, char const* stop, filepos_type& dest);

View File

@ -20,7 +20,7 @@
#include <vector>
#include <map>
using namespace std;
// // using namespace std;
namespace bio=boost::iostreams;
namespace ugdiss
@ -28,9 +28,9 @@ namespace ugdiss
class TokenIndex
{
/** Reverse index: maps from ID to char const* */
mutable vector<char const*> ridx;
mutable std::vector<char const*> ridx;
/** Label for the UNK token */
string unkLabel;
std::string unkLabel;
id_type unkId,numTokens;
/// New 2013-09-02: thread-safe
@ -38,8 +38,8 @@ namespace ugdiss
// NEW 2011-01-30: dynamic adding of unknown items
bool dynamic; // dynamically assign a new word id to unknown items?
boost::shared_ptr<map<string,id_type> > str2idExtra;
boost::shared_ptr<vector<string> > newWords;
boost::shared_ptr<std::map<std::string,id_type> > str2idExtra;
boost::shared_ptr<std::vector<std::string> > newWords;
// The use of pointers to external items is a bit of a bad hack
// in terms of the semantic of TokenIndex const: since external items
// are changed, the TokenIndex instance remains unchanged and const works,
@ -48,7 +48,7 @@ namespace ugdiss
// thread-safe!
public:
/** string->ID lookup works via binary search in a vector of Entry instances */
/** string->ID lookup works via binary search in a std::vector of Entry instances */
class Entry
{
public:
@ -69,26 +69,26 @@ namespace ugdiss
Entry const* startIdx;
Entry const* endIdx;
CompFunc comp;
TokenIndex(string unkToken="UNK");
// TokenIndex(string fname,string unkToken="UNK",bool dyna=false);
void open(string fname,string unkToken="UNK",bool dyna=false);
TokenIndex(std::string unkToken="UNK");
// TokenIndex(std::string fname,std::string unkToken="UNK",bool dyna=false);
void open(std::string fname,std::string unkToken="UNK",bool dyna=false);
void close();
// id_type unkId,numTokens;
id_type operator[](char const* w) const;
id_type operator[](string const& w) const;
id_type operator[](std::string const& w) const;
char const* const operator[](id_type id) const;
char const* const operator[](id_type id);
vector<char const*> reverseIndex() const;
std::vector<char const*> reverseIndex() const;
string toString(vector<id_type> const& v);
string toString(vector<id_type> const& v) const;
std::string toString(std::vector<id_type> const& v);
std::string toString(std::vector<id_type> const& v) const;
string toString(id_type const* start, id_type const* const stop);
string toString(id_type const* start, id_type const* const stop) const;
std::string toString(id_type const* start, id_type const* const stop);
std::string toString(id_type const* start, id_type const* const stop) const;
vector<id_type> toIdSeq(string const& line) const;
std::vector<id_type> toIdSeq(std::string const& line) const;
bool fillIdSeq(string const& line, vector<id_type> & v) const;
bool fillIdSeq(std::string const& line, std::vector<id_type> & v) const;
void iniReverseIndex();
id_type getNumTokens() const;
@ -104,27 +104,27 @@ namespace ugdiss
char const* const getUnkToken() const;
void write(string fname); // write TokenIndex to a new file
void write(std::string fname); // write TokenIndex to a new file
bool isDynamic() const;
bool setDynamic(bool onoff);
void setUnkLabel(string unk);
void setUnkLabel(std::string unk);
};
void
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken);
write_tokenindex_to_disk(std::vector<std::pair<std::string,uint32_t> > const& tok,
std::string const& ofile, std::string const& unkToken);
/** for sorting words by frequency */
class compWords
{
string unk;
std::string unk;
public:
compWords(string _unk) : unk(_unk) {};
compWords(std::string _unk) : unk(_unk) {};
bool
operator()(pair<string,size_t> const& A,
pair<string,size_t> const& B) const
operator()(std::pair<std::string,size_t> const& A,
std::pair<std::string,size_t> const& B) const
{
if (A.first == unk) return false;// do we still need this special treatment?
if (B.first == unk) return true; // do we still need this special treatment?
@ -136,27 +136,27 @@ namespace ugdiss
template<class MYMAP>
void
mkTokenIndex(string ofile,MYMAP const& M,string unkToken)
mkTokenIndex(std::string ofile,MYMAP const& M,std::string unkToken)
{
// typedef pair<uint32_t,id_type> IndexEntry; // offset and id
typedef pair<string,uint32_t> Token; // token and id
// typedef std::pair<uint32_t,id_type> IndexEntry; // offset and id
typedef std::pair<std::string,uint32_t> Token; // token and id
// first, sort the word list in decreasing order of frequency, so that we
// can assign IDs in an encoding-efficient manner (high frequency. low ID)
vector<pair<string,size_t> > wcounts(M.size()); // for sorting by frequency
std::vector<std::pair<std::string,size_t> > wcounts(M.size()); // for sorting by frequency
typedef typename MYMAP::const_iterator myIter;
size_t z=0;
for (myIter m = M.begin(); m != M.end(); m++)
{
// cout << m->first << " " << m->second << endl;
wcounts[z++] = pair<string,size_t>(m->first,m->second);
// cout << m->first << " " << m->second << std::endl;
wcounts[z++] = std::pair<std::string,size_t>(m->first,m->second);
}
compWords compFunc(unkToken);
sort(wcounts.begin(),wcounts.end(),compFunc);
// Assign IDs ...
vector<Token> tok(wcounts.size());
std::vector<Token> tok(wcounts.size());
for (size_t i = 0; i < wcounts.size(); i++)
tok[i] = Token(wcounts[i].first,i);
// and re-sort in alphabetical order
@ -166,9 +166,9 @@ namespace ugdiss
template<typename Token>
void
fill_token_seq(TokenIndex& V, string const& line, vector<Token>& dest)
fill_token_seq(TokenIndex& V, std::string const& line, std::vector<Token>& dest)
{
istringstream buf(line); string w;
std::istringstream buf(line); std::string w;
while (buf>>w) dest.push_back(Token(V[w]));
}
}

View File

@ -71,10 +71,19 @@ namespace Moses {
class Mmsapt;
namespace bitext
{
using namespace ugdiss;
// using namespace ugdiss;
using ugdiss::bitvector;
using ugdiss::Ttrack;
using ugdiss::TSA;
using ugdiss::imTSA;
using ugdiss::mmTSA;
using ugdiss::L2R_Token;
using ugdiss::SimpleWordId;
using ugdiss::imTtrack;
using ugdiss::mmTtrack;
using ugdiss::binread;
float lbop(size_t const tries, size_t const succ, float const confidence);
void write_bitvector(bitvector const& v, ostream& out);
void write_bitvector(bitvector const& v, std::ostream& out);
#ifndef NO_MOSES
struct
@ -86,7 +95,7 @@ namespace Moses {
boost::shared_mutex lock;
sptr<SamplingBias> bias;
sptr<pstats::cache_t> cache1, cache2;
ostream* bias_log;
std::ostream* bias_log;
ContextForQuery() : bias_log(NULL) { }
};
#endif
@ -96,10 +105,10 @@ namespace Moses {
{
public:
typedef TKN Token;
typedef typename TSA<Token>::tree_iterator iter;
typedef typename ugdiss::TSA<Token>::tree_iterator iter;
typedef typename std::vector<PhrasePair<Token> > vec_ppair;
typedef typename lru_cache::LRU_Cache<uint64_t, vec_ppair> pplist_cache_t;
typedef TSA<Token> tsa;
typedef ugdiss::TSA<Token> tsa;
friend class Moses::Mmsapt;
protected:
mutable boost::shared_mutex m_lock; // for thread-safe operation
@ -112,7 +121,7 @@ namespace Moses {
size_t m_pstats_cache_threshold; // threshold for caching sampling results
sptr<pstats::cache_t> m_cache1, m_cache2; // caches for sampling results
vector<string> m_docname;
std::vector<string> m_docname;
map<string,id_type> m_docname2docid; // maps from doc names to ids
sptr<std::vector<id_type> > m_sid2docid; // maps from sentences to docs (ids)
@ -141,7 +150,7 @@ namespace Moses {
size_t & s1, size_t & s2, // beginning and end of target start
size_t & e1, size_t & e2, // beginning and end of target end
int& po_fwd, int& po_bwd, // phrase orientations
std::vector<uchar> * core_alignment, // stores the core alignment
std::vector<unsigned char> * core_alignment, // stores the core alignment
bitvector* full_alignment, // stores full word alignment for this sent.
bool const flip) const; // flip source and target (reverse lookup)
@ -190,17 +199,17 @@ namespace Moses {
loadSentenceBias(string const& fname) const;
sptr<DocumentBias>
SetupDocumentBias(string const& bserver, string const& text, ostream* log) const;
SetupDocumentBias(string const& bserver, string const& text, std::ostream* log) const;
sptr<DocumentBias>
SetupDocumentBias(map<string,float> context_weights, ostream* log) const;
SetupDocumentBias(map<string,float> context_weights, std::ostream* log) const;
void
mark_match(Token const* start, Token const* end, iter const& m,
bitvector& check) const;
void
write_yawat_alignment
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const;
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const;
string docname(id_type const sid) const;
@ -229,7 +238,7 @@ namespace Moses {
size_t i = 0;
float v; while (in>>v) (*ret)[i++] = v;
UTIL_THROW_IF2(i != T1->size(),
"Mismatch between bias vector size and corpus size at "
"Mismatch between bias std::vector size and corpus size at "
<< HERE);
return ret;
}
@ -239,8 +248,8 @@ namespace Moses {
Bitext<Token>::
toString(uint64_t pid, int isL2) const
{
ostringstream buf;
uint32_t sid,off,len; parse_pid(pid,sid,off,len);
std::ostringstream buf;
uint32_t sid,off,len; ugdiss::parse_pid(pid,sid,off,len);
Token const* t = (isL2 ? T2 : T1)->sntStart(sid) + off;
Token const* x = t + len;
TokenIndex const& V = isL2 ? *V2 : *V1;
@ -328,10 +337,10 @@ namespace Moses {
size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
int & po_fwd, int & po_bwd,
std::vector<uchar>* core_alignment, bitvector* full_alignment,
std::vector<unsigned char>* core_alignment, bitvector* full_alignment,
bool const flip) const
{
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << std::endl;
// a word on the core_alignment:
//
@ -425,7 +434,7 @@ namespace Moses {
sptr<DocumentBias>
Bitext<Token>::
SetupDocumentBias
( string const& bserver, string const& text, ostream* log ) const
( string const& bserver, string const& text, std::ostream* log ) const
{
sptr<DocumentBias> ret;
UTIL_THROW_IF2(m_sid2docid == NULL,
@ -439,7 +448,7 @@ namespace Moses {
sptr<DocumentBias>
Bitext<Token>::
SetupDocumentBias
( map<string,float> context_weights, ostream* log ) const
( map<string,float> context_weights, std::ostream* log ) const
{
sptr<DocumentBias> ret;
UTIL_THROW_IF2(m_sid2docid == NULL,
@ -541,12 +550,12 @@ namespace Moses {
m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
// convert pstats entries to phrase pairs
// convert pstats entries to phrase pairs
pstats::trg_map_t::iterator a;
for (a = m_pstats->trg.begin(); a != m_pstats->trg.end(); ++a)
{
uint32_t sid,off,len;
parse_pid(a->first, sid, off, len);
ugdiss::parse_pid(a->first, sid, off, len);
m_pp.update(a->first, m_other.sntStart(sid)+off, len, a->second);
m_pp.good2 = max(uint32_t(m_pp.raw2 * float(m_pp.good1)/m_pp.raw1),
m_pp.joint);
@ -596,16 +605,16 @@ namespace Moses {
void
Bitext<Token>::
write_yawat_alignment
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const
{
vector<int> a1(T1->sntLen(sid),-1), a2(T2->sntLen(sid),-1);
std::vector<int> a1(T1->sntLen(sid),-1), a2(T2->sntLen(sid),-1);
bitvector f1(a1.size()), f2(a2.size());
if (m1) mark_match(T1->sntStart(sid), T1->sntEnd(sid), *m1, f1);
if (m2) mark_match(T2->sntStart(sid), T2->sntEnd(sid), *m2, f2);
vector<pair<bitvector,bitvector> > agroups;
vector<string> grouplabel;
pair<bitvector,bitvector> ag;
std::vector<pair<bitvector,bitvector> > agroups;
std::vector<string> grouplabel;
std::pair<bitvector,bitvector> ag;
ag.first.resize(a1.size());
ag.second.resize(a2.size());
char const* x = Tx->sntStart(sid);
@ -670,19 +679,19 @@ namespace Moses {
void
expand(typename Bitext<Token>::iter const& m,
Bitext<Token> const& bt, pstats const& ps,
std::vector<PhrasePair<Token> >& dest, ostream* log)
std::vector<PhrasePair<Token> >& dest, std::ostream* log)
{
bool fwd = m.root == bt.I1.get();
dest.reserve(ps.trg.size());
PhrasePair<Token> pp;
pp.init(m.getPid(), !fwd, m.getToken(0), m.size(), &ps, 0);
// cout << HERE << " "
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << std::endl;
pstats::trg_map_t::const_iterator a;
for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
{
uint32_t sid,off,len;
parse_pid(a->first, sid, off, len);
ugdiss::parse_pid(a->first, sid, off, len);
pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
len, a->second);
dest.push_back(pp);

View File

@ -76,7 +76,7 @@ void Bitext<Token>
}
else ++i;
}
// cerr << workers.size() << "/" << target << " active" << endl;
// cerr << workers.size() << "/" << target << " active" << std::endl;
if (int(workers.size()) > target)
this->doomed = workers.size() - target;
else
@ -132,7 +132,7 @@ Bitext<Token>
::agenda
::get_job()
{
// cerr << workers.size() << " workers on record" << endl;
// cerr << workers.size() << " workers on record" << std::endl;
sptr<job> ret;
if (this->shutdown) return ret;
boost::unique_lock<boost::mutex> lock(this->lock);

View File

@ -100,7 +100,7 @@ Bitext<Token>::agenda::job
#if 0
cerr << ctr++ << " " << m.str(m_bitext->V1.get())
<< " " << sid << "/" << root->getCorpusSize()
<< " " << offset << " " << stop-x << endl;
<< " " << offset << " " << stop-x << std::endl;
#endif
bias_total += (*m_bias)[sid];
++stats->raw_cnt;
@ -109,7 +109,7 @@ Bitext<Token>::agenda::job
#if UG_BITEXT_TRACK_ACTIVE_THREADS
++active;
// if (active%5 == 0)
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << endl;
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << std::endl;
#endif
}
@ -130,10 +130,10 @@ int Bitext<Token>::agenda::job
if (!m_bias) return 1;
using namespace boost::math;
// // using namespace boost::math;
typedef boost::math::binomial_distribution<> binomial;
ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
std::ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
float p = (*m_bias)[sid];
id_type docid = m_bias->GetClass(sid);
@ -177,7 +177,7 @@ int Bitext<Token>::agenda::job
for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
if (!ret) *log << "SKIP";
else if (p < .5 && d > .9) *log << "FORCE";
*log << endl;
*log << std::endl;
}
return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);

View File

@ -17,9 +17,9 @@ Bitext<Token>::agenda
// reduce the number of lock / unlock operations we need to do
// during sampling.
uint64_t sid=0, offset=0; // sid and offset of source phrase
size_t s1=0, s2=0, e1=0, e2=0; // soft and hard boundaries of target phrase
vector<uchar> aln; // stores phrase-pair-internal alignment
uint64_t sid=0, offset=0; // sid and offset of source phrase
size_t s1=0, s2=0, e1=0, e2=0; // soft and hard boundaries of target phrase
std::vector<unsigned char> aln; // stores phrase-pair-internal alignment
while(sptr<job> j = ag.get_job())
{
j->stats->register_worker();
@ -53,7 +53,7 @@ Bitext<Token>::agenda
Token const* eos = ag.bt.T2->sntEnd(sid);
cerr << "[" << j->stats->good + 1 << "] ";
while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
cerr << "[" << docid << "]" << endl;
cerr << "[" << docid << "]" << std::endl;
#endif
float sample_weight = 1./num_pairs;
@ -62,11 +62,11 @@ Bitext<Token>::agenda
// adjust offsets in phrase-internal aligment
for (size_t k = 1; k < aln.size(); k += 2) aln[k] += s2 - s1;
vector<uint64_t> seen; seen.reserve(10);
std::vector<uint64_t> seen; seen.reserve(10);
// It is possible that the phrase extraction extracts the same
// phrase twice, e.g., when word a co-occurs with sequence b b b
// but is aligned only to the middle word. We can only count
// each phrase pair once per source phrase occurrence, or else
// each phrase pair once per source phrase occurrence, or else
// run the risk of having more joint counts than marginal
// counts.

View File

@ -54,7 +54,7 @@ namespace Moses
void
jstats::
add(float w, vector<uchar> const& a, uint32_t const cnt2,
add(float w, std::vector<unsigned char> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
{
boost::lock_guard<boost::mutex> lk(this->lock);
@ -66,7 +66,7 @@ namespace Moses
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
if (i == my_aln.size())
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
my_aln.push_back(std::pair<size_t,std::vector<unsigned char> >(1,a));
else
my_aln[i].first++;
if (my_aln[i].first > my_aln[i/2].first)
@ -81,7 +81,7 @@ namespace Moses
}
}
vector<pair<size_t, vector<uchar> > > const&
std::vector<std::pair<size_t, std::vector<unsigned char> > > const&
jstats::
aln() const
{ return my_aln; }

View File

@ -1,5 +1,7 @@
// -*- c++ -*-
#pragma once
#include <string>
#include <stdint.h>
#include "ug_typedefs.h"
#include "ug_lexical_reordering.h"
#include <boost/thread.hpp>
@ -8,9 +10,10 @@ namespace Moses
{
namespace bitext
{
using namespace ugdiss;
// "joint" (i.e., phrase pair) statistics
// using namespace ugdiss;
// "joint" (i.e., phrase pair) statistics
class
jstats
{
@ -20,23 +23,24 @@ namespace Moses
float my_wcnt; // weighted joint count
// to do: use a static alignment pattern store that stores each pattern only
// once, so that we don't have to store so many alignment vectors
vector<pair<size_t, vector<uchar> > > my_aln; // internal word alignment
// once, so that we don't have to store so many alignment vectors
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
// internal word alignment
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
public:
std::map<uint32_t,uint32_t> indoc;
// vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
// std::vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
jstats();
jstats(jstats const& other);
uint32_t rcnt() const; // raw joint counts
uint32_t cnt2() const; // raw target phrase occurrence count
float wcnt() const; // weighted joint counts
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
std::vector<std::pair<size_t, std::vector<unsigned char> > > const & aln() const;
void add(float w, std::vector<unsigned char> const& a, uint32_t const cnt2,
uint32_t fwd_orient, uint32_t bwd_orient,
int const docid);
void invalidate();
@ -46,7 +50,7 @@ namespace Moses
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
void fill_lr_vec(Moses::LRModel::Direction const& dir,
Moses::LRModel::ModelType const& mdl,
vector<float>& v);
std::vector<float>& v);
};
}
}

View File

@ -66,7 +66,7 @@ namespace Moses
bool
pstats::
add(uint64_t pid, float const w,
vector<uchar> const& a,
std::vector<unsigned char> const& a,
uint32_t const cnt2,
uint32_t fwd_o,
uint32_t bwd_o, int const docid)

View File

@ -17,7 +17,7 @@ namespace Moses
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > map_t;
typedef ThreadSafeContainer<uint64_t, sptr<pstats>, map_t> cache_t;
typedef std::vector<uchar> alnvec;
typedef std::vector<unsigned char> alnvec;
#if UG_BITEXT_TRACK_ACTIVE_THREADS
static ThreadSafeCounter active;
#endif

View File

@ -7,7 +7,7 @@
#include "ug_typedefs.h"
namespace ugdiss
{
using namespace std;
// using namespace std;
template<typename T>
class ConllBottomUpToken : public T

View File

@ -3,7 +3,7 @@ namespace ugdiss
{
Conll_Record
Conll_Record::
remap(vector<id_type const*> const& m) const
remap(std::vector<id_type const*> const& m) const
{
Conll_Record ret;
ret.sform = m.size() > 0 && m[0] ? m[0][this->sform] : this->sform;

View File

@ -5,7 +5,7 @@
namespace ugdiss
{
using namespace std;
// using namespace std;
class
Conll_Record
@ -29,7 +29,7 @@ namespace ugdiss
// virtual bool operator==(Conll_Record const& other) const;
// virtual bool operator<(Conll_Record const& other) const;
Conll_Record remap(vector<id_type const*> const& m) const;
Conll_Record remap(std::vector<id_type const*> const& m) const;
#if 0
/** constructor for conversion from CONLL-stype text format

View File

@ -35,7 +35,7 @@ namespace ugdiss
id_type
SimpleWordId::
remap(vector<id_type const*> const& m) const
remap(std::vector<id_type const*> const& m) const
{
if (!m[0]) return theID;
return m[0][theID];

View File

@ -27,7 +27,7 @@ namespace ugdiss
id_type const& id() const;
int cmp(SimpleWordId const& other) const;
bool operator==(SimpleWordId const& other) const;
id_type remap(vector<id_type const*> const& m) const;
id_type remap(std::vector<id_type const*> const& m) const;
};
/** Token class for suffix arrays */

View File

@ -15,22 +15,22 @@
#include "ug_conll_bottom_up_token.h"
#include "ug_typedefs.h"
using namespace std;
// using namespace std;
namespace ugdiss
{
// Fills the vector v with pointers to the internal root r_x for the
// Fills the std::vector v with pointers to the internal root r_x for the
// stretch [start,x] for all x: start <= x < stop. If the stretch
// is incoherent, r_x is NULL
template<typename T>
void
fill_L2R_roots(T const* start,T const* stop, vector<T const*>& v)
fill_L2R_roots(T const* start,T const* stop, std::vector<T const*>& v)
{
assert(stop>start);
v.resize(stop-start);
v[0] = start;
bitvector isR(v.size());
vector<T const*> root(v.size());
std::vector<T const*> root(v.size());
isR.set(0);
root[0] = start+start->parent;
for (T const* x = start+1; x < stop; ++x)
@ -95,7 +95,7 @@ namespace ugdiss
template<typename T>
T const*
findInternalRoot(vector<T> const& v)
findInternalRoot(std::vector<T> const& v)
{
T const* a = as<T>(&(*v.begin()));
T const* b = as<T>(&(*v.end()));
@ -108,7 +108,7 @@ namespace ugdiss
public:
Conll_Record const* rec; // pointer to the record (see below) for this node
DTNode* parent; // pointer to my parent
vector<DTNode*> children; // children (in the order they appear in the sentence)
std::vector<DTNode*> children; // children (in the order they appear in the sentence)
DTNode(Conll_Record const* p);
};
@ -117,7 +117,7 @@ namespace ugdiss
DependencyTree
{
public:
vector<DTNode> w;
std::vector<DTNode> w;
DependencyTree(Conll_Record const* first, Conll_Record const* last);
};
#endif

View File

@ -38,8 +38,8 @@ namespace Moses
{
UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
<< "Error in alignment information:\n" << a);
binwrite(obuf,row);
binwrite(obuf,col);
ugdiss::binwrite(obuf,row);
ugdiss::binwrite(obuf,col);
}
// important: DO NOT replace the two lines below this comment by
// char const* x = obuf.str().c_str(), as the memory x is pointing

View File

@ -25,12 +25,12 @@ namespace Moses
imBitext(imBitext const& other);
// sptr<imBitext<TKN> >
// add(vector<TKN> const& s1, vector<TKN> const& s2, vector<ushort> & a);
// add(std::vector<TKN> const& s1, std::vector<TKN> const& s2, std::vector<ushort> & a);
sptr<imBitext<TKN> >
add(vector<string> const& s1,
vector<string> const& s2,
vector<string> const& a) const;
std::vector<string> const& s2,
std::vector<string> const& a) const;
};

View File

@ -20,8 +20,8 @@
namespace ugdiss
{
using namespace std;
using namespace boost;
// using namespace std;
// using namespace boost;
namespace bio=boost::iostreams;
// template<typename TOKEN> class imBitext<TOKEN>;
@ -37,8 +37,8 @@ namespace ugdiss
friend class tree_iterator;
private:
vector<cpos> sufa; // stores the actual array
vector<filepos_type> index; /* top-level index into regions in sufa
std::vector<cpos> sufa; // stores the actual array
std::vector<filepos_type> index; /* top-level index into regions in sufa
* (for faster access) */
private:
char const*
@ -54,11 +54,11 @@ namespace ugdiss
imTSA();
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
bdBitset const* filt,
ostream* log = NULL);
std::ostream* log = NULL);
imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize);
std::vector<id_type> const& newsids, size_t const vsize);
count_type
sntCnt(char const* p, char const * const q) const;
@ -86,7 +86,7 @@ namespace ugdiss
sanityCheck() const;
void
save_as_mm_tsa(string fname) const;
save_as_mm_tsa(std::string fname) const;
/// add a sentence to the database
// shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
@ -140,7 +140,7 @@ namespace ugdiss
// specified in filter
template<typename TOKEN>
imTSA<TOKEN>::
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, std::ostream* log)
{
assert(c);
this->corpus = c;
@ -166,14 +166,14 @@ namespace ugdiss
// alignment in the memory, using a ushort instead of a uint32_t might not
// even make a difference.
vector<count_type> wcnt; // word counts
std::vector<count_type> wcnt; // word counts
sufa.resize(c->count_tokens(wcnt,filter,slimit,log));
if (log) *log << sufa.size() << "." << endl;
if (log) *log << sufa.size() << "." << std::endl;
// exit(1);
// we use a second vector that keeps track for each ID of the current insertion
// we use a second std::vector that keeps track for each ID of the current insertion
// position in the array
vector<count_type> tmp(wcnt.size(),0);
std::vector<count_type> tmp(wcnt.size(),0);
for (size_t i = 1; i < wcnt.size(); ++i)
tmp[i] = tmp[i-1] + wcnt[i-1];
@ -198,14 +198,14 @@ namespace ugdiss
}
// Now sort the array
if (log) *log << "sorting ...." << endl;
if (log) *log << "sorting ...." << std::endl;
index.resize(wcnt.size()+1,0);
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(c.get());
for (size_t i = 0; i < wcnt.size(); i++)
{
if (log && wcnt[i] > 5000)
*log << "sorting " << wcnt[i]
<< " entries starting with id " << i << "." << endl;
<< " entries starting with id " << i << "." << std::endl;
index[i+1] = index[i]+wcnt[i];
assert(index[i+1]==tmp[i]); // sanity check
if (wcnt[i]>1)
@ -217,7 +217,7 @@ namespace ugdiss
this->indexSize = this->index.size();
#if 1
// Sanity check during code development. Can be removed once the thing is stable.
typename vector<cpos>::iterator m = sufa.begin();
typename std::vector<cpos>::iterator m = sufa.begin();
for (size_t i = 0; i < wcnt.size(); i++)
{
for (size_t k = 0; k < wcnt[i]; ++k,++m)
@ -330,14 +330,14 @@ namespace ugdiss
template<typename TOKEN>
void
imTSA<TOKEN>::
save_as_mm_tsa(string fname) const
save_as_mm_tsa(std::string fname) const
{
ofstream out(fname.c_str());
std::ofstream out(fname.c_str());
filepos_type idxStart(0);
id_type idxSize(index.size());
numwrite(out,idxStart);
numwrite(out,idxSize);
vector<filepos_type> mmIndex;
std::vector<filepos_type> mmIndex;
for (size_t i = 1; i < this->index.size(); i++)
{
mmIndex.push_back(out.tellp());
@ -360,7 +360,7 @@ namespace ugdiss
imTSA<TOKEN>::
imTSA(imTSA<TOKEN> const& prior,
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
vector<id_type> const& newsids, size_t const vsize)
std::vector<id_type> const& newsids, size_t const vsize)
{
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
@ -369,7 +369,7 @@ namespace ugdiss
size_t newToks = 0;
BOOST_FOREACH(id_type sid, newsids)
newToks += crp->sntLen(sid);
vector<cpos> nidx(newToks); // new array entries
std::vector<cpos> nidx(newToks); // new array entries
size_t n = 0;
BOOST_FOREACH(id_type sid, newsids)
@ -390,9 +390,9 @@ namespace ugdiss
this->index.resize(vsize+1);
size_t i = 0;
typename vector<cpos>::iterator k = this->sufa.begin();
typename std::vector<cpos>::iterator k = this->sufa.begin();
// cerr << newToks << " new items at "
// << __FILE__ << ":" << __LINE__ << endl;
// << __FILE__ << ":" << __LINE__ << std::endl;
for (size_t n = 0; n < nidx.size();)
{
id_type nid = crp->getToken(nidx[n])->id();

View File

@ -28,8 +28,8 @@
namespace ugdiss
{
using namespace std;
using namespace boost;
// using namespace std;
// using namespace boost;
namespace bio=boost::iostreams;
template<typename Token> class imTSA;
@ -37,7 +37,8 @@ namespace ugdiss
template<typename TOKEN>
typename boost::shared_ptr<imTtrack<TOKEN> >
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp, vector<TOKEN> const & snt);
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp,
std::vector<TOKEN> const & snt);
template<typename Token>
class imTtrack : public Ttrack<Token>
@ -45,19 +46,20 @@ namespace ugdiss
private:
size_t numToks;
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
boost::shared_ptr<typename std::vector<std::vector<Token> > > myData;
// pointer to corpus data
friend class imTSA<Token>;
friend
typename boost::shared_ptr<imTtrack<Token> >
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, std::vector<Token> const & snt);
void m_check_token_count(); // debugging function
public:
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
imTtrack(istream& in, TokenIndex& V, ostream* log = NULL);
imTtrack(boost::shared_ptr<std::vector<std::vector<Token> > > const& d);
imTtrack(std::istream& in, TokenIndex& V, std::ostream* log = NULL);
imTtrack(size_t reserve = 0);
// imTtrack(istream& in, Vocab& V);
@ -80,7 +82,7 @@ namespace ugdiss
m_check_token_count()
{ // sanity check
size_t check = 0;
BOOST_FOREACH(vector<Token> const& s, *myData)
BOOST_FOREACH(std::vector<Token> const& s, *myData)
check += s.size();
UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
<< " Wrong token count after appending sentence!"
@ -131,28 +133,28 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex& V, ostream* log)
imTtrack(std::istream& in, TokenIndex& V, std::ostream* log)
: numToks(0)
{
myData.reset(new vector<vector<Token> >());
string line,w;
myData.reset(new std::vector<std::vector<Token> >());
std::string line,w;
size_t linectr=0;
boost::unordered_map<string,id_type> H;
boost::unordered_map<std::string,id_type> H;
// for (id_type i = 0; i < V.knownVocabSize(); ++i)
// H[V[i]] = i;
while (getline(in,line))
{
// cout << line << endl;
myData->push_back(vector<Token>());
// cout << line << std::endl;
myData->push_back(std::vector<Token>());
if (log && ++linectr%1000000==0)
*log << linectr/1000000 << "M lines of input processed" << endl;
istringstream buf(line);
// cout << line << endl;
*log << linectr/1000000 << "M lines of input processed" << std::endl;
std::istringstream buf(line);
// cout << line << std::endl;
while (buf>>w)
{
myData->back().push_back(Token(V[w]));
// cout << w << " " << myData->back().back().id() << " "
// << V[w] << endl;
// << V[w] << std::endl;
}
// myData->back().resize(myData->back().size(), Token(0));
numToks += myData->back().size();
@ -164,17 +166,17 @@ namespace ugdiss
imTtrack(size_t reserve)
: numToks(0)
{
myData.reset(new vector<vector<Token> >());
myData.reset(new std::vector<std::vector<Token> >());
if (reserve) myData->reserve(reserve);
}
template<typename Token>
imTtrack<Token>::
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
imTtrack(boost::shared_ptr<std::vector<std::vector<Token> > > const& d)
: numToks(0)
{
myData = d;
BOOST_FOREACH(vector<Token> const& v, *d)
BOOST_FOREACH(std::vector<Token> const& v, *d)
numToks += v.size();
}
@ -186,7 +188,7 @@ namespace ugdiss
id_type i;
for (i = 0; i < myData->size(); ++i)
{
vector<Token> const& v = (*myData)[i];
std::vector<Token> const& v = (*myData)[i];
if (v.size() == 0) continue;
if (&v.front() <= t && &v.back() >= t)
break;
@ -197,7 +199,7 @@ namespace ugdiss
/// add a sentence to the database
template<typename TOKEN>
boost::shared_ptr<imTtrack<TOKEN> >
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, std::vector<TOKEN> const & snt)
{
#if 1
if (crp) crp->m_check_token_count();

View File

@ -11,7 +11,7 @@
#include <boost/unordered_map.hpp>
#include "tpt_pickler.h"
using namespace std;
// using namespace std;
namespace ugdiss
{
@ -20,16 +20,16 @@ namespace ugdiss
LexicalPhraseScorer1
{
typedef boost::unordered_map<id_type, float> inner_map_t;
vector<inner_map_t> L1_given_L2;
vector<inner_map_t> L2_given_L1;
std::vector<inner_map_t> L1_given_L2;
std::vector<inner_map_t> L2_given_L1;
void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex);
std::vector<inner_map_t> & lex);
public:
void open(string const& bname, string const& L1, string const& L2,
TokenIndex & V1, TokenIndex & V2);
void score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<ushort> aln, float & fwd_score, float& bwd_score);
std::vector<ushort> aln, float & fwd_score, float& bwd_score);
void score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
@ -42,10 +42,10 @@ namespace ugdiss
void
LexicalPhraseScorer1<TKN>::
load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex)
std::vector<inner_map_t> & lex)
{
boost::iostreams::filtering_istream in;
cout << fname << endl;
cout << fname << std::endl;
open_input_stream(fname,in);
lex.resize(V1.ksize());
string w1,w2; float p;
@ -66,8 +66,8 @@ namespace ugdiss
{
string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
cout << lex1 << endl;
cout << lex2 << endl;
cout << lex1 << std::endl;
cout << lex2 << std::endl;
load_lex(lex1,V1,V2,L1_given_L2);
load_lex(lex2,V2,V1,L2_given_L1);
}
@ -79,8 +79,8 @@ namespace ugdiss
TKN const* snt2, size_t const s2, size_t const e2,
vector<ushort> aln, float & fwd_score, float& bwd_score)
{
vector<float> p1(e1,0), p2(e2,0);
vector<int> c1(e1,0), c2(e2,0);
std::vector<float> p1(e1,0), p2(e2,0);
std::vector<int> c1(e1,0), c2(e2,0);
size_t i1=0,i2=0;
for (size_t k = 0; k < aln.size(); ++k)
{
@ -126,8 +126,8 @@ namespace ugdiss
char const* const aln_start, char const* const aln_end,
float & fwd_score, float& bwd_score)
{
vector<float> p1(e1,0), p2(e2,0);
vector<int> c1(e1,0), c2(e2,0);
std::vector<float> p1(e1,0), p2(e2,0);
std::vector<int> c1(e1,0), c2(e2,0);
size_t i1=0,i2=0;
for (char const* x = aln_start; x < aln_end;)
{

View File

@ -14,7 +14,7 @@
#include "tpt_pickler.h"
#include "ug_mm_2d_table.h"
#include "util/exception.hh"
using namespace std;
// using namespace std;
namespace ugdiss
{
@ -22,7 +22,7 @@ namespace ugdiss
class
LexicalPhraseScorer2
{
vector<string> ftag;
std::vector<string> ftag;
public:
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
table_t COOC;
@ -31,7 +31,7 @@ namespace ugdiss
void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<someint> const & aln, float const alpha,
std::vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const;
void
@ -67,8 +67,8 @@ namespace ugdiss
vector<someint> const & aln, float const alpha,
float & fwd_score, float& bwd_score) const
{
vector<float> p1(e1,0), p2(e2,0);
vector<int> c1(e1,0), c2(e2,0);
std::vector<float> p1(e1,0), p2(e2,0);
std::vector<int> c1(e1,0), c2(e2,0);
size_t i1=0,i2=0;
for (size_t k = 0; k < aln.size(); ++k)
{
@ -113,7 +113,7 @@ namespace ugdiss
cerr << "[" << s << "," << t << "] "
<< COOC.m1(s) << "/"
<< COOC[s][t] << "/"
<< COOC.m2(t) << endl;
<< COOC.m2(t) << std::endl;
#endif
return ret;
}
@ -141,8 +141,8 @@ namespace ugdiss
char const* const aln_start, char const* const aln_end,
float const alpha, float & fwd_score, float& bwd_score) const
{
vector<float> p1(e1,0), p2(e2,0);
vector<int> c1(e1,0), c2(e2,0);
std::vector<float> p1(e1,0), p2(e2,0);
std::vector<int> c1(e1,0), c2(e2,0);
size_t i1=0,i2=0;
for (char const* x = aln_start; x < aln_end;)
{

View File

@ -14,14 +14,14 @@
namespace lru_cache
{
using namespace std;
using namespace boost;
// using namespace std;
// using namespace boost;
template<typename KEY, typename VAL>
class LRU_Cache
{
public:
typedef unordered_map<KEY,uint32_t> map_t;
typedef boost::unordered_map<KEY,uint32_t> map_t;
private:
struct Record
{
@ -33,7 +33,7 @@ namespace lru_cache
mutable boost::shared_mutex m_lock;
uint32_t m_qfront, m_qback;
vector<Record> m_recs;
std::vector<Record> m_recs;
map_t m_idx;
void
@ -84,7 +84,7 @@ namespace lru_cache
set(KEY const& key, sptr<VAL> const& ptr)
{
boost::lock_guard<boost::shared_mutex> lock(m_lock);
pair<typename map_t::iterator,bool> foo;
std::pair<typename map_t::iterator,bool> foo;
foo = m_idx.insert(make_pair(key,m_recs.size()));
uint32_t p = foo.first->second;

View File

@ -13,7 +13,7 @@
namespace bio=boost::iostreams;
namespace ugdiss
{
using namespace std;
// using namespace std;
template<typename OFFSET, typename ID, typename VAL, typename INIT>
class
mm2dTable
@ -71,12 +71,12 @@ namespace ugdiss
}
void open(string fname);
void open(std::string fname);
void close();
Row operator[](ID key) const;
mm2dTable(string const fname="") { if (!fname.empty()) open(fname); };
mm2dTable(std::string const fname="") { if (!fname.empty()) open(fname); };
~mm2dTable() { file.reset(); };
};
@ -110,25 +110,25 @@ namespace ugdiss
template<typename OFFSET, typename ID, typename VAL, typename INIT>
void
mm2dTable<OFFSET,ID,VAL,INIT>::
open(string fname)
open(std::string fname)
{
// cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << endl;
// cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << std::endl;
if (access(fname.c_str(),R_OK))
{
ostringstream msg;
std::ostringstream msg;
msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
<< "file '" << fname << " is not accessible." << endl;
string foo = msg.str();
<< "file '" << fname << " is not accessible." << std::endl;
std::string foo = msg.str();
UTIL_THROW(util::Exception,foo.c_str());
}
file.reset(new bio::mapped_file_source());
file->open(fname);
if (!file->is_open())
{
ostringstream msg;
std::ostringstream msg;
msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
<< "Opening file '" << fname << "' failed." << endl;
string foo = msg.str();
<< "Opening file '" << fname << "' failed." << std::endl;
std::string foo = msg.str();
UTIL_THROW(util::Exception,foo.c_str());
}
char const* p = file->data();
@ -137,15 +137,15 @@ namespace ugdiss
numRows = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
numCols = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
data = reinterpret_cast<Cell const*>(p);
// cout << numRows << " rows; " << numCols << " columns " << endl;
// cout << numRows << " rows; " << numCols << " columns " << std::endl;
M1 = reinterpret_cast<VAL const*>(index+numRows+1);
M2 = M1+numRows;
// cout << "Table " << fname << " has " << numRows << " rows and "
// << numCols << " columns." << endl;
// << numCols << " columns." << std::endl;
// cout << "File size is " << file.size()*1024 << " bytes; ";
// cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
// << " bytes into the file" << endl;
// cout << M2[0] << endl;
// << " bytes into the file" << std::endl;
// cout << M2[0] << std::endl;
}
template<
@ -156,15 +156,15 @@ namespace ugdiss
typename ICONT // inner container type
>
void
write_mm_2d_table(ostream& out, vector<ICONT> const& T,
vector<VAL> const* m1 = NULL,
vector<VAL> const* m2 = NULL)
write_mm_2d_table(std::ostream& out, std::vector<ICONT> const& T,
std::vector<VAL> const* m1 = NULL,
std::vector<VAL> const* m2 = NULL)
{
assert(T.size());
typedef typename ICONT::const_iterator iter;
// compute marginals if necessary
vector<VAL> m1x,m2x;
std::vector<VAL> m1x,m2x;
if (!m1)
{
m1x.resize(T.size(),INIT(0));
@ -191,7 +191,7 @@ namespace ugdiss
numwrite(out,id_type(m2->size())); // number of columns
// write actual table
vector<OFFSET> index;
std::vector<OFFSET> index;
size_t ctr =0;
index.reserve(m1->size()+1);
for (ID r = 0; r < ID(T.size()); ++r)

View File

@ -35,7 +35,7 @@ namespace Moses
// in the future, we might also allow listing documents with
// sentence ranges.
string buffer,docname; size_t a=0,b;
this->m_sid2docid.reset(new vector<id_type>(this->T1->size()));
this->m_sid2docid.reset(new std::vector<id_type>(this->T1->size()));
while(getline(docmap,buffer))
{
istringstream line(buffer);
@ -46,7 +46,7 @@ namespace Moses
this->m_docname.push_back(docname);
line >> b;
#ifndef NO_MOSES
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << endl);
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << std::endl);
#endif
for (b += a; a < b; ++a)
(*this->m_sid2docid)[a] = docid;

View File

@ -19,7 +19,7 @@
namespace ugdiss
{
using namespace std;
// using namespace std;
namespace bio=boost::iostreams;
template<typename TOKEN>
@ -43,8 +43,8 @@ namespace ugdiss
public:
mmTSA();
mmTSA(string fname, Ttrack<TOKEN> const* c);
void open(string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c);
mmTSA(std::string fname, Ttrack<TOKEN> const* c);
void open(std::string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c);
count_type
sntCnt(char const* p, char const * const q) const;
@ -109,7 +109,7 @@ namespace ugdiss
template<typename TOKEN>
mmTSA<TOKEN>::
mmTSA(string fname, Ttrack<TOKEN> const* c)
mmTSA(std::string fname, Ttrack<TOKEN> const* c)
{
open(fname,c);
}
@ -119,12 +119,12 @@ namespace ugdiss
template<typename TOKEN>
void
mmTSA<TOKEN>::
open(string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c)
open(std::string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c)
{
this->bsc.reset(new BitSetCache<TSA<TOKEN> >(this));
if (access(fname.c_str(),F_OK))
{
ostringstream msg;
std::ostringstream msg;
msg << "mmTSA<>::open: File '" << fname << "' does not exist.";
throw std::runtime_error(msg.str().c_str());
}
@ -137,7 +137,7 @@ namespace ugdiss
p = numread(p,idxOffset);
p = numread(p,this->indexSize);
// cerr << fname << ": " << idxOffset << " " << this->indexSize << endl;
// cerr << fname << ": " << idxOffset << " " << this->indexSize << std::endl;
this->startArray = p;
this->index = reinterpret_cast<filepos_type const*>(file.data()+idxOffset);
@ -243,7 +243,7 @@ namespace ugdiss
{
raw = 0;
id_type sid; uint16_t off;
boost::dynamic_bitset<typename ::uint64_t> check(this->corpus->size());
boost::dynamic_bitset<uint64_t> check(this->corpus->size());
while (p < q)
{
p = tightread(p,q,sid);

View File

@ -24,7 +24,7 @@
namespace ugdiss
{
using namespace std;
// using namespace std;
namespace bio=boost::iostreams;
template<typename TKN=id_type>
@ -42,7 +42,7 @@ namespace ugdiss
* of more than four billion words)
*/
public:
mmTtrack(string fname);
mmTtrack(std::string fname);
mmTtrack();
// return pointer to beginning of sentence
@ -58,20 +58,20 @@ namespace ugdiss
size_t numTokens() const;
// open an mmTtrack file
void open(string fname);
void open(std::string fname);
// FUNCTIONS FOR BUILDING CORPUS TRACKS
// write a blank file header at the beginning of a new ttrack file
void write_blank_file_header(ostream& out) const;
void write_blank_file_header(std::ostream& out) const;
// write the sentence index /idx/ and fill the file header
void write_index_and_finalize(ostream& out,
vector<id_type> const& idx,
void write_index_and_finalize(std::ostream& out,
std::vector<id_type> const& idx,
count_type tokenCount) const;
// copy a contiguous sequence of sentences to another stream
// return the number of tokens copied
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
id_type copySentences(std::ostream& trg, id_type start, id_type stop) const;
/** find the sentence id of a given token */
id_type findSid(TKN const* t) const;
@ -79,7 +79,7 @@ namespace ugdiss
id_type findSid(id_type tokenOffset) const;
/// re-assign ids based on the id maps in /f/
void remap(string const fname, vector<id_type const*> const & f) const;
void remap(std::string const fname, std::vector<id_type const*> const & f) const;
};
@ -87,7 +87,7 @@ namespace ugdiss
template<typename TKN>
void
mmTtrack<TKN>::
remap(string const fname, vector<id_type const*> const & f) const
remap(std::string const fname, std::vector<id_type const*> const & f) const
{
bio::mapped_file myfile(fname);
assert(myfile.is_open());
@ -128,8 +128,9 @@ namespace ugdiss
{
if (sid >= this->numSent)
{
cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
<< this->numSent <<")" << endl;
std::cerr << "Fatal error: requested sentence #"
<< sid <<" is beyond corpus size ("
<< this->numSent <<")" << std::endl;
}
assert(sid < this->numSent);
return data+index[sid];
@ -155,7 +156,7 @@ namespace ugdiss
template<typename TKN>
mmTtrack<TKN>::
mmTtrack(string fname)
mmTtrack(std::string fname)
{
open(fname);
}
@ -163,18 +164,18 @@ namespace ugdiss
template<typename TKN>
void
mmTtrack<TKN>::
open(string fname)
open(std::string fname)
{
if (access(fname.c_str(),F_OK))
{
ostringstream msg;
std::ostringstream msg;
msg << "mmTtrack<>::open: File '" << fname << "' does not exist.";
throw std::runtime_error(msg.str().c_str());
}
file.open(fname);
if (!file.is_open())
{
cerr << "Error opening file " << fname << endl;
std::cerr << "Error opening file " << fname << std::endl;
assert(0);
}
filepos_type idxOffset;
@ -210,7 +211,7 @@ namespace ugdiss
template<typename TKN>
void
mmTtrack<TKN>::
write_blank_file_header(ostream& out) const
write_blank_file_header(std::ostream& out) const
{
numwrite(out,filepos_type(0)); // place holder for index start
numwrite(out,id_type(0)); // place holder for index size
@ -220,8 +221,8 @@ namespace ugdiss
template<typename TKN>
void
mmTtrack<TKN>::
write_index_and_finalize(ostream& out,
vector<id_type>const& idx,
write_index_and_finalize(std::ostream& out,
std::vector<id_type>const& idx,
id_type tokenCount) const
{
id_type idxSize = idx.size();
@ -237,7 +238,7 @@ namespace ugdiss
template<typename TKN>
id_type
mmTtrack<TKN>::
copySentences(ostream& trg, id_type start, id_type stop) const
copySentences(std::ostream& trg, id_type start, id_type stop) const
{
assert(stop > start);
TKN const* a = sntStart(start);

View File

@ -31,8 +31,8 @@
#include "ug_corpus_token.h"
#include "tpt_pickler.h"
using namespace ugdiss;
using namespace std;
// using namespace ugdiss;
// using namespace std;
namespace Moses {
typedef L2R_Token<SimpleWordId> Token;
@ -43,7 +43,7 @@ namespace Moses {
public:
typedef mmTSA<Token>::tree_iterator iter;
class pstats; // one-sided phrase statistics
class jstats; // phrase pair ("joint") statistics
class jstats; // phrase pair ("joint") statistics
class agenda
{
boost::mutex lock;
@ -51,7 +51,7 @@ namespace Moses {
class job;
class worker;
list<job> joblist;
vector<sptr<boost::thread> > workers;
std::vector<sptr<boost::thread> > workers;
bool shutdown;
size_t doomed;
public:
@ -83,7 +83,7 @@ namespace Moses {
find_trg_phr_bounds
(size_t const sid, size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
std::vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
@ -99,22 +99,22 @@ namespace Moses {
void prep(iter const& phrase);
};
// "joint" (i.e., phrase pair) statistics
// "joint" (i.e., phrase pair) statistics
class
mmbitext::
jstats
{
uint32_t my_rcnt; // unweighted count
float my_wcnt; // weighted count
vector<pair<size_t, vector<uchar> > > my_aln;
std::vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
jstats(jstats const& other);
uint32_t rcnt() const;
float wcnt() const;
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a);
std::vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, std::vector<uchar> const& a);
};
// struct
@ -151,11 +151,11 @@ namespace Moses {
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
pstats();
// vector<phrase> nbest;
// std::vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
void register_worker();
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
void add(mmbitext::iter const& trg_phrase, float const w, std::vector<uchar> const& a);
};
class

View File

@ -12,6 +12,9 @@ namespace Moses
{
namespace bitext
{
using ugdiss::TokenIndex;
template<typename Token>
class
PhrasePair
@ -27,7 +30,7 @@ namespace Moses
std::vector<float> fvals;
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[Moses::LRModel::NONE+1]; // distortion counts
std::vector<uchar> aln;
std::vector<unsigned char> aln;
float score;
bool inverse;
// std::vector<uint32_t> indoc;
@ -54,10 +57,10 @@ namespace Moses
void
fill_lr_vec(LRModel::Direction const& dir,
LRModel::ModelType const& mdl,
vector<float>& v) const;
std::vector<float>& v) const;
#ifndef NO_MOSES
void
print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
print(std::ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const;
#endif
@ -271,7 +274,7 @@ namespace Moses
PhrasePair<Token>
::fill_lr_vec(LRModel::Direction const& dir,
LRModel::ModelType const& mdl,
vector<float>& v) const
std::vector<float>& v) const
{
// how many distinct scores do we have?
size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
@ -301,7 +304,7 @@ namespace Moses
template<typename Token>
void
PhrasePair<Token>
::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
::print(std::ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const
{
out << toString (V1, this->start1, this->len1) << " ::: "
@ -315,14 +318,14 @@ namespace Moses
out << m->first << ":" << m->second;
}
out << "] [";
vector<float> lrscores;
std::vector<float> lrscores;
this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
for (size_t i = 0; i < lrscores.size(); ++i)
{
if (i) out << " ";
out << boost::format("%.2f") % exp(lrscores[i]);
}
out << "]" << endl;
out << "]" << std::endl;
#if 0
for (int i = 0; i <= Moses::LRModel::NONE; i++)
{

View File

@ -2,7 +2,7 @@
#pragma once
#include <map>
#include<vector>
#include <vector>
#include <string>
#include <iostream>
#include "moses/Util.h"

View File

@ -21,8 +21,8 @@
namespace ugdiss
{
using namespace std;
using namespace boost;
// using namespace std;
// using namespace boost;
namespace bio=boost::iostreams;
template<typename TKN>
@ -56,7 +56,7 @@ namespace ugdiss
typedef boost::shared_ptr<bitvector> bitset_pointer;
typedef TKN Token;
typedef BitSetCache<TSA<TKN> > BSC_t;
/* to allow caching of bit vectors that are expensive to create on
/* to allow caching of bit vectors that are expensive to create on
* the fly */
friend class TSA_tree_iterator<TKN>;
@ -148,8 +148,8 @@ namespace ugdiss
* [keyStart,keyStop)
*/
char const*
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
lower_bound(typename std::vector<TKN>::const_iterator const& keyStart,
typename std::vector<TKN>::const_iterator const& keyStop) const;
char const*
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
@ -160,29 +160,29 @@ namespace ugdiss
* [keyStart,keyStop)
*/
char const*
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
upper_bound(typename std::vector<TKN>::const_iterator const& keyStart,
typename std::vector<TKN>::const_iterator const& keyStop) const;
char const*
upper_bound(TKN const* keyStart, int keyLength) const;
/** dump all suffixes in order to /out/ */
void dump(ostream& out, TokenIndex const& T) const;
void dump(std::ostream& out, TokenIndex const& T) const;
/** fill the dynamic bit set with true for all sentences that contain
* /phrase/.
* @return the raw number of occurrences.
*/
count_type
fillBitSet(vector<TKN> const& phrase, bdBitset& dest) const;
fillBitSet(std::vector<TKN> const& phrase, bdBitset& dest) const;
count_type
fillBitSet(TKN const* key, size_t keyLen, bdBitset& dest) const;
count_type
setBits(char const* startRange, char const* endRange,
boost::dynamic_bitset<typename ::uint64_t>& bs) const;
boost::dynamic_bitset<uint64_t>& bs) const;
void
setTokenBits(char const* startRange, char const* endRange, size_t len,
@ -246,11 +246,11 @@ namespace ugdiss
getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const = 0;
string
std::string
suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
const;
string
std::string
suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
const;
@ -269,18 +269,18 @@ namespace ugdiss
next 16 bits: length of the phrase
*/
::uint64_t
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
typename vector<TKN>::const_iterator const& pstop) const;
getSequenceId(typename std::vector<TKN>::const_iterator const& pstart,
typename std::vector<TKN>::const_iterator const& pstop) const;
::uint64_t
getSequenceId(TKN const* t, ushort plen) const;
/** Return the phrase represented by phrase ID pid_ */
string
std::string
getSequence(::uint64_t pid, TokenIndex const& V) const;
/** Return the phrase represented by phrase ID pid_ */
vector<TKN>
std::vector<TKN>
getSequence(::uint64_t pid) const;
TKN const*
@ -308,7 +308,7 @@ namespace ugdiss
bool
findBranches(TKN const* base, bitvector const& terminals,
vector<tree_iterator>& dest) const;
std::vector<tree_iterator>& dest) const;
double aveIndexEntrySize() const
{
@ -356,7 +356,7 @@ namespace ugdiss
template<typename TKN>
count_type
TSA<TKN>::
fillBitSet(vector<TKN> const& key,
fillBitSet(std::vector<TKN> const& key,
bitvector& bitset) const
{
if (!key.size()) return 0;
@ -555,8 +555,8 @@ namespace ugdiss
template<typename TKN>
char const*
TSA<TKN>::
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
lower_bound(typename std::vector<TKN>::const_iterator const& keyStart,
typename std::vector<TKN>::const_iterator const& keyStop) const
{
TKN const* const a = &(*keyStart);
TKN const* const z = &(*keyStop);
@ -597,8 +597,8 @@ namespace ugdiss
template<typename TKN>
char const*
TSA<TKN>::
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const
upper_bound(typename std::vector<TKN>::const_iterator const& keyStart,
typename std::vector<TKN>::const_iterator const& keyStop) const
{
TKN const* const a = &((TKN)*keyStart);
TKN const* const z = &((TKN)*keyStop);
@ -631,7 +631,7 @@ namespace ugdiss
{
char const* lo = lower_bound(keyStart,keyLen);
char const* up = upper_bound(keyStart,keyLen);
// cerr << up-lo << endl;
// cerr << up-lo << std::endl;
return rawCnt(lo,up);
}
@ -640,8 +640,8 @@ namespace ugdiss
template<typename TKN>
::uint64_t
TSA<TKN>::
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
typename vector<TKN>::const_iterator const& pstop) const
getSequenceId(typename std::vector<TKN>::const_iterator const& pstart,
typename std::vector<TKN>::const_iterator const& pstop) const
{
return getSequenceId(&(*pstart),pstop-pstart);
}
@ -668,14 +668,14 @@ namespace ugdiss
//---------------------------------------------------------------------------
template<typename TKN>
vector<TKN>
std::vector<TKN>
TSA<TKN>::
getSequence(::uint64_t pid) const
{
size_t plen = pid % 65536;
size_t offset = (pid >> 16) % 65536;
TKN const* w = corpus->sntStart(pid >> 32)+offset;
vector<TKN> ret(plen);
std::vector<TKN> ret(plen);
for (size_t i = 0; i < plen; i++, w = w->next())
{
assert(w);
@ -685,11 +685,11 @@ namespace ugdiss
}
template<typename TKN>
string
std::string
TSA<TKN>::
getSequence(::uint64_t pid, TokenIndex const& V) const
{
ostringstream buf;
std::ostringstream buf;
TKN const* a = getSequenceStart(pid);
buf << V[a->id()];
size_t len = getSequenceLength(pid);
@ -806,7 +806,7 @@ namespace ugdiss
bool
TSA<TKN>::
findBranches(TKN const* base, bitvector const& terminals,
vector<tree_iterator>& dest) const
std::vector<tree_iterator>& dest) const
{
dest.assign(terminals.count(),tree_iterator(this));
for (size_t i = terminals.find_first(), k = 0;

View File

@ -9,24 +9,24 @@
#include <boost/dynamic_bitset.hpp>
#include <stdint.h>
#include <iostream>
// A simple mechanism for caching bit vectors representing occurrences of token
// A simple mechanism for caching bit vectors representing occurrences of token
// sequences in a corpus. Useful for very frequent items for which the bit
// vector is expensive to create on the fly. The variable threshold determines
// when bit vectors are cached and when they are created on the fly, using the
// vector is expensive to create on the fly. The variable threshold determines
// when bit vectors are cached and when they are created on the fly, using the
// size of the range of entries in the TSA's index in bytes to determine
// whether or not to store the respective bit vector in the cache.
// whether or not to store the respective bit vector in the cache.
namespace ugdiss
{
using namespace std;
// using namespace std;
template<typename TSA>
class
BitSetCache
{
public:
typedef boost::dynamic_bitset<typename ::uint64_t> BitSet;
typedef boost::dynamic_bitset<uint64_t> BitSet;
typedef boost::shared_ptr<BitSet> bsptr;
typedef map<pair<char const*,ushort>,bsptr> myMap;
typedef std::map<std::pair<char const*,ushort>,bsptr> myMap;
typedef myMap::iterator myMapIter;
private:
TSA const* tsa;
@ -56,7 +56,7 @@ namespace ugdiss
if (!lo) return ret;
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
std::pair<char const*,ushort> k(lo,keyLen);
myMapIter m = cached1.find(k);
if (m != cached1.end())
ret = m->second;
@ -83,9 +83,9 @@ namespace ugdiss
if (!lo) return ret;
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
std::pair<char const*,ushort> k(lo,keyLen);
// cout << "bla " << keyStart->id() << " "
// << cached2.size() << " " << up-lo << " " << k.second << endl;
// << cached2.size() << " " << up-lo << " " << k.second << std::endl;
myMapIter m = cached2.find(k);
if (m != cached2.end())
ret = m->second;

View File

@ -21,11 +21,11 @@ namespace ugdiss
#define _DISPLAY_CHAIN
// for debugging only
template<typename T>
void display(T const* x, string label)
void display(T const* x, std::string label)
{
cout << label << ":";
for (;x;x=next(x)) cout << " " << x->lemma;
cout << endl;
std::cout << label << ":";
for (;x;x=next(x)) std::cout << " " << x->lemma;
std::cout << std::endl;
}
#endif
@ -47,11 +47,11 @@ namespace ugdiss
TSA_tree_iterator
{
protected:
vector<char const*> lower;
vector<char const*> upper;
std::vector<char const*> lower;
std::vector<char const*> upper;
// for debugging ...
void showBounds(ostream& out) const;
void showBounds(std::ostream& out) const;
public:
typedef TKN Token;
@ -76,7 +76,7 @@ namespace ugdiss
bool full_match_only=true);
TSA_tree_iterator(TSA<Token> const* s,
TokenIndex const& V,
string const& key);
std::string const& key);
char const* lower_bound(int p) const;
char const* upper_bound(int p) const;
@ -96,7 +96,7 @@ namespace ugdiss
virtual bool over();
virtual bool up();
string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
std::string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
// checks if the sentence [start,stop) contains the given sequence.
bool match(Token const* start, Token const* stop) const;
@ -105,23 +105,23 @@ namespace ugdiss
// fillBitSet: deprecated; use markSentences() instead
count_type
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const;
count_type
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const;
boost::dynamic_bitset<uint64_t>& dest) const;
count_type
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
count_type
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const;
count_type
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset,
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset,
bool markOnlyStartPosition=false) const;
count_type
markOccurrences(vector<ushort>& dest) const;
markOccurrences(std::vector<ushort>& dest) const;
::uint64_t
getSequenceId() const;
@ -181,7 +181,7 @@ namespace ugdiss
return this->size();
}
sptr<vector<typename ttrack::Position> >
sptr<std::vector<typename ttrack::Position> >
randomSample(int level, size_t N) const;
};
@ -286,7 +286,7 @@ namespace ugdiss
// display(root->corpus->getToken(U),"U1");
int x = root->corpus->cmp(U,L,lower.size()-1);
// cerr << "x=" << x << endl;
// cerr << "x=" << x << std::endl;
if (x != 1)
return false;
lower.back() = upper.back();
@ -359,10 +359,10 @@ namespace ugdiss
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s,
TokenIndex const& V,
string const& key)
std::string const& key)
: root(s)
{
istringstream buf(key); string w;
std::istringstream buf(key); std::string w;
while (buf >> w)
{
if (this->extend(V[w]))
@ -482,8 +482,8 @@ namespace ugdiss
#if 0
tsa::ArrayEntry I;
root->readEntry(lo,I);
cout << I.sid << " " << I.offset << endl;
cout << root->corpus->sntLen(I.sid) << endl;
cout << I.sid << " " << I.offset << std::endl;
cout << root->corpus->sntLen(I.sid) << std::endl;
#endif
hi = root->find_end(lo, hi, getToken(0), 1, 0);
upper.push_back(hi);
@ -574,11 +574,11 @@ namespace ugdiss
Token const* eos = root->corpus->sntEnd(A.sid);
#endif
if (p < 0) p += lower.size();
// cerr << p << ". " << t->id() << endl;
// cerr << p << ". " << t->id() << std::endl;
while (p-- > 0)
{
t = next(t);
// if (t) cerr << p << ". " << t->id() << endl;
// if (t) cerr << p << ". " << t->id() << std::endl;
assert(t >= bos && t < eos);
}
return t;
@ -616,7 +616,7 @@ namespace ugdiss
template<typename Token>
count_type
TSA_tree_iterator<Token>::
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const
{
return markSentences(bitset);
}
@ -626,7 +626,7 @@ namespace ugdiss
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const
{
assert(root && root->corpus);
bitset.resize(root->corpus->size());
@ -653,7 +653,7 @@ namespace ugdiss
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset, bool markOnlyStartPosition) const
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset, bool markOnlyStartPosition) const
{
assert(root && root->corpus);
if (bitset.size() != root->corpus->numTokens())
@ -669,7 +669,7 @@ namespace ugdiss
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markOccurrences(vector<ushort>& dest) const
markOccurrences(std::vector<ushort>& dest) const
{
assert(root && root->corpus);
assert(dest.size() == root->corpus->numTokens());
@ -700,7 +700,7 @@ namespace ugdiss
count_type
TSA_tree_iterator<Token>::
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<typename ::uint64_t>& dest) const
boost::dynamic_bitset<uint64_t>& dest) const
{
count_type matchCount=0;
Token const* a = getToken(0);
@ -769,7 +769,7 @@ namespace ugdiss
}
template<typename Token>
string
std::string
TSA_tree_iterator<Token>::
str(TokenIndex const* V, int start, int stop) const
{
@ -779,7 +779,7 @@ namespace ugdiss
assert(start>=0 && start < int(this->size()));
assert(stop > 0 && stop <= int(this->size()));
Token const* x = this->getToken(0);
ostringstream buf;
std::ostringstream buf;
for (int i = start; i < stop; ++i, x = x->next())
{
assert(x);
@ -802,7 +802,7 @@ namespace ugdiss
assert(start>=0 && start < int(this->size()));
assert(stop > 0 && stop <= int(this->size()));
Token const* x = this->getToken(0);
ostringstream buf;
std::ostringstream buf;
for (int i = start; i < stop; ++i, x = x->next())
{
assert(x);
@ -899,15 +899,15 @@ namespace ugdiss
/// randomly select up to N occurrences of the sequence
template<typename Token>
sptr<vector<typename ttrack::Position> >
sptr<std::vector<typename ttrack::Position> >
TSA_tree_iterator<Token>::
randomSample(int level, size_t N) const
{
if (level < 0) level += lower.size();
assert(level >=0);
sptr<vector<typename ttrack::Position> >
ret(new vector<typename ttrack::Position>(N));
sptr<std::vector<typename ttrack::Position> >
ret(new std::vector<typename ttrack::Position>(N));
size_t m=0; // number of samples selected so far
typename Token::ArrayEntry I(lower.at(level));

View File

@ -22,7 +22,7 @@
namespace ugdiss
{
using namespace std;
// using namespace std;
typedef boost::dynamic_bitset<uint64_t> bdBitset;
@ -39,12 +39,12 @@ namespace ugdiss
}
template<typename Token>
string
std::string
toString(TokenIndex const& V, Token const* x, size_t const len)
{
if (!len) return "";
UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
ostringstream buf;
std::ostringstream buf;
buf << V[x->id()];
size_t i = 1;
for (x = x->next(); x && i < len; ++i, x = x->next())
@ -100,10 +100,10 @@ namespace ugdiss
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
/** Don't use this unless you want a copy of the sentence */
vector<TKN>
std::vector<TKN>
operator[](id_type sid) const
{
return vector<TKN>(sntStart(sid),sntEnd(sid));
return std::vector<TKN>(sntStart(sid),sntEnd(sid));
}
/** @return size of corpus in number of sentences */
@ -114,9 +114,9 @@ namespace ugdiss
/** @return string representation of sentence /sid/
* Currently only defined for Ttrack<id_type> */
string str(id_type sid, TokenIndex const& T) const;
std::string str(id_type sid, TokenIndex const& T) const;
string pid2str(TokenIndex const* V, uint64_t pid) const;
std::string pid2str(TokenIndex const* V, uint64_t pid) const;
// /** @return string representation of sentence /sid/
// * Currently only defined for Ttrack<id_type> */
@ -124,8 +124,8 @@ namespace ugdiss
/** counts the tokens in the corpus; used for example in the construction of
* token sequence arrays */
count_type count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff=0, ostream* log=NULL) const;
count_type count_tokens(std::vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff=0, std::ostream* log=NULL) const;
// static id_type toID(TKN const& t);
@ -171,8 +171,8 @@ namespace ugdiss
template<typename TKN>
count_type
Ttrack<TKN>::
count_tokens(vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff, ostream* log) const
count_tokens(std::vector<count_type>& cnt, bdBitset const* filter,
int lengthCutoff, std::ostream* log) const
{
bdBitset filter2;
if (!filter)
@ -199,7 +199,7 @@ namespace ugdiss
{
if (log)
*log << "WARNING: skipping sentence #" << sid
<< " with more than 65536 tokens" << endl;
<< " with more than 65536 tokens" << std::endl;
expectedTotal -= stop-k;
}
else
@ -207,7 +207,7 @@ namespace ugdiss
totalCount += stop-k;
for (; k < stop; ++k)
{
// cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << endl;
// cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << std::endl;
id_type wid = k->id();
while (wid >= cnt.size()) cnt.push_back(0);
cnt[wid]++;
@ -217,8 +217,8 @@ namespace ugdiss
if (this->size() == filter->count())
{
if (totalCount != expectedTotal)
cerr << "OOPS: expected " << expectedTotal
<< " tokens but counted " << totalCount << endl;
std::cerr << "OOPS: expected " << expectedTotal
<< " tokens but counted " << totalCount << std::endl;
assert(totalCount == expectedTotal);
}
return totalCount;
@ -244,25 +244,25 @@ namespace ugdiss
int ret=-1;
#if 0
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << std::endl;
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << std::endl;
#endif
while (a >= bosA && a < eosA)
{
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << std::endl;
if (*a < *b) { break; } // return -1;
if (*a > *b) { ret = 2; break; } // return 2;
a = next(a);
b = next(b);
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << std::endl;
if (--keyLength==0 || b < bosB || b >= eosB)
{
ret = (a < bosA || a >= eosA) ? 0 : 1;
break;
}
}
// cerr << "RETURNING " << ret << endl;
// cerr << "RETURNING " << ret << std::endl;
return ret;
}
@ -312,7 +312,7 @@ namespace ugdiss
{
cout << t2->lemma << "." << int(t2->minpos) << " "
<< k->lemma << "." << int(k->minpos) << " "
<< t2->cmp(*k) << endl;
<< t2->cmp(*k) << std::endl;
}
}
#endif
@ -382,7 +382,7 @@ namespace ugdiss
}
template<typename TKN>
string
std::string
Ttrack<TKN>::
pid2str(TokenIndex const* V, uint64_t pid) const
{
@ -390,7 +390,7 @@ namespace ugdiss
pid >>= 16;
uint32_t off = pid % (1<<16);
uint32_t sid = pid>>16;
ostringstream buf;
std::ostringstream buf;
TKN const* t = sntStart(sid) + off;
TKN const* stop = t + len;
if (V)

View File

@ -57,13 +57,13 @@ namespace ugdiss
cout << "A: " << z->id();
for (z = next(z); z >= bosA && z < eosA; z = next(z))
cout << "-" << z->id();
cout << endl;
cout << std::endl;
z = b;
cout << "B: " << z->id();
for (z = next(z); z >= bosB && z < eosB; z = next(z))
cout << "-" << z->id();
cout << endl;
cout << std::endl;
#endif
while (*a == *b)
{
@ -76,7 +76,7 @@ namespace ugdiss
}
int x = a->cmp(*b);
// cout << " " << (x < 0 ? "YES" : "NO") << endl;
// cout << " " << (x < 0 ? "YES" : "NO") << std::endl;
assert (x != 0);
return x < 0;

View File

@ -10,24 +10,24 @@
#include "tpt_typedefs.h"
namespace ugdiss
{
using namespace std;
// using namespace std;
typedef boost::dynamic_bitset<uint64_t> bitvector;
typedef vector<vector<float> > flt_2d_table;
typedef vector<flt_2d_table> flt_3d_table;
typedef vector<flt_3d_table> flt_4d_table;
typedef std::vector<std::vector<float> > flt_2d_table;
typedef std::vector<flt_2d_table> flt_3d_table;
typedef std::vector<flt_3d_table> flt_4d_table;
typedef vector<vector<ushort> > ushort_2d_table;
typedef vector<ushort_2d_table> ushort_3d_table;
typedef vector<ushort_3d_table> ushort_4d_table;
typedef std::vector<std::vector<ushort> > ushort_2d_table;
typedef std::vector<ushort_2d_table> ushort_3d_table;
typedef std::vector<ushort_3d_table> ushort_4d_table;
typedef vector<vector<short> > short_2d_table;
typedef vector<short_2d_table> short_3d_table;
typedef vector<short_3d_table> short_4d_table;
typedef std::vector<std::vector<short> > short_2d_table;
typedef std::vector<short_2d_table> short_3d_table;
typedef std::vector<short_3d_table> short_4d_table;
typedef vector<vector<int> > int_2d_table;
typedef vector<int_2d_table> int_3d_table;
typedef vector<int_3d_table> int_4d_table;
typedef std::vector<std::vector<int> > int_2d_table;
typedef std::vector<int_2d_table> int_3d_table;
typedef std::vector<int_3d_table> int_4d_table;
}
#define sptr boost::shared_ptr

View File

@ -296,6 +296,7 @@ namespace Moses
load_extra_data(string bname, bool locking = true)
{
using namespace boost;
using namespace ugdiss;
// TO DO: ADD CHECKS FOR ROBUSTNESS
// - file existence?
// - same number of lines?
@ -701,7 +702,7 @@ namespace Moses
#if 1
if (m_bias_log && m_lr_func && m_bias_loglevel > 3)
{
typename PhrasePair<Token>::SortDescendingByJointCount sorter;
PhrasePair<Token>::SortDescendingByJointCount sorter;
sort(ppfix.begin(), ppfix.end(),sorter);
BOOST_FOREACH(PhrasePair<Token> const& pp, ppfix)
{

View File

@ -145,7 +145,7 @@ namespace Moses
std::vector<std::vector<id_type> > wlex21;
// word translation lexicon (without counts, get these from calc_lex.COOC)
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
typedef ugdiss::mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
mm2dtable_t COOCraw;
TargetPhrase*

View File

@ -16,7 +16,7 @@ namespace Moses {
float m_alpha;
string m_lexfile;
public:
LexicalPhraseScorer2<Token> scorer;
ugdiss::LexicalPhraseScorer2<Token> scorer;
PScoreLex1(string const& alphaspec, string const& lexfile)
{

View File

@ -27,7 +27,7 @@ namespace Moses {
{
if (x == '+') { --checksum; continue; }
if (x != 'g' && x != 's' && x != 'r') continue;
string s = (format("pbwd-%c%.3f") % x % c).str();
string s = (boost::format("pbwd-%c%.3f") % x % c).str();
this->m_feature_names.push_back(s);
}
this->m_num_feats = this->m_feature_names.size();

View File

@ -28,7 +28,7 @@ namespace Moses {
{
if (x == '+') { --checksum; continue; }
if (x != 'g' && x != 's' && x != 'r') continue;
string s = (format("pfwd-%c%.3f") % x % c).str();
string s = (boost::format("pfwd-%c%.3f") % x % c).str();
this->m_feature_names.push_back(s);
}
this->m_num_feats = this->m_feature_names.size();

View File

@ -12,7 +12,7 @@ namespace Moses {
class
PScoreUnaligned : public PhraseScorer<Token>
{
typedef boost::dynamic_bitset<typename ::uint64_t> bitvector;
typedef boost::dynamic_bitset<uint64_t> bitvector;
public:
PScoreUnaligned(string const spec)
{

View File

@ -1,5 +1,5 @@
#include "TranslationRequest.h"
#include "moses/ContextScope.h"
#include "moses/ContextScope.h"
#include <boost/foreach.hpp>
namespace MosesServer

View File

@ -24,9 +24,10 @@
#define BOOST_TEST_MODULE MosesTrainingScoreFeature
#include <boost/test/test_tools.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/assign/list_of.hpp>
#include <unordered_set>
#include <unordered_map>
//#include <unordered_set>
//#include <unordered_map>
using namespace MosesTraining;
using namespace std;
@ -54,16 +55,16 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
//Check that configure rejects illegal domain arg combinations
ScoreFeatureManager manager;
BOOST_CHECK_THROW(
manager.configure( {"--DomainRatio","/dev/null","--DomainIndicator","/dev/null"}),
manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure( {"--SparseDomainSubset","/dev/null","--SparseDomainRatio","/dev/null"}),
manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure( {"--SparseDomainBlah","/dev/null"}),
manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
ScoreFeatureArgumentException);
BOOST_CHECK_THROW(
manager.configure( {"--DomainSubset"}),
manager.configure(boost::assign::list_of("--DomainSubset")),
ScoreFeatureArgumentException);
}
@ -97,25 +98,27 @@ T adder(T first, Args... args)
BOOST_AUTO_TEST_CASE(manager_config_domain)
{
checkDomainConfigured<RatioDomainFeature>
( {"--DomainRatio","/dev/null"});
(boost::assign::list_of("--DomainRatio")("/dev/null"));
checkDomainConfigured<IndicatorDomainFeature>
( {"--DomainIndicator","/dev/null"});
(boost::assign::list_of("--DomainIndicator")("/dev/null"));
checkDomainConfigured<SubsetDomainFeature>
( {"--DomainSubset","/dev/null"});
(boost::assign::list_of("--DomainSubset")("/dev/null"));
checkDomainConfigured<SparseRatioDomainFeature>
( {"--SparseDomainRatio","/dev/null"});
(boost::assign::list_of("--SparseDomainRatio")("/dev/null"));
checkDomainConfigured<SparseIndicatorDomainFeature>
( {"--SparseDomainIndicator","/dev/null"});
(boost::assign::list_of("--SparseDomainIndicator")("/dev/null"));
checkDomainConfigured<SparseSubsetDomainFeature>
( {"--SparseDomainSubset","/dev/null"});
(boost::assign::list_of("--SparseDomainSubset")("/dev/null"));
/*
// C++11 testing
unordered_set<int> s;
s.insert(4);
s.insert(7);
s.insert(4);
s.insert(1);
for (auto i: s) {
for (auto i: s) {
cerr << i << " ";
}
@ -124,7 +127,7 @@ for (auto i: s) {
m["ba"] = 6;
m["aabc"] = 7;
for (auto i: m) {
for (auto i: m) {
cerr << i.first << "=" << i.second << " ";
}
@ -132,6 +135,6 @@ for (auto i: m) {
std::string s1 = "x", s2 = "aa", s3 = "bb", s4 = "yy";
std::string ssum = adder(s1, s2, s3, s4);
*/
}