mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 15:00:33 +03:00
Removal of 'using namespace ...' from several header files.
This commit is contained in:
parent
515862ee1c
commit
e94921dc44
@ -21,6 +21,7 @@ mingw/MosesGUI/icons_rc.py
|
||||
mingw/MosesGUI/Ui_credits.py
|
||||
mingw/MosesGUI/Ui_mainWindow.py
|
||||
moses/TranslationModel/UG
|
||||
moses/server
|
||||
phrase-extract/pcfg-common
|
||||
phrase-extract/syntax-common
|
||||
randlm
|
||||
|
2
Jamroot
2
Jamroot
@ -108,7 +108,7 @@ external-lib z ;
|
||||
|
||||
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
|
||||
#requirements += <library>dl ;
|
||||
requirements += <cxxflags>-std=c++0x ;
|
||||
#requirements += <cxxflags>-std=c++0x ;
|
||||
|
||||
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
|
||||
if [ option.get "full-tcmalloc" : : "yes" ] {
|
||||
|
@ -27,7 +27,7 @@ BaseManager::GetSource() const
|
||||
return m_source;
|
||||
}
|
||||
|
||||
const ttasksptr&
|
||||
const ttasksptr
|
||||
BaseManager::GetTtask() const {
|
||||
return m_ttask.lock();
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ public:
|
||||
|
||||
//! the input sentence being decoded
|
||||
const InputType& GetSource() const;
|
||||
const ttasksptr& GetTtask() const;
|
||||
const ttasksptr GetTtask() const;
|
||||
|
||||
virtual void Decode() = 0;
|
||||
// outputs
|
||||
|
@ -1115,8 +1115,7 @@ void StaticData::LoadSparseWeightsFromConfig()
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<float> > weights = m_parameter->GetAllWeights();
|
||||
std::map<std::string, std::vector<float> >::iterator iter;
|
||||
// for (auto iter = weights.begin(); iter != weights.end(); ++iter) {
|
||||
std::map<std::string, std::vector<float> >::iterator iter;
|
||||
for (iter = weights.begin(); iter != weights.end(); ++iter) {
|
||||
// this indicates that it is sparse feature
|
||||
if (featureNames.find(iter->first) == featureNames.end()) {
|
||||
|
@ -85,24 +85,25 @@ int main(int argc, char* argv[])
|
||||
++k;
|
||||
|
||||
size_t s1,s2,e1,e2; int po_fwd=-1,po_bwd=-1;
|
||||
vector<uchar> caln;
|
||||
// cout << sid << " " << B.docname(sid) << endl;
|
||||
std::vector<unsigned char> caln;
|
||||
// cout << sid << " " << B.docname(sid) << std::endl;
|
||||
if (!B.find_trg_phr_bounds(sid, off, off+m.size(),
|
||||
s1,s2,e1,e2,po_fwd,po_bwd,
|
||||
&caln, NULL, &m == &m2))
|
||||
{
|
||||
// cout << "alignment failure" << endl;
|
||||
// cout << "alignment failure" << std::endl;
|
||||
}
|
||||
|
||||
cout << sid << " " << B.docname(sid)
|
||||
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
|
||||
<< "\n";
|
||||
write_sentence(*B.T1, sid, *B.V1, cout); cout << "\n";
|
||||
write_sentence(*B.T2, sid, *B.V2, cout); cout << "\n";
|
||||
std::cout << sid << " " << B.docname(sid)
|
||||
<< " dfwd=" << po_fwd << " dbwd=" << po_bwd
|
||||
<< "\n";
|
||||
|
||||
write_sentence(*B.T1, sid, *B.V1, std::cout); std::cout << "\n";
|
||||
write_sentence(*B.T2, sid, *B.V2, std::cout); std::cout << "\n";
|
||||
B.write_yawat_alignment(sid,
|
||||
m1.size() ? &m1 : NULL,
|
||||
m2.size() ? &m2 : NULL, cout);
|
||||
cout << endl;
|
||||
m2.size() ? &m2 : NULL, std::cout);
|
||||
std::cout << std::endl;
|
||||
|
||||
}
|
||||
}
|
||||
@ -141,9 +142,9 @@ interpret_args(int ac, char* av[])
|
||||
po::notify(vm);
|
||||
if (vm.count("help"))
|
||||
{
|
||||
cout << "\nusage:\n\t" << av[0]
|
||||
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << endl;
|
||||
cout << o << endl;
|
||||
std::cout << "\nusage:\n\t" << av[0]
|
||||
<< " [options] [--q1=<L1string>] [--q2=<L2string>]" << std::endl;
|
||||
std::cout << o << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
@ -3,10 +3,10 @@
|
||||
#ifndef __Pickler
|
||||
#define __Pickler
|
||||
|
||||
#include<iostream>
|
||||
#include<string>
|
||||
#include<vector>
|
||||
#include<map>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "tpt_typedefs.h"
|
||||
#include "num_read_write.h"
|
||||
#include <cassert>
|
||||
@ -20,7 +20,7 @@ namespace ugdiss
|
||||
/**
|
||||
* The following functions write and read data in a compact binary
|
||||
* representation. Write and read errors can be checked directly
|
||||
* on the ostream object after the function call, so no return value is
|
||||
* on the std::ostream object after the function call, so no return value is
|
||||
* necessary.*/
|
||||
void binwrite(std::ostream& out, char data);
|
||||
void binwrite(std::ostream& out, unsigned char data);
|
||||
@ -165,7 +165,7 @@ namespace ugdiss
|
||||
binread(in,k);
|
||||
binread(in,v);
|
||||
data[k] = v;
|
||||
// cerr << "* " << i << " " << k << " " << v << endl;
|
||||
// cerr << "* " << i << " " << k << " " << v << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include "tpt_typedefs.h"
|
||||
// #include <stdint.h>
|
||||
#include <cassert>
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
|
||||
#ifndef uchar
|
||||
#endif
|
||||
@ -29,7 +29,7 @@ namespace ugdiss
|
||||
{
|
||||
// void tightwritex(iostream& out, size_t data, bool flag);
|
||||
void
|
||||
tightwrite(std::ostream& out, ::uint64_t data, bool flag);
|
||||
tightwrite(std::ostream& out, uint64_t data, bool flag);
|
||||
|
||||
filepos_type
|
||||
tightread(std::istream& in, std::ios::pos_type stop);
|
||||
@ -91,7 +91,7 @@ namespace ugdiss
|
||||
tightread4(char const* start, char const* stop, uint32_t& dest);
|
||||
|
||||
char const*
|
||||
tightread8(char const* start, char const* stop, ::uint64_t& dest);
|
||||
tightread8(char const* start, char const* stop, uint64_t& dest);
|
||||
|
||||
template<typename numType>
|
||||
char const*
|
||||
@ -102,13 +102,13 @@ namespace ugdiss
|
||||
if (sizeof(numType)==4)
|
||||
return tightread4(start,stop,reinterpret_cast<uint32_t&>(dest));
|
||||
else if (sizeof(numType)==8)
|
||||
return tightread8(start,stop,reinterpret_cast<typename ::uint64_t&>(dest));
|
||||
return tightread8(start,stop,reinterpret_cast<uint64_t&>(dest));
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// char const*
|
||||
// tightread(char const* start, char const* stop, ::uint64_t& dest);
|
||||
// tightread(char const* start, char const* stop, uint64_t& dest);
|
||||
|
||||
// char const*
|
||||
// tightread(char const* start, char const* stop, filepos_type& dest);
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
namespace ugdiss
|
||||
@ -28,9 +28,9 @@ namespace ugdiss
|
||||
class TokenIndex
|
||||
{
|
||||
/** Reverse index: maps from ID to char const* */
|
||||
mutable vector<char const*> ridx;
|
||||
mutable std::vector<char const*> ridx;
|
||||
/** Label for the UNK token */
|
||||
string unkLabel;
|
||||
std::string unkLabel;
|
||||
id_type unkId,numTokens;
|
||||
|
||||
/// New 2013-09-02: thread-safe
|
||||
@ -38,8 +38,8 @@ namespace ugdiss
|
||||
|
||||
// NEW 2011-01-30: dynamic adding of unknown items
|
||||
bool dynamic; // dynamically assign a new word id to unknown items?
|
||||
boost::shared_ptr<map<string,id_type> > str2idExtra;
|
||||
boost::shared_ptr<vector<string> > newWords;
|
||||
boost::shared_ptr<std::map<std::string,id_type> > str2idExtra;
|
||||
boost::shared_ptr<std::vector<std::string> > newWords;
|
||||
// The use of pointers to external items is a bit of a bad hack
|
||||
// in terms of the semantic of TokenIndex const: since external items
|
||||
// are changed, the TokenIndex instance remains unchanged and const works,
|
||||
@ -48,7 +48,7 @@ namespace ugdiss
|
||||
// thread-safe!
|
||||
|
||||
public:
|
||||
/** string->ID lookup works via binary search in a vector of Entry instances */
|
||||
/** string->ID lookup works via binary search in a std::vector of Entry instances */
|
||||
class Entry
|
||||
{
|
||||
public:
|
||||
@ -69,26 +69,26 @@ namespace ugdiss
|
||||
Entry const* startIdx;
|
||||
Entry const* endIdx;
|
||||
CompFunc comp;
|
||||
TokenIndex(string unkToken="UNK");
|
||||
// TokenIndex(string fname,string unkToken="UNK",bool dyna=false);
|
||||
void open(string fname,string unkToken="UNK",bool dyna=false);
|
||||
TokenIndex(std::string unkToken="UNK");
|
||||
// TokenIndex(std::string fname,std::string unkToken="UNK",bool dyna=false);
|
||||
void open(std::string fname,std::string unkToken="UNK",bool dyna=false);
|
||||
void close();
|
||||
// id_type unkId,numTokens;
|
||||
id_type operator[](char const* w) const;
|
||||
id_type operator[](string const& w) const;
|
||||
id_type operator[](std::string const& w) const;
|
||||
char const* const operator[](id_type id) const;
|
||||
char const* const operator[](id_type id);
|
||||
vector<char const*> reverseIndex() const;
|
||||
std::vector<char const*> reverseIndex() const;
|
||||
|
||||
string toString(vector<id_type> const& v);
|
||||
string toString(vector<id_type> const& v) const;
|
||||
std::string toString(std::vector<id_type> const& v);
|
||||
std::string toString(std::vector<id_type> const& v) const;
|
||||
|
||||
string toString(id_type const* start, id_type const* const stop);
|
||||
string toString(id_type const* start, id_type const* const stop) const;
|
||||
std::string toString(id_type const* start, id_type const* const stop);
|
||||
std::string toString(id_type const* start, id_type const* const stop) const;
|
||||
|
||||
vector<id_type> toIdSeq(string const& line) const;
|
||||
std::vector<id_type> toIdSeq(std::string const& line) const;
|
||||
|
||||
bool fillIdSeq(string const& line, vector<id_type> & v) const;
|
||||
bool fillIdSeq(std::string const& line, std::vector<id_type> & v) const;
|
||||
|
||||
void iniReverseIndex();
|
||||
id_type getNumTokens() const;
|
||||
@ -104,27 +104,27 @@ namespace ugdiss
|
||||
|
||||
char const* const getUnkToken() const;
|
||||
|
||||
void write(string fname); // write TokenIndex to a new file
|
||||
void write(std::string fname); // write TokenIndex to a new file
|
||||
bool isDynamic() const;
|
||||
bool setDynamic(bool onoff);
|
||||
|
||||
void setUnkLabel(string unk);
|
||||
void setUnkLabel(std::string unk);
|
||||
};
|
||||
|
||||
void
|
||||
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
|
||||
string const& ofile, string const& unkToken);
|
||||
write_tokenindex_to_disk(std::vector<std::pair<std::string,uint32_t> > const& tok,
|
||||
std::string const& ofile, std::string const& unkToken);
|
||||
|
||||
/** for sorting words by frequency */
|
||||
class compWords
|
||||
{
|
||||
string unk;
|
||||
std::string unk;
|
||||
public:
|
||||
compWords(string _unk) : unk(_unk) {};
|
||||
compWords(std::string _unk) : unk(_unk) {};
|
||||
|
||||
bool
|
||||
operator()(pair<string,size_t> const& A,
|
||||
pair<string,size_t> const& B) const
|
||||
operator()(std::pair<std::string,size_t> const& A,
|
||||
std::pair<std::string,size_t> const& B) const
|
||||
{
|
||||
if (A.first == unk) return false;// do we still need this special treatment?
|
||||
if (B.first == unk) return true; // do we still need this special treatment?
|
||||
@ -136,27 +136,27 @@ namespace ugdiss
|
||||
|
||||
template<class MYMAP>
|
||||
void
|
||||
mkTokenIndex(string ofile,MYMAP const& M,string unkToken)
|
||||
mkTokenIndex(std::string ofile,MYMAP const& M,std::string unkToken)
|
||||
{
|
||||
// typedef pair<uint32_t,id_type> IndexEntry; // offset and id
|
||||
typedef pair<string,uint32_t> Token; // token and id
|
||||
// typedef std::pair<uint32_t,id_type> IndexEntry; // offset and id
|
||||
typedef std::pair<std::string,uint32_t> Token; // token and id
|
||||
|
||||
|
||||
// first, sort the word list in decreasing order of frequency, so that we
|
||||
// can assign IDs in an encoding-efficient manner (high frequency. low ID)
|
||||
vector<pair<string,size_t> > wcounts(M.size()); // for sorting by frequency
|
||||
std::vector<std::pair<std::string,size_t> > wcounts(M.size()); // for sorting by frequency
|
||||
typedef typename MYMAP::const_iterator myIter;
|
||||
size_t z=0;
|
||||
for (myIter m = M.begin(); m != M.end(); m++)
|
||||
{
|
||||
// cout << m->first << " " << m->second << endl;
|
||||
wcounts[z++] = pair<string,size_t>(m->first,m->second);
|
||||
// cout << m->first << " " << m->second << std::endl;
|
||||
wcounts[z++] = std::pair<std::string,size_t>(m->first,m->second);
|
||||
}
|
||||
compWords compFunc(unkToken);
|
||||
sort(wcounts.begin(),wcounts.end(),compFunc);
|
||||
|
||||
// Assign IDs ...
|
||||
vector<Token> tok(wcounts.size());
|
||||
std::vector<Token> tok(wcounts.size());
|
||||
for (size_t i = 0; i < wcounts.size(); i++)
|
||||
tok[i] = Token(wcounts[i].first,i);
|
||||
// and re-sort in alphabetical order
|
||||
@ -166,9 +166,9 @@ namespace ugdiss
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
fill_token_seq(TokenIndex& V, string const& line, vector<Token>& dest)
|
||||
fill_token_seq(TokenIndex& V, std::string const& line, std::vector<Token>& dest)
|
||||
{
|
||||
istringstream buf(line); string w;
|
||||
std::istringstream buf(line); std::string w;
|
||||
while (buf>>w) dest.push_back(Token(V[w]));
|
||||
}
|
||||
}
|
||||
|
@ -71,10 +71,19 @@ namespace Moses {
|
||||
class Mmsapt;
|
||||
namespace bitext
|
||||
{
|
||||
using namespace ugdiss;
|
||||
|
||||
// using namespace ugdiss;
|
||||
using ugdiss::bitvector;
|
||||
using ugdiss::Ttrack;
|
||||
using ugdiss::TSA;
|
||||
using ugdiss::imTSA;
|
||||
using ugdiss::mmTSA;
|
||||
using ugdiss::L2R_Token;
|
||||
using ugdiss::SimpleWordId;
|
||||
using ugdiss::imTtrack;
|
||||
using ugdiss::mmTtrack;
|
||||
using ugdiss::binread;
|
||||
float lbop(size_t const tries, size_t const succ, float const confidence);
|
||||
void write_bitvector(bitvector const& v, ostream& out);
|
||||
void write_bitvector(bitvector const& v, std::ostream& out);
|
||||
|
||||
#ifndef NO_MOSES
|
||||
struct
|
||||
@ -86,7 +95,7 @@ namespace Moses {
|
||||
boost::shared_mutex lock;
|
||||
sptr<SamplingBias> bias;
|
||||
sptr<pstats::cache_t> cache1, cache2;
|
||||
ostream* bias_log;
|
||||
std::ostream* bias_log;
|
||||
ContextForQuery() : bias_log(NULL) { }
|
||||
};
|
||||
#endif
|
||||
@ -96,10 +105,10 @@ namespace Moses {
|
||||
{
|
||||
public:
|
||||
typedef TKN Token;
|
||||
typedef typename TSA<Token>::tree_iterator iter;
|
||||
typedef typename ugdiss::TSA<Token>::tree_iterator iter;
|
||||
typedef typename std::vector<PhrasePair<Token> > vec_ppair;
|
||||
typedef typename lru_cache::LRU_Cache<uint64_t, vec_ppair> pplist_cache_t;
|
||||
typedef TSA<Token> tsa;
|
||||
typedef ugdiss::TSA<Token> tsa;
|
||||
friend class Moses::Mmsapt;
|
||||
protected:
|
||||
mutable boost::shared_mutex m_lock; // for thread-safe operation
|
||||
@ -112,7 +121,7 @@ namespace Moses {
|
||||
size_t m_pstats_cache_threshold; // threshold for caching sampling results
|
||||
sptr<pstats::cache_t> m_cache1, m_cache2; // caches for sampling results
|
||||
|
||||
vector<string> m_docname;
|
||||
std::vector<string> m_docname;
|
||||
map<string,id_type> m_docname2docid; // maps from doc names to ids
|
||||
sptr<std::vector<id_type> > m_sid2docid; // maps from sentences to docs (ids)
|
||||
|
||||
@ -141,7 +150,7 @@ namespace Moses {
|
||||
size_t & s1, size_t & s2, // beginning and end of target start
|
||||
size_t & e1, size_t & e2, // beginning and end of target end
|
||||
int& po_fwd, int& po_bwd, // phrase orientations
|
||||
std::vector<uchar> * core_alignment, // stores the core alignment
|
||||
std::vector<unsigned char> * core_alignment, // stores the core alignment
|
||||
bitvector* full_alignment, // stores full word alignment for this sent.
|
||||
bool const flip) const; // flip source and target (reverse lookup)
|
||||
|
||||
@ -190,17 +199,17 @@ namespace Moses {
|
||||
loadSentenceBias(string const& fname) const;
|
||||
|
||||
sptr<DocumentBias>
|
||||
SetupDocumentBias(string const& bserver, string const& text, ostream* log) const;
|
||||
SetupDocumentBias(string const& bserver, string const& text, std::ostream* log) const;
|
||||
|
||||
sptr<DocumentBias>
|
||||
SetupDocumentBias(map<string,float> context_weights, ostream* log) const;
|
||||
SetupDocumentBias(map<string,float> context_weights, std::ostream* log) const;
|
||||
|
||||
void
|
||||
mark_match(Token const* start, Token const* end, iter const& m,
|
||||
bitvector& check) const;
|
||||
void
|
||||
write_yawat_alignment
|
||||
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const;
|
||||
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const;
|
||||
|
||||
string docname(id_type const sid) const;
|
||||
|
||||
@ -229,7 +238,7 @@ namespace Moses {
|
||||
size_t i = 0;
|
||||
float v; while (in>>v) (*ret)[i++] = v;
|
||||
UTIL_THROW_IF2(i != T1->size(),
|
||||
"Mismatch between bias vector size and corpus size at "
|
||||
"Mismatch between bias std::vector size and corpus size at "
|
||||
<< HERE);
|
||||
return ret;
|
||||
}
|
||||
@ -239,8 +248,8 @@ namespace Moses {
|
||||
Bitext<Token>::
|
||||
toString(uint64_t pid, int isL2) const
|
||||
{
|
||||
ostringstream buf;
|
||||
uint32_t sid,off,len; parse_pid(pid,sid,off,len);
|
||||
std::ostringstream buf;
|
||||
uint32_t sid,off,len; ugdiss::parse_pid(pid,sid,off,len);
|
||||
Token const* t = (isL2 ? T2 : T1)->sntStart(sid) + off;
|
||||
Token const* x = t + len;
|
||||
TokenIndex const& V = isL2 ? *V2 : *V1;
|
||||
@ -328,10 +337,10 @@ namespace Moses {
|
||||
size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
int & po_fwd, int & po_bwd,
|
||||
std::vector<uchar>* core_alignment, bitvector* full_alignment,
|
||||
std::vector<unsigned char>* core_alignment, bitvector* full_alignment,
|
||||
bool const flip) const
|
||||
{
|
||||
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
|
||||
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << std::endl;
|
||||
|
||||
// a word on the core_alignment:
|
||||
//
|
||||
@ -425,7 +434,7 @@ namespace Moses {
|
||||
sptr<DocumentBias>
|
||||
Bitext<Token>::
|
||||
SetupDocumentBias
|
||||
( string const& bserver, string const& text, ostream* log ) const
|
||||
( string const& bserver, string const& text, std::ostream* log ) const
|
||||
{
|
||||
sptr<DocumentBias> ret;
|
||||
UTIL_THROW_IF2(m_sid2docid == NULL,
|
||||
@ -439,7 +448,7 @@ namespace Moses {
|
||||
sptr<DocumentBias>
|
||||
Bitext<Token>::
|
||||
SetupDocumentBias
|
||||
( map<string,float> context_weights, ostream* log ) const
|
||||
( map<string,float> context_weights, std::ostream* log ) const
|
||||
{
|
||||
sptr<DocumentBias> ret;
|
||||
UTIL_THROW_IF2(m_sid2docid == NULL,
|
||||
@ -541,12 +550,12 @@ namespace Moses {
|
||||
|
||||
m_pp.init(m_pid1, m_is_inverse, m_token,m_len,m_pstats.get(),0);
|
||||
|
||||
// convert pstats entries to phrase pairs
|
||||
// convert pstats entries to phrase pairs
|
||||
pstats::trg_map_t::iterator a;
|
||||
for (a = m_pstats->trg.begin(); a != m_pstats->trg.end(); ++a)
|
||||
{
|
||||
uint32_t sid,off,len;
|
||||
parse_pid(a->first, sid, off, len);
|
||||
ugdiss::parse_pid(a->first, sid, off, len);
|
||||
m_pp.update(a->first, m_other.sntStart(sid)+off, len, a->second);
|
||||
m_pp.good2 = max(uint32_t(m_pp.raw2 * float(m_pp.good1)/m_pp.raw1),
|
||||
m_pp.joint);
|
||||
@ -596,16 +605,16 @@ namespace Moses {
|
||||
void
|
||||
Bitext<Token>::
|
||||
write_yawat_alignment
|
||||
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const
|
||||
( id_type const sid, iter const* m1, iter const* m2, std::ostream& out ) const
|
||||
{
|
||||
vector<int> a1(T1->sntLen(sid),-1), a2(T2->sntLen(sid),-1);
|
||||
std::vector<int> a1(T1->sntLen(sid),-1), a2(T2->sntLen(sid),-1);
|
||||
bitvector f1(a1.size()), f2(a2.size());
|
||||
if (m1) mark_match(T1->sntStart(sid), T1->sntEnd(sid), *m1, f1);
|
||||
if (m2) mark_match(T2->sntStart(sid), T2->sntEnd(sid), *m2, f2);
|
||||
|
||||
vector<pair<bitvector,bitvector> > agroups;
|
||||
vector<string> grouplabel;
|
||||
pair<bitvector,bitvector> ag;
|
||||
std::vector<pair<bitvector,bitvector> > agroups;
|
||||
std::vector<string> grouplabel;
|
||||
std::pair<bitvector,bitvector> ag;
|
||||
ag.first.resize(a1.size());
|
||||
ag.second.resize(a2.size());
|
||||
char const* x = Tx->sntStart(sid);
|
||||
@ -670,19 +679,19 @@ namespace Moses {
|
||||
void
|
||||
expand(typename Bitext<Token>::iter const& m,
|
||||
Bitext<Token> const& bt, pstats const& ps,
|
||||
std::vector<PhrasePair<Token> >& dest, ostream* log)
|
||||
std::vector<PhrasePair<Token> >& dest, std::ostream* log)
|
||||
{
|
||||
bool fwd = m.root == bt.I1.get();
|
||||
dest.reserve(ps.trg.size());
|
||||
PhrasePair<Token> pp;
|
||||
pp.init(m.getPid(), !fwd, m.getToken(0), m.size(), &ps, 0);
|
||||
// cout << HERE << " "
|
||||
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << endl;
|
||||
// << toString(*(fwd ? bt.V1 : bt.V2), pp.start1,pp.len1) << std::endl;
|
||||
pstats::trg_map_t::const_iterator a;
|
||||
for (a = ps.trg.begin(); a != ps.trg.end(); ++a)
|
||||
{
|
||||
uint32_t sid,off,len;
|
||||
parse_pid(a->first, sid, off, len);
|
||||
ugdiss::parse_pid(a->first, sid, off, len);
|
||||
pp.update(a->first, (fwd ? bt.T2 : bt.T1)->sntStart(sid)+off,
|
||||
len, a->second);
|
||||
dest.push_back(pp);
|
||||
|
@ -76,7 +76,7 @@ void Bitext<Token>
|
||||
}
|
||||
else ++i;
|
||||
}
|
||||
// cerr << workers.size() << "/" << target << " active" << endl;
|
||||
// cerr << workers.size() << "/" << target << " active" << std::endl;
|
||||
if (int(workers.size()) > target)
|
||||
this->doomed = workers.size() - target;
|
||||
else
|
||||
@ -132,7 +132,7 @@ Bitext<Token>
|
||||
::agenda
|
||||
::get_job()
|
||||
{
|
||||
// cerr << workers.size() << " workers on record" << endl;
|
||||
// cerr << workers.size() << " workers on record" << std::endl;
|
||||
sptr<job> ret;
|
||||
if (this->shutdown) return ret;
|
||||
boost::unique_lock<boost::mutex> lock(this->lock);
|
||||
|
@ -100,7 +100,7 @@ Bitext<Token>::agenda::job
|
||||
#if 0
|
||||
cerr << ctr++ << " " << m.str(m_bitext->V1.get())
|
||||
<< " " << sid << "/" << root->getCorpusSize()
|
||||
<< " " << offset << " " << stop-x << endl;
|
||||
<< " " << offset << " " << stop-x << std::endl;
|
||||
#endif
|
||||
bias_total += (*m_bias)[sid];
|
||||
++stats->raw_cnt;
|
||||
@ -109,7 +109,7 @@ Bitext<Token>::agenda::job
|
||||
#if UG_BITEXT_TRACK_ACTIVE_THREADS
|
||||
++active;
|
||||
// if (active%5 == 0)
|
||||
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
// cerr << size_t(active) << " active jobs at " << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -130,10 +130,10 @@ int Bitext<Token>::agenda::job
|
||||
|
||||
if (!m_bias) return 1;
|
||||
|
||||
using namespace boost::math;
|
||||
// using namespace boost::math;
|
||||
typedef boost::math::binomial_distribution<> binomial;
|
||||
|
||||
ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
|
||||
std::ostream* log = m_bias->loglevel > 1 ? m_bias->log : NULL;
|
||||
|
||||
float p = (*m_bias)[sid];
|
||||
id_type docid = m_bias->GetClass(sid);
|
||||
@ -177,7 +177,7 @@ int Bitext<Token>::agenda::job
|
||||
for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
|
||||
if (!ret) *log << "SKIP";
|
||||
else if (p < .5 && d > .9) *log << "FORCE";
|
||||
*log << endl;
|
||||
*log << std::endl;
|
||||
}
|
||||
|
||||
return (ret ? (p < .5 && d > .9) ? 2 : 1 : 0);
|
||||
|
@ -17,9 +17,9 @@ Bitext<Token>::agenda
|
||||
// reduce the number of lock / unlock operations we need to do
|
||||
// during sampling.
|
||||
|
||||
uint64_t sid=0, offset=0; // sid and offset of source phrase
|
||||
size_t s1=0, s2=0, e1=0, e2=0; // soft and hard boundaries of target phrase
|
||||
vector<uchar> aln; // stores phrase-pair-internal alignment
|
||||
uint64_t sid=0, offset=0; // sid and offset of source phrase
|
||||
size_t s1=0, s2=0, e1=0, e2=0; // soft and hard boundaries of target phrase
|
||||
std::vector<unsigned char> aln; // stores phrase-pair-internal alignment
|
||||
while(sptr<job> j = ag.get_job())
|
||||
{
|
||||
j->stats->register_worker();
|
||||
@ -53,7 +53,7 @@ Bitext<Token>::agenda
|
||||
Token const* eos = ag.bt.T2->sntEnd(sid);
|
||||
cerr << "[" << j->stats->good + 1 << "] ";
|
||||
while (t != eos) cerr << (*ag.bt.V2)[(t++)->id()] << " ";
|
||||
cerr << "[" << docid << "]" << endl;
|
||||
cerr << "[" << docid << "]" << std::endl;
|
||||
#endif
|
||||
|
||||
float sample_weight = 1./num_pairs;
|
||||
@ -62,11 +62,11 @@ Bitext<Token>::agenda
|
||||
// adjust offsets in phrase-internal aligment
|
||||
for (size_t k = 1; k < aln.size(); k += 2) aln[k] += s2 - s1;
|
||||
|
||||
vector<uint64_t> seen; seen.reserve(10);
|
||||
std::vector<uint64_t> seen; seen.reserve(10);
|
||||
// It is possible that the phrase extraction extracts the same
|
||||
// phrase twice, e.g., when word a co-occurs with sequence b b b
|
||||
// but is aligned only to the middle word. We can only count
|
||||
// each phrase pair once per source phrase occurrence, or else
|
||||
// each phrase pair once per source phrase occurrence, or else
|
||||
// run the risk of having more joint counts than marginal
|
||||
// counts.
|
||||
|
||||
|
@ -54,7 +54,7 @@ namespace Moses
|
||||
|
||||
void
|
||||
jstats::
|
||||
add(float w, vector<uchar> const& a, uint32_t const cnt2,
|
||||
add(float w, std::vector<unsigned char> const& a, uint32_t const cnt2,
|
||||
uint32_t fwd_orient, uint32_t bwd_orient, int const docid)
|
||||
{
|
||||
boost::lock_guard<boost::mutex> lk(this->lock);
|
||||
@ -66,7 +66,7 @@ namespace Moses
|
||||
size_t i = 0;
|
||||
while (i < my_aln.size() && my_aln[i].second != a) ++i;
|
||||
if (i == my_aln.size())
|
||||
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
|
||||
my_aln.push_back(std::pair<size_t,std::vector<unsigned char> >(1,a));
|
||||
else
|
||||
my_aln[i].first++;
|
||||
if (my_aln[i].first > my_aln[i/2].first)
|
||||
@ -81,7 +81,7 @@ namespace Moses
|
||||
}
|
||||
}
|
||||
|
||||
vector<pair<size_t, vector<uchar> > > const&
|
||||
std::vector<std::pair<size_t, std::vector<unsigned char> > > const&
|
||||
jstats::
|
||||
aln() const
|
||||
{ return my_aln; }
|
||||
|
@ -1,5 +1,7 @@
|
||||
// -*- c++ -*-
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include "ug_typedefs.h"
|
||||
#include "ug_lexical_reordering.h"
|
||||
#include <boost/thread.hpp>
|
||||
@ -8,9 +10,10 @@ namespace Moses
|
||||
{
|
||||
namespace bitext
|
||||
{
|
||||
using namespace ugdiss;
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
// using namespace ugdiss;
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
class
|
||||
jstats
|
||||
{
|
||||
@ -20,23 +23,24 @@ namespace Moses
|
||||
float my_wcnt; // weighted joint count
|
||||
|
||||
// to do: use a static alignment pattern store that stores each pattern only
|
||||
// once, so that we don't have to store so many alignment vectors
|
||||
vector<pair<size_t, vector<uchar> > > my_aln; // internal word alignment
|
||||
// once, so that we don't have to store so many alignment vectors
|
||||
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
|
||||
// internal word alignment
|
||||
|
||||
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts
|
||||
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
|
||||
|
||||
public:
|
||||
std::map<uint32_t,uint32_t> indoc;
|
||||
// vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
|
||||
// std::vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const; // raw joint counts
|
||||
uint32_t cnt2() const; // raw target phrase occurrence count
|
||||
float wcnt() const; // weighted joint counts
|
||||
|
||||
vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, vector<uchar> const& a, uint32_t const cnt2,
|
||||
std::vector<std::pair<size_t, std::vector<unsigned char> > > const & aln() const;
|
||||
void add(float w, std::vector<unsigned char> const& a, uint32_t const cnt2,
|
||||
uint32_t fwd_orient, uint32_t bwd_orient,
|
||||
int const docid);
|
||||
void invalidate();
|
||||
@ -46,7 +50,7 @@ namespace Moses
|
||||
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
|
||||
void fill_lr_vec(Moses::LRModel::Direction const& dir,
|
||||
Moses::LRModel::ModelType const& mdl,
|
||||
vector<float>& v);
|
||||
std::vector<float>& v);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -66,7 +66,7 @@ namespace Moses
|
||||
bool
|
||||
pstats::
|
||||
add(uint64_t pid, float const w,
|
||||
vector<uchar> const& a,
|
||||
std::vector<unsigned char> const& a,
|
||||
uint32_t const cnt2,
|
||||
uint32_t fwd_o,
|
||||
uint32_t bwd_o, int const docid)
|
||||
|
@ -17,7 +17,7 @@ namespace Moses
|
||||
{
|
||||
typedef boost::unordered_map<uint64_t, sptr<pstats> > map_t;
|
||||
typedef ThreadSafeContainer<uint64_t, sptr<pstats>, map_t> cache_t;
|
||||
typedef std::vector<uchar> alnvec;
|
||||
typedef std::vector<unsigned char> alnvec;
|
||||
#if UG_BITEXT_TRACK_ACTIVE_THREADS
|
||||
static ThreadSafeCounter active;
|
||||
#endif
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "ug_typedefs.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
|
||||
template<typename T>
|
||||
class ConllBottomUpToken : public T
|
||||
|
@ -3,7 +3,7 @@ namespace ugdiss
|
||||
{
|
||||
Conll_Record
|
||||
Conll_Record::
|
||||
remap(vector<id_type const*> const& m) const
|
||||
remap(std::vector<id_type const*> const& m) const
|
||||
{
|
||||
Conll_Record ret;
|
||||
ret.sform = m.size() > 0 && m[0] ? m[0][this->sform] : this->sform;
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
|
||||
class
|
||||
Conll_Record
|
||||
@ -29,7 +29,7 @@ namespace ugdiss
|
||||
|
||||
// virtual bool operator==(Conll_Record const& other) const;
|
||||
// virtual bool operator<(Conll_Record const& other) const;
|
||||
Conll_Record remap(vector<id_type const*> const& m) const;
|
||||
Conll_Record remap(std::vector<id_type const*> const& m) const;
|
||||
|
||||
#if 0
|
||||
/** constructor for conversion from CONLL-stype text format
|
||||
|
@ -35,7 +35,7 @@ namespace ugdiss
|
||||
|
||||
id_type
|
||||
SimpleWordId::
|
||||
remap(vector<id_type const*> const& m) const
|
||||
remap(std::vector<id_type const*> const& m) const
|
||||
{
|
||||
if (!m[0]) return theID;
|
||||
return m[0][theID];
|
||||
|
@ -27,7 +27,7 @@ namespace ugdiss
|
||||
id_type const& id() const;
|
||||
int cmp(SimpleWordId const& other) const;
|
||||
bool operator==(SimpleWordId const& other) const;
|
||||
id_type remap(vector<id_type const*> const& m) const;
|
||||
id_type remap(std::vector<id_type const*> const& m) const;
|
||||
};
|
||||
|
||||
/** Token class for suffix arrays */
|
||||
|
@ -15,22 +15,22 @@
|
||||
#include "ug_conll_bottom_up_token.h"
|
||||
#include "ug_typedefs.h"
|
||||
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
// Fills the vector v with pointers to the internal root r_x for the
|
||||
// Fills the std::vector v with pointers to the internal root r_x for the
|
||||
// stretch [start,x] for all x: start <= x < stop. If the stretch
|
||||
// is incoherent, r_x is NULL
|
||||
template<typename T>
|
||||
void
|
||||
fill_L2R_roots(T const* start,T const* stop, vector<T const*>& v)
|
||||
fill_L2R_roots(T const* start,T const* stop, std::vector<T const*>& v)
|
||||
{
|
||||
assert(stop>start);
|
||||
v.resize(stop-start);
|
||||
v[0] = start;
|
||||
bitvector isR(v.size());
|
||||
vector<T const*> root(v.size());
|
||||
std::vector<T const*> root(v.size());
|
||||
isR.set(0);
|
||||
root[0] = start+start->parent;
|
||||
for (T const* x = start+1; x < stop; ++x)
|
||||
@ -95,7 +95,7 @@ namespace ugdiss
|
||||
|
||||
template<typename T>
|
||||
T const*
|
||||
findInternalRoot(vector<T> const& v)
|
||||
findInternalRoot(std::vector<T> const& v)
|
||||
{
|
||||
T const* a = as<T>(&(*v.begin()));
|
||||
T const* b = as<T>(&(*v.end()));
|
||||
@ -108,7 +108,7 @@ namespace ugdiss
|
||||
public:
|
||||
Conll_Record const* rec; // pointer to the record (see below) for this node
|
||||
DTNode* parent; // pointer to my parent
|
||||
vector<DTNode*> children; // children (in the order they appear in the sentence)
|
||||
std::vector<DTNode*> children; // children (in the order they appear in the sentence)
|
||||
DTNode(Conll_Record const* p);
|
||||
};
|
||||
|
||||
@ -117,7 +117,7 @@ namespace ugdiss
|
||||
DependencyTree
|
||||
{
|
||||
public:
|
||||
vector<DTNode> w;
|
||||
std::vector<DTNode> w;
|
||||
DependencyTree(Conll_Record const* first, Conll_Record const* last);
|
||||
};
|
||||
#endif
|
||||
|
@ -38,8 +38,8 @@ namespace Moses
|
||||
{
|
||||
UTIL_THROW_IF2(c != '-', "[" << HERE << "] "
|
||||
<< "Error in alignment information:\n" << a);
|
||||
binwrite(obuf,row);
|
||||
binwrite(obuf,col);
|
||||
ugdiss::binwrite(obuf,row);
|
||||
ugdiss::binwrite(obuf,col);
|
||||
}
|
||||
// important: DO NOT replace the two lines below this comment by
|
||||
// char const* x = obuf.str().c_str(), as the memory x is pointing
|
||||
|
@ -25,12 +25,12 @@ namespace Moses
|
||||
imBitext(imBitext const& other);
|
||||
|
||||
// sptr<imBitext<TKN> >
|
||||
// add(vector<TKN> const& s1, vector<TKN> const& s2, vector<ushort> & a);
|
||||
// add(std::vector<TKN> const& s1, std::vector<TKN> const& s2, std::vector<ushort> & a);
|
||||
|
||||
sptr<imBitext<TKN> >
|
||||
add(vector<string> const& s1,
|
||||
vector<string> const& s2,
|
||||
vector<string> const& a) const;
|
||||
std::vector<string> const& s2,
|
||||
std::vector<string> const& a) const;
|
||||
|
||||
};
|
||||
|
||||
|
@ -20,8 +20,8 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
// using namespace std;
|
||||
// using namespace boost;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
// template<typename TOKEN> class imBitext<TOKEN>;
|
||||
@ -37,8 +37,8 @@ namespace ugdiss
|
||||
friend class tree_iterator;
|
||||
|
||||
private:
|
||||
vector<cpos> sufa; // stores the actual array
|
||||
vector<filepos_type> index; /* top-level index into regions in sufa
|
||||
std::vector<cpos> sufa; // stores the actual array
|
||||
std::vector<filepos_type> index; /* top-level index into regions in sufa
|
||||
* (for faster access) */
|
||||
private:
|
||||
char const*
|
||||
@ -54,11 +54,11 @@ namespace ugdiss
|
||||
imTSA();
|
||||
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c,
|
||||
bdBitset const* filt,
|
||||
ostream* log = NULL);
|
||||
std::ostream* log = NULL);
|
||||
|
||||
imTSA(imTSA<TOKEN> const& prior,
|
||||
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
|
||||
vector<id_type> const& newsids, size_t const vsize);
|
||||
std::vector<id_type> const& newsids, size_t const vsize);
|
||||
|
||||
count_type
|
||||
sntCnt(char const* p, char const * const q) const;
|
||||
@ -86,7 +86,7 @@ namespace ugdiss
|
||||
sanityCheck() const;
|
||||
|
||||
void
|
||||
save_as_mm_tsa(string fname) const;
|
||||
save_as_mm_tsa(std::string fname) const;
|
||||
|
||||
/// add a sentence to the database
|
||||
// shared_ptr<imTSA<TOKEN> > add(vector<TOKEN> const& snt) const;
|
||||
@ -140,7 +140,7 @@ namespace ugdiss
|
||||
// specified in filter
|
||||
template<typename TOKEN>
|
||||
imTSA<TOKEN>::
|
||||
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, ostream* log)
|
||||
imTSA(boost::shared_ptr<Ttrack<TOKEN> const> c, bdBitset const* filter, std::ostream* log)
|
||||
{
|
||||
assert(c);
|
||||
this->corpus = c;
|
||||
@ -166,14 +166,14 @@ namespace ugdiss
|
||||
// alignment in the memory, using a ushort instead of a uint32_t might not
|
||||
// even make a difference.
|
||||
|
||||
vector<count_type> wcnt; // word counts
|
||||
std::vector<count_type> wcnt; // word counts
|
||||
sufa.resize(c->count_tokens(wcnt,filter,slimit,log));
|
||||
|
||||
if (log) *log << sufa.size() << "." << endl;
|
||||
if (log) *log << sufa.size() << "." << std::endl;
|
||||
// exit(1);
|
||||
// we use a second vector that keeps track for each ID of the current insertion
|
||||
// we use a second std::vector that keeps track for each ID of the current insertion
|
||||
// position in the array
|
||||
vector<count_type> tmp(wcnt.size(),0);
|
||||
std::vector<count_type> tmp(wcnt.size(),0);
|
||||
for (size_t i = 1; i < wcnt.size(); ++i)
|
||||
tmp[i] = tmp[i-1] + wcnt[i-1];
|
||||
|
||||
@ -198,14 +198,14 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
// Now sort the array
|
||||
if (log) *log << "sorting ...." << endl;
|
||||
if (log) *log << "sorting ...." << std::endl;
|
||||
index.resize(wcnt.size()+1,0);
|
||||
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(c.get());
|
||||
for (size_t i = 0; i < wcnt.size(); i++)
|
||||
{
|
||||
if (log && wcnt[i] > 5000)
|
||||
*log << "sorting " << wcnt[i]
|
||||
<< " entries starting with id " << i << "." << endl;
|
||||
<< " entries starting with id " << i << "." << std::endl;
|
||||
index[i+1] = index[i]+wcnt[i];
|
||||
assert(index[i+1]==tmp[i]); // sanity check
|
||||
if (wcnt[i]>1)
|
||||
@ -217,7 +217,7 @@ namespace ugdiss
|
||||
this->indexSize = this->index.size();
|
||||
#if 1
|
||||
// Sanity check during code development. Can be removed once the thing is stable.
|
||||
typename vector<cpos>::iterator m = sufa.begin();
|
||||
typename std::vector<cpos>::iterator m = sufa.begin();
|
||||
for (size_t i = 0; i < wcnt.size(); i++)
|
||||
{
|
||||
for (size_t k = 0; k < wcnt[i]; ++k,++m)
|
||||
@ -330,14 +330,14 @@ namespace ugdiss
|
||||
template<typename TOKEN>
|
||||
void
|
||||
imTSA<TOKEN>::
|
||||
save_as_mm_tsa(string fname) const
|
||||
save_as_mm_tsa(std::string fname) const
|
||||
{
|
||||
ofstream out(fname.c_str());
|
||||
std::ofstream out(fname.c_str());
|
||||
filepos_type idxStart(0);
|
||||
id_type idxSize(index.size());
|
||||
numwrite(out,idxStart);
|
||||
numwrite(out,idxSize);
|
||||
vector<filepos_type> mmIndex;
|
||||
std::vector<filepos_type> mmIndex;
|
||||
for (size_t i = 1; i < this->index.size(); i++)
|
||||
{
|
||||
mmIndex.push_back(out.tellp());
|
||||
@ -360,7 +360,7 @@ namespace ugdiss
|
||||
imTSA<TOKEN>::
|
||||
imTSA(imTSA<TOKEN> const& prior,
|
||||
boost::shared_ptr<imTtrack<TOKEN> const> const& crp,
|
||||
vector<id_type> const& newsids, size_t const vsize)
|
||||
std::vector<id_type> const& newsids, size_t const vsize)
|
||||
{
|
||||
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(crp.get());
|
||||
|
||||
@ -369,7 +369,7 @@ namespace ugdiss
|
||||
size_t newToks = 0;
|
||||
BOOST_FOREACH(id_type sid, newsids)
|
||||
newToks += crp->sntLen(sid);
|
||||
vector<cpos> nidx(newToks); // new array entries
|
||||
std::vector<cpos> nidx(newToks); // new array entries
|
||||
|
||||
size_t n = 0;
|
||||
BOOST_FOREACH(id_type sid, newsids)
|
||||
@ -390,9 +390,9 @@ namespace ugdiss
|
||||
this->index.resize(vsize+1);
|
||||
|
||||
size_t i = 0;
|
||||
typename vector<cpos>::iterator k = this->sufa.begin();
|
||||
typename std::vector<cpos>::iterator k = this->sufa.begin();
|
||||
// cerr << newToks << " new items at "
|
||||
// << __FILE__ << ":" << __LINE__ << endl;
|
||||
// << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
for (size_t n = 0; n < nidx.size();)
|
||||
{
|
||||
id_type nid = crp->getToken(nidx[n])->id();
|
||||
|
@ -28,8 +28,8 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
// using namespace std;
|
||||
// using namespace boost;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename Token> class imTSA;
|
||||
@ -37,7 +37,8 @@ namespace ugdiss
|
||||
|
||||
template<typename TOKEN>
|
||||
typename boost::shared_ptr<imTtrack<TOKEN> >
|
||||
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp, vector<TOKEN> const & snt);
|
||||
append(typename boost::shared_ptr<imTtrack<TOKEN> > const & crp,
|
||||
std::vector<TOKEN> const & snt);
|
||||
|
||||
template<typename Token>
|
||||
class imTtrack : public Ttrack<Token>
|
||||
@ -45,19 +46,20 @@ namespace ugdiss
|
||||
|
||||
private:
|
||||
size_t numToks;
|
||||
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
|
||||
boost::shared_ptr<typename std::vector<std::vector<Token> > > myData;
|
||||
// pointer to corpus data
|
||||
friend class imTSA<Token>;
|
||||
|
||||
friend
|
||||
typename boost::shared_ptr<imTtrack<Token> >
|
||||
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, vector<Token> const & snt);
|
||||
append<Token>(typename boost::shared_ptr<imTtrack<Token> > const & crp, std::vector<Token> const & snt);
|
||||
|
||||
void m_check_token_count(); // debugging function
|
||||
|
||||
public:
|
||||
|
||||
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
|
||||
imTtrack(istream& in, TokenIndex& V, ostream* log = NULL);
|
||||
imTtrack(boost::shared_ptr<std::vector<std::vector<Token> > > const& d);
|
||||
imTtrack(std::istream& in, TokenIndex& V, std::ostream* log = NULL);
|
||||
imTtrack(size_t reserve = 0);
|
||||
// imTtrack(istream& in, Vocab& V);
|
||||
|
||||
@ -80,7 +82,7 @@ namespace ugdiss
|
||||
m_check_token_count()
|
||||
{ // sanity check
|
||||
size_t check = 0;
|
||||
BOOST_FOREACH(vector<Token> const& s, *myData)
|
||||
BOOST_FOREACH(std::vector<Token> const& s, *myData)
|
||||
check += s.size();
|
||||
UTIL_THROW_IF2(check != this->numToks, "[" << HERE << "]"
|
||||
<< " Wrong token count after appending sentence!"
|
||||
@ -131,28 +133,28 @@ namespace ugdiss
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack(istream& in, TokenIndex& V, ostream* log)
|
||||
imTtrack(std::istream& in, TokenIndex& V, std::ostream* log)
|
||||
: numToks(0)
|
||||
{
|
||||
myData.reset(new vector<vector<Token> >());
|
||||
string line,w;
|
||||
myData.reset(new std::vector<std::vector<Token> >());
|
||||
std::string line,w;
|
||||
size_t linectr=0;
|
||||
boost::unordered_map<string,id_type> H;
|
||||
boost::unordered_map<std::string,id_type> H;
|
||||
// for (id_type i = 0; i < V.knownVocabSize(); ++i)
|
||||
// H[V[i]] = i;
|
||||
while (getline(in,line))
|
||||
{
|
||||
// cout << line << endl;
|
||||
myData->push_back(vector<Token>());
|
||||
// cout << line << std::endl;
|
||||
myData->push_back(std::vector<Token>());
|
||||
if (log && ++linectr%1000000==0)
|
||||
*log << linectr/1000000 << "M lines of input processed" << endl;
|
||||
istringstream buf(line);
|
||||
// cout << line << endl;
|
||||
*log << linectr/1000000 << "M lines of input processed" << std::endl;
|
||||
std::istringstream buf(line);
|
||||
// cout << line << std::endl;
|
||||
while (buf>>w)
|
||||
{
|
||||
myData->back().push_back(Token(V[w]));
|
||||
// cout << w << " " << myData->back().back().id() << " "
|
||||
// << V[w] << endl;
|
||||
// << V[w] << std::endl;
|
||||
}
|
||||
// myData->back().resize(myData->back().size(), Token(0));
|
||||
numToks += myData->back().size();
|
||||
@ -164,17 +166,17 @@ namespace ugdiss
|
||||
imTtrack(size_t reserve)
|
||||
: numToks(0)
|
||||
{
|
||||
myData.reset(new vector<vector<Token> >());
|
||||
myData.reset(new std::vector<std::vector<Token> >());
|
||||
if (reserve) myData->reserve(reserve);
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
|
||||
imTtrack(boost::shared_ptr<std::vector<std::vector<Token> > > const& d)
|
||||
: numToks(0)
|
||||
{
|
||||
myData = d;
|
||||
BOOST_FOREACH(vector<Token> const& v, *d)
|
||||
BOOST_FOREACH(std::vector<Token> const& v, *d)
|
||||
numToks += v.size();
|
||||
}
|
||||
|
||||
@ -186,7 +188,7 @@ namespace ugdiss
|
||||
id_type i;
|
||||
for (i = 0; i < myData->size(); ++i)
|
||||
{
|
||||
vector<Token> const& v = (*myData)[i];
|
||||
std::vector<Token> const& v = (*myData)[i];
|
||||
if (v.size() == 0) continue;
|
||||
if (&v.front() <= t && &v.back() >= t)
|
||||
break;
|
||||
@ -197,7 +199,7 @@ namespace ugdiss
|
||||
/// add a sentence to the database
|
||||
template<typename TOKEN>
|
||||
boost::shared_ptr<imTtrack<TOKEN> >
|
||||
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, vector<TOKEN> const & snt)
|
||||
append(boost::shared_ptr<imTtrack<TOKEN> > const& crp, std::vector<TOKEN> const & snt)
|
||||
{
|
||||
#if 1
|
||||
if (crp) crp->m_check_token_count();
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
@ -20,16 +20,16 @@ namespace ugdiss
|
||||
LexicalPhraseScorer1
|
||||
{
|
||||
typedef boost::unordered_map<id_type, float> inner_map_t;
|
||||
vector<inner_map_t> L1_given_L2;
|
||||
vector<inner_map_t> L2_given_L1;
|
||||
std::vector<inner_map_t> L1_given_L2;
|
||||
std::vector<inner_map_t> L2_given_L1;
|
||||
void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
|
||||
vector<inner_map_t> & lex);
|
||||
std::vector<inner_map_t> & lex);
|
||||
public:
|
||||
void open(string const& bname, string const& L1, string const& L2,
|
||||
TokenIndex & V1, TokenIndex & V2);
|
||||
void score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<ushort> aln, float & fwd_score, float& bwd_score);
|
||||
std::vector<ushort> aln, float & fwd_score, float& bwd_score);
|
||||
void score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
@ -42,10 +42,10 @@ namespace ugdiss
|
||||
void
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
|
||||
vector<inner_map_t> & lex)
|
||||
std::vector<inner_map_t> & lex)
|
||||
{
|
||||
boost::iostreams::filtering_istream in;
|
||||
cout << fname << endl;
|
||||
cout << fname << std::endl;
|
||||
open_input_stream(fname,in);
|
||||
lex.resize(V1.ksize());
|
||||
string w1,w2; float p;
|
||||
@ -66,8 +66,8 @@ namespace ugdiss
|
||||
{
|
||||
string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
|
||||
string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
|
||||
cout << lex1 << endl;
|
||||
cout << lex2 << endl;
|
||||
cout << lex1 << std::endl;
|
||||
cout << lex2 << std::endl;
|
||||
load_lex(lex1,V1,V2,L1_given_L2);
|
||||
load_lex(lex2,V2,V1,L2_given_L1);
|
||||
}
|
||||
@ -79,8 +79,8 @@ namespace ugdiss
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<ushort> aln, float & fwd_score, float& bwd_score)
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
std::vector<float> p1(e1,0), p2(e2,0);
|
||||
std::vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (size_t k = 0; k < aln.size(); ++k)
|
||||
{
|
||||
@ -126,8 +126,8 @@ namespace ugdiss
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float & fwd_score, float& bwd_score)
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
std::vector<float> p1(e1,0), p2(e2,0);
|
||||
std::vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (char const* x = aln_start; x < aln_end;)
|
||||
{
|
||||
|
@ -14,7 +14,7 @@
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_mm_2d_table.h"
|
||||
#include "util/exception.hh"
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
@ -22,7 +22,7 @@ namespace ugdiss
|
||||
class
|
||||
LexicalPhraseScorer2
|
||||
{
|
||||
vector<string> ftag;
|
||||
std::vector<string> ftag;
|
||||
public:
|
||||
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
|
||||
table_t COOC;
|
||||
@ -31,7 +31,7 @@ namespace ugdiss
|
||||
void
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<someint> const & aln, float const alpha,
|
||||
std::vector<someint> const & aln, float const alpha,
|
||||
float & fwd_score, float& bwd_score) const;
|
||||
|
||||
void
|
||||
@ -67,8 +67,8 @@ namespace ugdiss
|
||||
vector<someint> const & aln, float const alpha,
|
||||
float & fwd_score, float& bwd_score) const
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
std::vector<float> p1(e1,0), p2(e2,0);
|
||||
std::vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (size_t k = 0; k < aln.size(); ++k)
|
||||
{
|
||||
@ -113,7 +113,7 @@ namespace ugdiss
|
||||
cerr << "[" << s << "," << t << "] "
|
||||
<< COOC.m1(s) << "/"
|
||||
<< COOC[s][t] << "/"
|
||||
<< COOC.m2(t) << endl;
|
||||
<< COOC.m2(t) << std::endl;
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
@ -141,8 +141,8 @@ namespace ugdiss
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float const alpha, float & fwd_score, float& bwd_score) const
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
std::vector<float> p1(e1,0), p2(e2,0);
|
||||
std::vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (char const* x = aln_start; x < aln_end;)
|
||||
{
|
||||
|
@ -14,14 +14,14 @@
|
||||
|
||||
namespace lru_cache
|
||||
{
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
// using namespace std;
|
||||
// using namespace boost;
|
||||
|
||||
template<typename KEY, typename VAL>
|
||||
class LRU_Cache
|
||||
{
|
||||
public:
|
||||
typedef unordered_map<KEY,uint32_t> map_t;
|
||||
typedef boost::unordered_map<KEY,uint32_t> map_t;
|
||||
private:
|
||||
struct Record
|
||||
{
|
||||
@ -33,7 +33,7 @@ namespace lru_cache
|
||||
|
||||
mutable boost::shared_mutex m_lock;
|
||||
uint32_t m_qfront, m_qback;
|
||||
vector<Record> m_recs;
|
||||
std::vector<Record> m_recs;
|
||||
map_t m_idx;
|
||||
|
||||
void
|
||||
@ -84,7 +84,7 @@ namespace lru_cache
|
||||
set(KEY const& key, sptr<VAL> const& ptr)
|
||||
{
|
||||
boost::lock_guard<boost::shared_mutex> lock(m_lock);
|
||||
pair<typename map_t::iterator,bool> foo;
|
||||
std::pair<typename map_t::iterator,bool> foo;
|
||||
foo = m_idx.insert(make_pair(key,m_recs.size()));
|
||||
|
||||
uint32_t p = foo.first->second;
|
||||
|
@ -13,7 +13,7 @@
|
||||
namespace bio=boost::iostreams;
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
class
|
||||
mm2dTable
|
||||
@ -71,12 +71,12 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
|
||||
void open(string fname);
|
||||
void open(std::string fname);
|
||||
void close();
|
||||
|
||||
Row operator[](ID key) const;
|
||||
|
||||
mm2dTable(string const fname="") { if (!fname.empty()) open(fname); };
|
||||
mm2dTable(std::string const fname="") { if (!fname.empty()) open(fname); };
|
||||
~mm2dTable() { file.reset(); };
|
||||
};
|
||||
|
||||
@ -110,25 +110,25 @@ namespace ugdiss
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
void
|
||||
mm2dTable<OFFSET,ID,VAL,INIT>::
|
||||
open(string fname)
|
||||
open(std::string fname)
|
||||
{
|
||||
// cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
// cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << std::endl;
|
||||
if (access(fname.c_str(),R_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
std::ostringstream msg;
|
||||
msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
|
||||
<< "file '" << fname << " is not accessible." << endl;
|
||||
string foo = msg.str();
|
||||
<< "file '" << fname << " is not accessible." << std::endl;
|
||||
std::string foo = msg.str();
|
||||
UTIL_THROW(util::Exception,foo.c_str());
|
||||
}
|
||||
file.reset(new bio::mapped_file_source());
|
||||
file->open(fname);
|
||||
if (!file->is_open())
|
||||
{
|
||||
ostringstream msg;
|
||||
std::ostringstream msg;
|
||||
msg << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
|
||||
<< "Opening file '" << fname << "' failed." << endl;
|
||||
string foo = msg.str();
|
||||
<< "Opening file '" << fname << "' failed." << std::endl;
|
||||
std::string foo = msg.str();
|
||||
UTIL_THROW(util::Exception,foo.c_str());
|
||||
}
|
||||
char const* p = file->data();
|
||||
@ -137,15 +137,15 @@ namespace ugdiss
|
||||
numRows = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
|
||||
numCols = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
|
||||
data = reinterpret_cast<Cell const*>(p);
|
||||
// cout << numRows << " rows; " << numCols << " columns " << endl;
|
||||
// cout << numRows << " rows; " << numCols << " columns " << std::endl;
|
||||
M1 = reinterpret_cast<VAL const*>(index+numRows+1);
|
||||
M2 = M1+numRows;
|
||||
// cout << "Table " << fname << " has " << numRows << " rows and "
|
||||
// << numCols << " columns." << endl;
|
||||
// << numCols << " columns." << std::endl;
|
||||
// cout << "File size is " << file.size()*1024 << " bytes; ";
|
||||
// cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
|
||||
// << " bytes into the file" << endl;
|
||||
// cout << M2[0] << endl;
|
||||
// << " bytes into the file" << std::endl;
|
||||
// cout << M2[0] << std::endl;
|
||||
}
|
||||
|
||||
template<
|
||||
@ -156,15 +156,15 @@ namespace ugdiss
|
||||
typename ICONT // inner container type
|
||||
>
|
||||
void
|
||||
write_mm_2d_table(ostream& out, vector<ICONT> const& T,
|
||||
vector<VAL> const* m1 = NULL,
|
||||
vector<VAL> const* m2 = NULL)
|
||||
write_mm_2d_table(std::ostream& out, std::vector<ICONT> const& T,
|
||||
std::vector<VAL> const* m1 = NULL,
|
||||
std::vector<VAL> const* m2 = NULL)
|
||||
{
|
||||
assert(T.size());
|
||||
typedef typename ICONT::const_iterator iter;
|
||||
|
||||
// compute marginals if necessary
|
||||
vector<VAL> m1x,m2x;
|
||||
std::vector<VAL> m1x,m2x;
|
||||
if (!m1)
|
||||
{
|
||||
m1x.resize(T.size(),INIT(0));
|
||||
@ -191,7 +191,7 @@ namespace ugdiss
|
||||
numwrite(out,id_type(m2->size())); // number of columns
|
||||
|
||||
// write actual table
|
||||
vector<OFFSET> index;
|
||||
std::vector<OFFSET> index;
|
||||
size_t ctr =0;
|
||||
index.reserve(m1->size()+1);
|
||||
for (ID r = 0; r < ID(T.size()); ++r)
|
||||
|
@ -35,7 +35,7 @@ namespace Moses
|
||||
// in the future, we might also allow listing documents with
|
||||
// sentence ranges.
|
||||
string buffer,docname; size_t a=0,b;
|
||||
this->m_sid2docid.reset(new vector<id_type>(this->T1->size()));
|
||||
this->m_sid2docid.reset(new std::vector<id_type>(this->T1->size()));
|
||||
while(getline(docmap,buffer))
|
||||
{
|
||||
istringstream line(buffer);
|
||||
@ -46,7 +46,7 @@ namespace Moses
|
||||
this->m_docname.push_back(docname);
|
||||
line >> b;
|
||||
#ifndef NO_MOSES
|
||||
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << endl);
|
||||
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << std::endl);
|
||||
#endif
|
||||
for (b += a; a < b; ++a)
|
||||
(*this->m_sid2docid)[a] = docid;
|
||||
|
@ -19,7 +19,7 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename TOKEN>
|
||||
@ -43,8 +43,8 @@ namespace ugdiss
|
||||
|
||||
public:
|
||||
mmTSA();
|
||||
mmTSA(string fname, Ttrack<TOKEN> const* c);
|
||||
void open(string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c);
|
||||
mmTSA(std::string fname, Ttrack<TOKEN> const* c);
|
||||
void open(std::string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c);
|
||||
|
||||
count_type
|
||||
sntCnt(char const* p, char const * const q) const;
|
||||
@ -109,7 +109,7 @@ namespace ugdiss
|
||||
|
||||
template<typename TOKEN>
|
||||
mmTSA<TOKEN>::
|
||||
mmTSA(string fname, Ttrack<TOKEN> const* c)
|
||||
mmTSA(std::string fname, Ttrack<TOKEN> const* c)
|
||||
{
|
||||
open(fname,c);
|
||||
}
|
||||
@ -119,12 +119,12 @@ namespace ugdiss
|
||||
template<typename TOKEN>
|
||||
void
|
||||
mmTSA<TOKEN>::
|
||||
open(string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c)
|
||||
open(std::string fname, typename boost::shared_ptr<Ttrack<TOKEN> const> c)
|
||||
{
|
||||
this->bsc.reset(new BitSetCache<TSA<TOKEN> >(this));
|
||||
if (access(fname.c_str(),F_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
std::ostringstream msg;
|
||||
msg << "mmTSA<>::open: File '" << fname << "' does not exist.";
|
||||
throw std::runtime_error(msg.str().c_str());
|
||||
}
|
||||
@ -137,7 +137,7 @@ namespace ugdiss
|
||||
p = numread(p,idxOffset);
|
||||
p = numread(p,this->indexSize);
|
||||
|
||||
// cerr << fname << ": " << idxOffset << " " << this->indexSize << endl;
|
||||
// cerr << fname << ": " << idxOffset << " " << this->indexSize << std::endl;
|
||||
|
||||
this->startArray = p;
|
||||
this->index = reinterpret_cast<filepos_type const*>(file.data()+idxOffset);
|
||||
@ -243,7 +243,7 @@ namespace ugdiss
|
||||
{
|
||||
raw = 0;
|
||||
id_type sid; uint16_t off;
|
||||
boost::dynamic_bitset<typename ::uint64_t> check(this->corpus->size());
|
||||
boost::dynamic_bitset<uint64_t> check(this->corpus->size());
|
||||
while (p < q)
|
||||
{
|
||||
p = tightread(p,q,sid);
|
||||
|
@ -24,7 +24,7 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename TKN=id_type>
|
||||
@ -42,7 +42,7 @@ namespace ugdiss
|
||||
* of more than four billion words)
|
||||
*/
|
||||
public:
|
||||
mmTtrack(string fname);
|
||||
mmTtrack(std::string fname);
|
||||
mmTtrack();
|
||||
|
||||
// return pointer to beginning of sentence
|
||||
@ -58,20 +58,20 @@ namespace ugdiss
|
||||
size_t numTokens() const;
|
||||
|
||||
// open an mmTtrack file
|
||||
void open(string fname);
|
||||
void open(std::string fname);
|
||||
|
||||
// FUNCTIONS FOR BUILDING CORPUS TRACKS
|
||||
// write a blank file header at the beginning of a new ttrack file
|
||||
void write_blank_file_header(ostream& out) const;
|
||||
void write_blank_file_header(std::ostream& out) const;
|
||||
|
||||
// write the sentence index /idx/ and fill the file header
|
||||
void write_index_and_finalize(ostream& out,
|
||||
vector<id_type> const& idx,
|
||||
void write_index_and_finalize(std::ostream& out,
|
||||
std::vector<id_type> const& idx,
|
||||
count_type tokenCount) const;
|
||||
|
||||
// copy a contiguous sequence of sentences to another stream
|
||||
// return the number of tokens copied
|
||||
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
|
||||
id_type copySentences(std::ostream& trg, id_type start, id_type stop) const;
|
||||
|
||||
/** find the sentence id of a given token */
|
||||
id_type findSid(TKN const* t) const;
|
||||
@ -79,7 +79,7 @@ namespace ugdiss
|
||||
id_type findSid(id_type tokenOffset) const;
|
||||
|
||||
/// re-assign ids based on the id maps in /f/
|
||||
void remap(string const fname, vector<id_type const*> const & f) const;
|
||||
void remap(std::string const fname, std::vector<id_type const*> const & f) const;
|
||||
|
||||
};
|
||||
|
||||
@ -87,7 +87,7 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
remap(string const fname, vector<id_type const*> const & f) const
|
||||
remap(std::string const fname, std::vector<id_type const*> const & f) const
|
||||
{
|
||||
bio::mapped_file myfile(fname);
|
||||
assert(myfile.is_open());
|
||||
@ -128,8 +128,9 @@ namespace ugdiss
|
||||
{
|
||||
if (sid >= this->numSent)
|
||||
{
|
||||
cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
|
||||
<< this->numSent <<")" << endl;
|
||||
std::cerr << "Fatal error: requested sentence #"
|
||||
<< sid <<" is beyond corpus size ("
|
||||
<< this->numSent <<")" << std::endl;
|
||||
}
|
||||
assert(sid < this->numSent);
|
||||
return data+index[sid];
|
||||
@ -155,7 +156,7 @@ namespace ugdiss
|
||||
|
||||
template<typename TKN>
|
||||
mmTtrack<TKN>::
|
||||
mmTtrack(string fname)
|
||||
mmTtrack(std::string fname)
|
||||
{
|
||||
open(fname);
|
||||
}
|
||||
@ -163,18 +164,18 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
open(string fname)
|
||||
open(std::string fname)
|
||||
{
|
||||
if (access(fname.c_str(),F_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
std::ostringstream msg;
|
||||
msg << "mmTtrack<>::open: File '" << fname << "' does not exist.";
|
||||
throw std::runtime_error(msg.str().c_str());
|
||||
}
|
||||
file.open(fname);
|
||||
if (!file.is_open())
|
||||
{
|
||||
cerr << "Error opening file " << fname << endl;
|
||||
std::cerr << "Error opening file " << fname << std::endl;
|
||||
assert(0);
|
||||
}
|
||||
filepos_type idxOffset;
|
||||
@ -210,7 +211,7 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
write_blank_file_header(ostream& out) const
|
||||
write_blank_file_header(std::ostream& out) const
|
||||
{
|
||||
numwrite(out,filepos_type(0)); // place holder for index start
|
||||
numwrite(out,id_type(0)); // place holder for index size
|
||||
@ -220,8 +221,8 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
write_index_and_finalize(ostream& out,
|
||||
vector<id_type>const& idx,
|
||||
write_index_and_finalize(std::ostream& out,
|
||||
std::vector<id_type>const& idx,
|
||||
id_type tokenCount) const
|
||||
{
|
||||
id_type idxSize = idx.size();
|
||||
@ -237,7 +238,7 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
id_type
|
||||
mmTtrack<TKN>::
|
||||
copySentences(ostream& trg, id_type start, id_type stop) const
|
||||
copySentences(std::ostream& trg, id_type start, id_type stop) const
|
||||
{
|
||||
assert(stop > start);
|
||||
TKN const* a = sntStart(start);
|
||||
|
@ -31,8 +31,8 @@
|
||||
#include "ug_corpus_token.h"
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
using namespace ugdiss;
|
||||
using namespace std;
|
||||
// using namespace ugdiss;
|
||||
// using namespace std;
|
||||
namespace Moses {
|
||||
|
||||
typedef L2R_Token<SimpleWordId> Token;
|
||||
@ -43,7 +43,7 @@ namespace Moses {
|
||||
public:
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
class pstats; // one-sided phrase statistics
|
||||
class jstats; // phrase pair ("joint") statistics
|
||||
class jstats; // phrase pair ("joint") statistics
|
||||
class agenda
|
||||
{
|
||||
boost::mutex lock;
|
||||
@ -51,7 +51,7 @@ namespace Moses {
|
||||
class job;
|
||||
class worker;
|
||||
list<job> joblist;
|
||||
vector<sptr<boost::thread> > workers;
|
||||
std::vector<sptr<boost::thread> > workers;
|
||||
bool shutdown;
|
||||
size_t doomed;
|
||||
public:
|
||||
@ -83,7 +83,7 @@ namespace Moses {
|
||||
find_trg_phr_bounds
|
||||
(size_t const sid, size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
vector<uchar> * core_alignment, bool const flip) const;
|
||||
std::vector<uchar> * core_alignment, bool const flip) const;
|
||||
|
||||
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
|
||||
private:
|
||||
@ -99,22 +99,22 @@ namespace Moses {
|
||||
void prep(iter const& phrase);
|
||||
};
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
class
|
||||
mmbitext::
|
||||
jstats
|
||||
{
|
||||
uint32_t my_rcnt; // unweighted count
|
||||
float my_wcnt; // weighted count
|
||||
vector<pair<size_t, vector<uchar> > > my_aln;
|
||||
std::vector<pair<size_t, vector<uchar> > > my_aln;
|
||||
boost::mutex lock;
|
||||
public:
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const;
|
||||
float wcnt() const;
|
||||
vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, vector<uchar> const& a);
|
||||
std::vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, std::vector<uchar> const& a);
|
||||
};
|
||||
|
||||
// struct
|
||||
@ -151,11 +151,11 @@ namespace Moses {
|
||||
size_t in_progress; // keeps track of how many threads are currently working on this
|
||||
boost::unordered_map<uint64_t, jstats> trg;
|
||||
pstats();
|
||||
// vector<phrase> nbest;
|
||||
// std::vector<phrase> nbest;
|
||||
// void select_nbest(size_t const N=10);
|
||||
void release();
|
||||
void register_worker();
|
||||
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
|
||||
void add(mmbitext::iter const& trg_phrase, float const w, std::vector<uchar> const& a);
|
||||
};
|
||||
|
||||
class
|
||||
|
@ -12,6 +12,9 @@ namespace Moses
|
||||
{
|
||||
namespace bitext
|
||||
{
|
||||
|
||||
using ugdiss::TokenIndex;
|
||||
|
||||
template<typename Token>
|
||||
class
|
||||
PhrasePair
|
||||
@ -27,7 +30,7 @@ namespace Moses
|
||||
std::vector<float> fvals;
|
||||
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs?
|
||||
float dbwd[Moses::LRModel::NONE+1]; // distortion counts
|
||||
std::vector<uchar> aln;
|
||||
std::vector<unsigned char> aln;
|
||||
float score;
|
||||
bool inverse;
|
||||
// std::vector<uint32_t> indoc;
|
||||
@ -54,10 +57,10 @@ namespace Moses
|
||||
void
|
||||
fill_lr_vec(LRModel::Direction const& dir,
|
||||
LRModel::ModelType const& mdl,
|
||||
vector<float>& v) const;
|
||||
std::vector<float>& v) const;
|
||||
#ifndef NO_MOSES
|
||||
void
|
||||
print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
|
||||
print(std::ostream& out, TokenIndex const& V1, TokenIndex const& V2,
|
||||
LRModel const& LR) const;
|
||||
#endif
|
||||
|
||||
@ -271,7 +274,7 @@ namespace Moses
|
||||
PhrasePair<Token>
|
||||
::fill_lr_vec(LRModel::Direction const& dir,
|
||||
LRModel::ModelType const& mdl,
|
||||
vector<float>& v) const
|
||||
std::vector<float>& v) const
|
||||
{
|
||||
// how many distinct scores do we have?
|
||||
size_t num_scores = (mdl == LRModel::MSLR ? 4 : mdl == LRModel::MSD ? 3 : 2);
|
||||
@ -301,7 +304,7 @@ namespace Moses
|
||||
template<typename Token>
|
||||
void
|
||||
PhrasePair<Token>
|
||||
::print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
|
||||
::print(std::ostream& out, TokenIndex const& V1, TokenIndex const& V2,
|
||||
LRModel const& LR) const
|
||||
{
|
||||
out << toString (V1, this->start1, this->len1) << " ::: "
|
||||
@ -315,14 +318,14 @@ namespace Moses
|
||||
out << m->first << ":" << m->second;
|
||||
}
|
||||
out << "] [";
|
||||
vector<float> lrscores;
|
||||
std::vector<float> lrscores;
|
||||
this->fill_lr_vec(LR.GetDirection(), LR.GetModelType(), lrscores);
|
||||
for (size_t i = 0; i < lrscores.size(); ++i)
|
||||
{
|
||||
if (i) out << " ";
|
||||
out << boost::format("%.2f") % exp(lrscores[i]);
|
||||
}
|
||||
out << "]" << endl;
|
||||
out << "]" << std::endl;
|
||||
#if 0
|
||||
for (int i = 0; i <= Moses::LRModel::NONE; i++)
|
||||
{
|
||||
|
@ -2,7 +2,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include<vector>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "moses/Util.h"
|
||||
|
@ -21,8 +21,8 @@
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
// using namespace std;
|
||||
// using namespace boost;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename TKN>
|
||||
@ -56,7 +56,7 @@ namespace ugdiss
|
||||
typedef boost::shared_ptr<bitvector> bitset_pointer;
|
||||
typedef TKN Token;
|
||||
typedef BitSetCache<TSA<TKN> > BSC_t;
|
||||
/* to allow caching of bit vectors that are expensive to create on
|
||||
/* to allow caching of bit vectors that are expensive to create on
|
||||
* the fly */
|
||||
|
||||
friend class TSA_tree_iterator<TKN>;
|
||||
@ -148,8 +148,8 @@ namespace ugdiss
|
||||
* [keyStart,keyStop)
|
||||
*/
|
||||
char const*
|
||||
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const;
|
||||
lower_bound(typename std::vector<TKN>::const_iterator const& keyStart,
|
||||
typename std::vector<TKN>::const_iterator const& keyStop) const;
|
||||
char const*
|
||||
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
|
||||
|
||||
@ -160,29 +160,29 @@ namespace ugdiss
|
||||
* [keyStart,keyStop)
|
||||
*/
|
||||
char const*
|
||||
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const;
|
||||
upper_bound(typename std::vector<TKN>::const_iterator const& keyStart,
|
||||
typename std::vector<TKN>::const_iterator const& keyStop) const;
|
||||
|
||||
char const*
|
||||
upper_bound(TKN const* keyStart, int keyLength) const;
|
||||
|
||||
|
||||
/** dump all suffixes in order to /out/ */
|
||||
void dump(ostream& out, TokenIndex const& T) const;
|
||||
void dump(std::ostream& out, TokenIndex const& T) const;
|
||||
|
||||
/** fill the dynamic bit set with true for all sentences that contain
|
||||
* /phrase/.
|
||||
* @return the raw number of occurrences.
|
||||
*/
|
||||
count_type
|
||||
fillBitSet(vector<TKN> const& phrase, bdBitset& dest) const;
|
||||
fillBitSet(std::vector<TKN> const& phrase, bdBitset& dest) const;
|
||||
|
||||
count_type
|
||||
fillBitSet(TKN const* key, size_t keyLen, bdBitset& dest) const;
|
||||
|
||||
count_type
|
||||
setBits(char const* startRange, char const* endRange,
|
||||
boost::dynamic_bitset<typename ::uint64_t>& bs) const;
|
||||
boost::dynamic_bitset<uint64_t>& bs) const;
|
||||
|
||||
void
|
||||
setTokenBits(char const* startRange, char const* endRange, size_t len,
|
||||
@ -246,11 +246,11 @@ namespace ugdiss
|
||||
getCounts(char const* p, char const* const q,
|
||||
count_type& sids, count_type& raw) const = 0;
|
||||
|
||||
string
|
||||
std::string
|
||||
suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
|
||||
const;
|
||||
|
||||
string
|
||||
std::string
|
||||
suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
|
||||
const;
|
||||
|
||||
@ -269,18 +269,18 @@ namespace ugdiss
|
||||
next 16 bits: length of the phrase
|
||||
*/
|
||||
::uint64_t
|
||||
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
|
||||
typename vector<TKN>::const_iterator const& pstop) const;
|
||||
getSequenceId(typename std::vector<TKN>::const_iterator const& pstart,
|
||||
typename std::vector<TKN>::const_iterator const& pstop) const;
|
||||
|
||||
::uint64_t
|
||||
getSequenceId(TKN const* t, ushort plen) const;
|
||||
|
||||
/** Return the phrase represented by phrase ID pid_ */
|
||||
string
|
||||
std::string
|
||||
getSequence(::uint64_t pid, TokenIndex const& V) const;
|
||||
|
||||
/** Return the phrase represented by phrase ID pid_ */
|
||||
vector<TKN>
|
||||
std::vector<TKN>
|
||||
getSequence(::uint64_t pid) const;
|
||||
|
||||
TKN const*
|
||||
@ -308,7 +308,7 @@ namespace ugdiss
|
||||
|
||||
bool
|
||||
findBranches(TKN const* base, bitvector const& terminals,
|
||||
vector<tree_iterator>& dest) const;
|
||||
std::vector<tree_iterator>& dest) const;
|
||||
|
||||
double aveIndexEntrySize() const
|
||||
{
|
||||
@ -356,7 +356,7 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
fillBitSet(vector<TKN> const& key,
|
||||
fillBitSet(std::vector<TKN> const& key,
|
||||
bitvector& bitset) const
|
||||
{
|
||||
if (!key.size()) return 0;
|
||||
@ -555,8 +555,8 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const
|
||||
lower_bound(typename std::vector<TKN>::const_iterator const& keyStart,
|
||||
typename std::vector<TKN>::const_iterator const& keyStop) const
|
||||
{
|
||||
TKN const* const a = &(*keyStart);
|
||||
TKN const* const z = &(*keyStop);
|
||||
@ -597,8 +597,8 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const
|
||||
upper_bound(typename std::vector<TKN>::const_iterator const& keyStart,
|
||||
typename std::vector<TKN>::const_iterator const& keyStop) const
|
||||
{
|
||||
TKN const* const a = &((TKN)*keyStart);
|
||||
TKN const* const z = &((TKN)*keyStop);
|
||||
@ -631,7 +631,7 @@ namespace ugdiss
|
||||
{
|
||||
char const* lo = lower_bound(keyStart,keyLen);
|
||||
char const* up = upper_bound(keyStart,keyLen);
|
||||
// cerr << up-lo << endl;
|
||||
// cerr << up-lo << std::endl;
|
||||
return rawCnt(lo,up);
|
||||
}
|
||||
|
||||
@ -640,8 +640,8 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
::uint64_t
|
||||
TSA<TKN>::
|
||||
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
|
||||
typename vector<TKN>::const_iterator const& pstop) const
|
||||
getSequenceId(typename std::vector<TKN>::const_iterator const& pstart,
|
||||
typename std::vector<TKN>::const_iterator const& pstop) const
|
||||
{
|
||||
return getSequenceId(&(*pstart),pstop-pstart);
|
||||
}
|
||||
@ -668,14 +668,14 @@ namespace ugdiss
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
vector<TKN>
|
||||
std::vector<TKN>
|
||||
TSA<TKN>::
|
||||
getSequence(::uint64_t pid) const
|
||||
{
|
||||
size_t plen = pid % 65536;
|
||||
size_t offset = (pid >> 16) % 65536;
|
||||
TKN const* w = corpus->sntStart(pid >> 32)+offset;
|
||||
vector<TKN> ret(plen);
|
||||
std::vector<TKN> ret(plen);
|
||||
for (size_t i = 0; i < plen; i++, w = w->next())
|
||||
{
|
||||
assert(w);
|
||||
@ -685,11 +685,11 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
string
|
||||
std::string
|
||||
TSA<TKN>::
|
||||
getSequence(::uint64_t pid, TokenIndex const& V) const
|
||||
{
|
||||
ostringstream buf;
|
||||
std::ostringstream buf;
|
||||
TKN const* a = getSequenceStart(pid);
|
||||
buf << V[a->id()];
|
||||
size_t len = getSequenceLength(pid);
|
||||
@ -806,7 +806,7 @@ namespace ugdiss
|
||||
bool
|
||||
TSA<TKN>::
|
||||
findBranches(TKN const* base, bitvector const& terminals,
|
||||
vector<tree_iterator>& dest) const
|
||||
std::vector<tree_iterator>& dest) const
|
||||
{
|
||||
dest.assign(terminals.count(),tree_iterator(this));
|
||||
for (size_t i = terminals.find_first(), k = 0;
|
||||
|
@ -9,24 +9,24 @@
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
// A simple mechanism for caching bit vectors representing occurrences of token
|
||||
// A simple mechanism for caching bit vectors representing occurrences of token
|
||||
// sequences in a corpus. Useful for very frequent items for which the bit
|
||||
// vector is expensive to create on the fly. The variable threshold determines
|
||||
// when bit vectors are cached and when they are created on the fly, using the
|
||||
// vector is expensive to create on the fly. The variable threshold determines
|
||||
// when bit vectors are cached and when they are created on the fly, using the
|
||||
// size of the range of entries in the TSA's index in bytes to determine
|
||||
// whether or not to store the respective bit vector in the cache.
|
||||
// whether or not to store the respective bit vector in the cache.
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
template<typename TSA>
|
||||
class
|
||||
BitSetCache
|
||||
{
|
||||
public:
|
||||
typedef boost::dynamic_bitset<typename ::uint64_t> BitSet;
|
||||
typedef boost::dynamic_bitset<uint64_t> BitSet;
|
||||
typedef boost::shared_ptr<BitSet> bsptr;
|
||||
typedef map<pair<char const*,ushort>,bsptr> myMap;
|
||||
typedef std::map<std::pair<char const*,ushort>,bsptr> myMap;
|
||||
typedef myMap::iterator myMapIter;
|
||||
private:
|
||||
TSA const* tsa;
|
||||
@ -56,7 +56,7 @@ namespace ugdiss
|
||||
if (!lo) return ret;
|
||||
if (up-lo > threshold)
|
||||
{
|
||||
pair<char const*,ushort> k(lo,keyLen);
|
||||
std::pair<char const*,ushort> k(lo,keyLen);
|
||||
myMapIter m = cached1.find(k);
|
||||
if (m != cached1.end())
|
||||
ret = m->second;
|
||||
@ -83,9 +83,9 @@ namespace ugdiss
|
||||
if (!lo) return ret;
|
||||
if (up-lo > threshold)
|
||||
{
|
||||
pair<char const*,ushort> k(lo,keyLen);
|
||||
std::pair<char const*,ushort> k(lo,keyLen);
|
||||
// cout << "bla " << keyStart->id() << " "
|
||||
// << cached2.size() << " " << up-lo << " " << k.second << endl;
|
||||
// << cached2.size() << " " << up-lo << " " << k.second << std::endl;
|
||||
myMapIter m = cached2.find(k);
|
||||
if (m != cached2.end())
|
||||
ret = m->second;
|
||||
|
@ -21,11 +21,11 @@ namespace ugdiss
|
||||
#define _DISPLAY_CHAIN
|
||||
// for debugging only
|
||||
template<typename T>
|
||||
void display(T const* x, string label)
|
||||
void display(T const* x, std::string label)
|
||||
{
|
||||
cout << label << ":";
|
||||
for (;x;x=next(x)) cout << " " << x->lemma;
|
||||
cout << endl;
|
||||
std::cout << label << ":";
|
||||
for (;x;x=next(x)) std::cout << " " << x->lemma;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -47,11 +47,11 @@ namespace ugdiss
|
||||
TSA_tree_iterator
|
||||
{
|
||||
protected:
|
||||
vector<char const*> lower;
|
||||
vector<char const*> upper;
|
||||
std::vector<char const*> lower;
|
||||
std::vector<char const*> upper;
|
||||
|
||||
// for debugging ...
|
||||
void showBounds(ostream& out) const;
|
||||
void showBounds(std::ostream& out) const;
|
||||
public:
|
||||
typedef TKN Token;
|
||||
|
||||
@ -76,7 +76,7 @@ namespace ugdiss
|
||||
bool full_match_only=true);
|
||||
TSA_tree_iterator(TSA<Token> const* s,
|
||||
TokenIndex const& V,
|
||||
string const& key);
|
||||
std::string const& key);
|
||||
|
||||
char const* lower_bound(int p) const;
|
||||
char const* upper_bound(int p) const;
|
||||
@ -96,7 +96,7 @@ namespace ugdiss
|
||||
virtual bool over();
|
||||
virtual bool up();
|
||||
|
||||
string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
|
||||
std::string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
|
||||
|
||||
// checks if the sentence [start,stop) contains the given sequence.
|
||||
bool match(Token const* start, Token const* stop) const;
|
||||
@ -105,23 +105,23 @@ namespace ugdiss
|
||||
|
||||
// fillBitSet: deprecated; use markSentences() instead
|
||||
count_type
|
||||
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
|
||||
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const;
|
||||
|
||||
count_type
|
||||
markEndOfSequence(Token const* start, Token const* stop,
|
||||
boost::dynamic_bitset<typename ::uint64_t>& dest) const;
|
||||
boost::dynamic_bitset<uint64_t>& dest) const;
|
||||
count_type
|
||||
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
|
||||
|
||||
count_type
|
||||
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const;
|
||||
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const;
|
||||
|
||||
count_type
|
||||
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset,
|
||||
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset,
|
||||
bool markOnlyStartPosition=false) const;
|
||||
|
||||
count_type
|
||||
markOccurrences(vector<ushort>& dest) const;
|
||||
markOccurrences(std::vector<ushort>& dest) const;
|
||||
|
||||
::uint64_t
|
||||
getSequenceId() const;
|
||||
@ -181,7 +181,7 @@ namespace ugdiss
|
||||
return this->size();
|
||||
}
|
||||
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
sptr<std::vector<typename ttrack::Position> >
|
||||
randomSample(int level, size_t N) const;
|
||||
|
||||
};
|
||||
@ -286,7 +286,7 @@ namespace ugdiss
|
||||
// display(root->corpus->getToken(U),"U1");
|
||||
|
||||
int x = root->corpus->cmp(U,L,lower.size()-1);
|
||||
// cerr << "x=" << x << endl;
|
||||
// cerr << "x=" << x << std::endl;
|
||||
if (x != 1)
|
||||
return false;
|
||||
lower.back() = upper.back();
|
||||
@ -359,10 +359,10 @@ namespace ugdiss
|
||||
TSA_tree_iterator<Token>::
|
||||
TSA_tree_iterator(TSA<Token> const* s,
|
||||
TokenIndex const& V,
|
||||
string const& key)
|
||||
std::string const& key)
|
||||
: root(s)
|
||||
{
|
||||
istringstream buf(key); string w;
|
||||
std::istringstream buf(key); std::string w;
|
||||
while (buf >> w)
|
||||
{
|
||||
if (this->extend(V[w]))
|
||||
@ -482,8 +482,8 @@ namespace ugdiss
|
||||
#if 0
|
||||
tsa::ArrayEntry I;
|
||||
root->readEntry(lo,I);
|
||||
cout << I.sid << " " << I.offset << endl;
|
||||
cout << root->corpus->sntLen(I.sid) << endl;
|
||||
cout << I.sid << " " << I.offset << std::endl;
|
||||
cout << root->corpus->sntLen(I.sid) << std::endl;
|
||||
#endif
|
||||
hi = root->find_end(lo, hi, getToken(0), 1, 0);
|
||||
upper.push_back(hi);
|
||||
@ -574,11 +574,11 @@ namespace ugdiss
|
||||
Token const* eos = root->corpus->sntEnd(A.sid);
|
||||
#endif
|
||||
if (p < 0) p += lower.size();
|
||||
// cerr << p << ". " << t->id() << endl;
|
||||
// cerr << p << ". " << t->id() << std::endl;
|
||||
while (p-- > 0)
|
||||
{
|
||||
t = next(t);
|
||||
// if (t) cerr << p << ". " << t->id() << endl;
|
||||
// if (t) cerr << p << ". " << t->id() << std::endl;
|
||||
assert(t >= bos && t < eos);
|
||||
}
|
||||
return t;
|
||||
@ -616,7 +616,7 @@ namespace ugdiss
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
fillBitSet(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
|
||||
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const
|
||||
{
|
||||
return markSentences(bitset);
|
||||
}
|
||||
@ -626,7 +626,7 @@ namespace ugdiss
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markSentences(boost::dynamic_bitset<typename ::uint64_t>& bitset) const
|
||||
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
bitset.resize(root->corpus->size());
|
||||
@ -653,7 +653,7 @@ namespace ugdiss
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markOccurrences(boost::dynamic_bitset<typename ::uint64_t>& bitset, bool markOnlyStartPosition) const
|
||||
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset, bool markOnlyStartPosition) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
if (bitset.size() != root->corpus->numTokens())
|
||||
@ -669,7 +669,7 @@ namespace ugdiss
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markOccurrences(vector<ushort>& dest) const
|
||||
markOccurrences(std::vector<ushort>& dest) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
assert(dest.size() == root->corpus->numTokens());
|
||||
@ -700,7 +700,7 @@ namespace ugdiss
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markEndOfSequence(Token const* start, Token const* stop,
|
||||
boost::dynamic_bitset<typename ::uint64_t>& dest) const
|
||||
boost::dynamic_bitset<uint64_t>& dest) const
|
||||
{
|
||||
count_type matchCount=0;
|
||||
Token const* a = getToken(0);
|
||||
@ -769,7 +769,7 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
string
|
||||
std::string
|
||||
TSA_tree_iterator<Token>::
|
||||
str(TokenIndex const* V, int start, int stop) const
|
||||
{
|
||||
@ -779,7 +779,7 @@ namespace ugdiss
|
||||
assert(start>=0 && start < int(this->size()));
|
||||
assert(stop > 0 && stop <= int(this->size()));
|
||||
Token const* x = this->getToken(0);
|
||||
ostringstream buf;
|
||||
std::ostringstream buf;
|
||||
for (int i = start; i < stop; ++i, x = x->next())
|
||||
{
|
||||
assert(x);
|
||||
@ -802,7 +802,7 @@ namespace ugdiss
|
||||
assert(start>=0 && start < int(this->size()));
|
||||
assert(stop > 0 && stop <= int(this->size()));
|
||||
Token const* x = this->getToken(0);
|
||||
ostringstream buf;
|
||||
std::ostringstream buf;
|
||||
for (int i = start; i < stop; ++i, x = x->next())
|
||||
{
|
||||
assert(x);
|
||||
@ -899,15 +899,15 @@ namespace ugdiss
|
||||
|
||||
/// randomly select up to N occurrences of the sequence
|
||||
template<typename Token>
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
sptr<std::vector<typename ttrack::Position> >
|
||||
TSA_tree_iterator<Token>::
|
||||
randomSample(int level, size_t N) const
|
||||
{
|
||||
if (level < 0) level += lower.size();
|
||||
assert(level >=0);
|
||||
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
ret(new vector<typename ttrack::Position>(N));
|
||||
sptr<std::vector<typename ttrack::Position> >
|
||||
ret(new std::vector<typename ttrack::Position>(N));
|
||||
|
||||
size_t m=0; // number of samples selected so far
|
||||
typename Token::ArrayEntry I(lower.at(level));
|
||||
|
@ -22,7 +22,7 @@
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
|
||||
typedef boost::dynamic_bitset<uint64_t> bdBitset;
|
||||
|
||||
@ -39,12 +39,12 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
string
|
||||
std::string
|
||||
toString(TokenIndex const& V, Token const* x, size_t const len)
|
||||
{
|
||||
if (!len) return "";
|
||||
UTIL_THROW_IF2(!x, HERE << ": Unexpected end of phrase!");
|
||||
ostringstream buf;
|
||||
std::ostringstream buf;
|
||||
buf << V[x->id()];
|
||||
size_t i = 1;
|
||||
for (x = x->next(); x && i < len; ++i, x = x->next())
|
||||
@ -100,10 +100,10 @@ namespace ugdiss
|
||||
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
|
||||
|
||||
/** Don't use this unless you want a copy of the sentence */
|
||||
vector<TKN>
|
||||
std::vector<TKN>
|
||||
operator[](id_type sid) const
|
||||
{
|
||||
return vector<TKN>(sntStart(sid),sntEnd(sid));
|
||||
return std::vector<TKN>(sntStart(sid),sntEnd(sid));
|
||||
}
|
||||
|
||||
/** @return size of corpus in number of sentences */
|
||||
@ -114,9 +114,9 @@ namespace ugdiss
|
||||
|
||||
/** @return string representation of sentence /sid/
|
||||
* Currently only defined for Ttrack<id_type> */
|
||||
string str(id_type sid, TokenIndex const& T) const;
|
||||
std::string str(id_type sid, TokenIndex const& T) const;
|
||||
|
||||
string pid2str(TokenIndex const* V, uint64_t pid) const;
|
||||
std::string pid2str(TokenIndex const* V, uint64_t pid) const;
|
||||
|
||||
// /** @return string representation of sentence /sid/
|
||||
// * Currently only defined for Ttrack<id_type> */
|
||||
@ -124,8 +124,8 @@ namespace ugdiss
|
||||
|
||||
/** counts the tokens in the corpus; used for example in the construction of
|
||||
* token sequence arrays */
|
||||
count_type count_tokens(vector<count_type>& cnt, bdBitset const* filter,
|
||||
int lengthCutoff=0, ostream* log=NULL) const;
|
||||
count_type count_tokens(std::vector<count_type>& cnt, bdBitset const* filter,
|
||||
int lengthCutoff=0, std::ostream* log=NULL) const;
|
||||
|
||||
// static id_type toID(TKN const& t);
|
||||
|
||||
@ -171,8 +171,8 @@ namespace ugdiss
|
||||
template<typename TKN>
|
||||
count_type
|
||||
Ttrack<TKN>::
|
||||
count_tokens(vector<count_type>& cnt, bdBitset const* filter,
|
||||
int lengthCutoff, ostream* log) const
|
||||
count_tokens(std::vector<count_type>& cnt, bdBitset const* filter,
|
||||
int lengthCutoff, std::ostream* log) const
|
||||
{
|
||||
bdBitset filter2;
|
||||
if (!filter)
|
||||
@ -199,7 +199,7 @@ namespace ugdiss
|
||||
{
|
||||
if (log)
|
||||
*log << "WARNING: skipping sentence #" << sid
|
||||
<< " with more than 65536 tokens" << endl;
|
||||
<< " with more than 65536 tokens" << std::endl;
|
||||
expectedTotal -= stop-k;
|
||||
}
|
||||
else
|
||||
@ -207,7 +207,7 @@ namespace ugdiss
|
||||
totalCount += stop-k;
|
||||
for (; k < stop; ++k)
|
||||
{
|
||||
// cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << endl;
|
||||
// cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << std::endl;
|
||||
id_type wid = k->id();
|
||||
while (wid >= cnt.size()) cnt.push_back(0);
|
||||
cnt[wid]++;
|
||||
@ -217,8 +217,8 @@ namespace ugdiss
|
||||
if (this->size() == filter->count())
|
||||
{
|
||||
if (totalCount != expectedTotal)
|
||||
cerr << "OOPS: expected " << expectedTotal
|
||||
<< " tokens but counted " << totalCount << endl;
|
||||
std::cerr << "OOPS: expected " << expectedTotal
|
||||
<< " tokens but counted " << totalCount << std::endl;
|
||||
assert(totalCount == expectedTotal);
|
||||
}
|
||||
return totalCount;
|
||||
@ -244,25 +244,25 @@ namespace ugdiss
|
||||
int ret=-1;
|
||||
|
||||
#if 0
|
||||
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
|
||||
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
|
||||
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << std::endl;
|
||||
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << std::endl;
|
||||
#endif
|
||||
|
||||
while (a >= bosA && a < eosA)
|
||||
{
|
||||
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
|
||||
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << std::endl;
|
||||
if (*a < *b) { break; } // return -1;
|
||||
if (*a > *b) { ret = 2; break; } // return 2;
|
||||
a = next(a);
|
||||
b = next(b);
|
||||
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
|
||||
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << std::endl;
|
||||
if (--keyLength==0 || b < bosB || b >= eosB)
|
||||
{
|
||||
ret = (a < bosA || a >= eosA) ? 0 : 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// cerr << "RETURNING " << ret << endl;
|
||||
// cerr << "RETURNING " << ret << std::endl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -312,7 +312,7 @@ namespace ugdiss
|
||||
{
|
||||
cout << t2->lemma << "." << int(t2->minpos) << " "
|
||||
<< k->lemma << "." << int(k->minpos) << " "
|
||||
<< t2->cmp(*k) << endl;
|
||||
<< t2->cmp(*k) << std::endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -382,7 +382,7 @@ namespace ugdiss
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
string
|
||||
std::string
|
||||
Ttrack<TKN>::
|
||||
pid2str(TokenIndex const* V, uint64_t pid) const
|
||||
{
|
||||
@ -390,7 +390,7 @@ namespace ugdiss
|
||||
pid >>= 16;
|
||||
uint32_t off = pid % (1<<16);
|
||||
uint32_t sid = pid>>16;
|
||||
ostringstream buf;
|
||||
std::ostringstream buf;
|
||||
TKN const* t = sntStart(sid) + off;
|
||||
TKN const* stop = t + len;
|
||||
if (V)
|
||||
|
@ -57,13 +57,13 @@ namespace ugdiss
|
||||
cout << "A: " << z->id();
|
||||
for (z = next(z); z >= bosA && z < eosA; z = next(z))
|
||||
cout << "-" << z->id();
|
||||
cout << endl;
|
||||
cout << std::endl;
|
||||
|
||||
z = b;
|
||||
cout << "B: " << z->id();
|
||||
for (z = next(z); z >= bosB && z < eosB; z = next(z))
|
||||
cout << "-" << z->id();
|
||||
cout << endl;
|
||||
cout << std::endl;
|
||||
#endif
|
||||
while (*a == *b)
|
||||
{
|
||||
@ -76,7 +76,7 @@ namespace ugdiss
|
||||
}
|
||||
int x = a->cmp(*b);
|
||||
|
||||
// cout << " " << (x < 0 ? "YES" : "NO") << endl;
|
||||
// cout << " " << (x < 0 ? "YES" : "NO") << std::endl;
|
||||
|
||||
assert (x != 0);
|
||||
return x < 0;
|
||||
|
@ -10,24 +10,24 @@
|
||||
#include "tpt_typedefs.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
typedef boost::dynamic_bitset<uint64_t> bitvector;
|
||||
|
||||
typedef vector<vector<float> > flt_2d_table;
|
||||
typedef vector<flt_2d_table> flt_3d_table;
|
||||
typedef vector<flt_3d_table> flt_4d_table;
|
||||
typedef std::vector<std::vector<float> > flt_2d_table;
|
||||
typedef std::vector<flt_2d_table> flt_3d_table;
|
||||
typedef std::vector<flt_3d_table> flt_4d_table;
|
||||
|
||||
typedef vector<vector<ushort> > ushort_2d_table;
|
||||
typedef vector<ushort_2d_table> ushort_3d_table;
|
||||
typedef vector<ushort_3d_table> ushort_4d_table;
|
||||
typedef std::vector<std::vector<ushort> > ushort_2d_table;
|
||||
typedef std::vector<ushort_2d_table> ushort_3d_table;
|
||||
typedef std::vector<ushort_3d_table> ushort_4d_table;
|
||||
|
||||
typedef vector<vector<short> > short_2d_table;
|
||||
typedef vector<short_2d_table> short_3d_table;
|
||||
typedef vector<short_3d_table> short_4d_table;
|
||||
typedef std::vector<std::vector<short> > short_2d_table;
|
||||
typedef std::vector<short_2d_table> short_3d_table;
|
||||
typedef std::vector<short_3d_table> short_4d_table;
|
||||
|
||||
typedef vector<vector<int> > int_2d_table;
|
||||
typedef vector<int_2d_table> int_3d_table;
|
||||
typedef vector<int_3d_table> int_4d_table;
|
||||
typedef std::vector<std::vector<int> > int_2d_table;
|
||||
typedef std::vector<int_2d_table> int_3d_table;
|
||||
typedef std::vector<int_3d_table> int_4d_table;
|
||||
}
|
||||
|
||||
#define sptr boost::shared_ptr
|
||||
|
@ -296,6 +296,7 @@ namespace Moses
|
||||
load_extra_data(string bname, bool locking = true)
|
||||
{
|
||||
using namespace boost;
|
||||
using namespace ugdiss;
|
||||
// TO DO: ADD CHECKS FOR ROBUSTNESS
|
||||
// - file existence?
|
||||
// - same number of lines?
|
||||
@ -701,7 +702,7 @@ namespace Moses
|
||||
#if 1
|
||||
if (m_bias_log && m_lr_func && m_bias_loglevel > 3)
|
||||
{
|
||||
typename PhrasePair<Token>::SortDescendingByJointCount sorter;
|
||||
PhrasePair<Token>::SortDescendingByJointCount sorter;
|
||||
sort(ppfix.begin(), ppfix.end(),sorter);
|
||||
BOOST_FOREACH(PhrasePair<Token> const& pp, ppfix)
|
||||
{
|
||||
|
@ -145,7 +145,7 @@ namespace Moses
|
||||
|
||||
std::vector<std::vector<id_type> > wlex21;
|
||||
// word translation lexicon (without counts, get these from calc_lex.COOC)
|
||||
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
|
||||
typedef ugdiss::mm2dTable<id_type,id_type,uint32_t,uint32_t> mm2dtable_t;
|
||||
mm2dtable_t COOCraw;
|
||||
|
||||
TargetPhrase*
|
||||
|
@ -16,7 +16,7 @@ namespace Moses {
|
||||
float m_alpha;
|
||||
string m_lexfile;
|
||||
public:
|
||||
LexicalPhraseScorer2<Token> scorer;
|
||||
ugdiss::LexicalPhraseScorer2<Token> scorer;
|
||||
|
||||
PScoreLex1(string const& alphaspec, string const& lexfile)
|
||||
{
|
||||
|
@ -27,7 +27,7 @@ namespace Moses {
|
||||
{
|
||||
if (x == '+') { --checksum; continue; }
|
||||
if (x != 'g' && x != 's' && x != 'r') continue;
|
||||
string s = (format("pbwd-%c%.3f") % x % c).str();
|
||||
string s = (boost::format("pbwd-%c%.3f") % x % c).str();
|
||||
this->m_feature_names.push_back(s);
|
||||
}
|
||||
this->m_num_feats = this->m_feature_names.size();
|
||||
|
@ -28,7 +28,7 @@ namespace Moses {
|
||||
{
|
||||
if (x == '+') { --checksum; continue; }
|
||||
if (x != 'g' && x != 's' && x != 'r') continue;
|
||||
string s = (format("pfwd-%c%.3f") % x % c).str();
|
||||
string s = (boost::format("pfwd-%c%.3f") % x % c).str();
|
||||
this->m_feature_names.push_back(s);
|
||||
}
|
||||
this->m_num_feats = this->m_feature_names.size();
|
||||
|
@ -12,7 +12,7 @@ namespace Moses {
|
||||
class
|
||||
PScoreUnaligned : public PhraseScorer<Token>
|
||||
{
|
||||
typedef boost::dynamic_bitset<typename ::uint64_t> bitvector;
|
||||
typedef boost::dynamic_bitset<uint64_t> bitvector;
|
||||
public:
|
||||
PScoreUnaligned(string const spec)
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include "TranslationRequest.h"
|
||||
#include "moses/ContextScope.h"
|
||||
#include "moses/ContextScope.h"
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
namespace MosesServer
|
||||
|
@ -24,9 +24,10 @@
|
||||
#define BOOST_TEST_MODULE MosesTrainingScoreFeature
|
||||
#include <boost/test/test_tools.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <boost/assign/list_of.hpp>
|
||||
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
//#include <unordered_set>
|
||||
//#include <unordered_map>
|
||||
|
||||
using namespace MosesTraining;
|
||||
using namespace std;
|
||||
@ -54,16 +55,16 @@ BOOST_AUTO_TEST_CASE(manager_configure_domain_except)
|
||||
//Check that configure rejects illegal domain arg combinations
|
||||
ScoreFeatureManager manager;
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure( {"--DomainRatio","/dev/null","--DomainIndicator","/dev/null"}),
|
||||
manager.configure(boost::assign::list_of("--DomainRatio")("/dev/null")("--DomainIndicator")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure( {"--SparseDomainSubset","/dev/null","--SparseDomainRatio","/dev/null"}),
|
||||
manager.configure(boost::assign::list_of("--SparseDomainSubset")("/dev/null")("--SparseDomainRatio")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure( {"--SparseDomainBlah","/dev/null"}),
|
||||
manager.configure(boost::assign::list_of("--SparseDomainBlah")("/dev/null")),
|
||||
ScoreFeatureArgumentException);
|
||||
BOOST_CHECK_THROW(
|
||||
manager.configure( {"--DomainSubset"}),
|
||||
manager.configure(boost::assign::list_of("--DomainSubset")),
|
||||
ScoreFeatureArgumentException);
|
||||
}
|
||||
|
||||
@ -97,25 +98,27 @@ T adder(T first, Args... args)
|
||||
BOOST_AUTO_TEST_CASE(manager_config_domain)
|
||||
{
|
||||
checkDomainConfigured<RatioDomainFeature>
|
||||
( {"--DomainRatio","/dev/null"});
|
||||
(boost::assign::list_of("--DomainRatio")("/dev/null"));
|
||||
checkDomainConfigured<IndicatorDomainFeature>
|
||||
( {"--DomainIndicator","/dev/null"});
|
||||
(boost::assign::list_of("--DomainIndicator")("/dev/null"));
|
||||
checkDomainConfigured<SubsetDomainFeature>
|
||||
( {"--DomainSubset","/dev/null"});
|
||||
(boost::assign::list_of("--DomainSubset")("/dev/null"));
|
||||
checkDomainConfigured<SparseRatioDomainFeature>
|
||||
( {"--SparseDomainRatio","/dev/null"});
|
||||
(boost::assign::list_of("--SparseDomainRatio")("/dev/null"));
|
||||
checkDomainConfigured<SparseIndicatorDomainFeature>
|
||||
( {"--SparseDomainIndicator","/dev/null"});
|
||||
(boost::assign::list_of("--SparseDomainIndicator")("/dev/null"));
|
||||
checkDomainConfigured<SparseSubsetDomainFeature>
|
||||
( {"--SparseDomainSubset","/dev/null"});
|
||||
(boost::assign::list_of("--SparseDomainSubset")("/dev/null"));
|
||||
|
||||
/*
|
||||
// C++11 testing
|
||||
unordered_set<int> s;
|
||||
s.insert(4);
|
||||
s.insert(7);
|
||||
s.insert(4);
|
||||
s.insert(1);
|
||||
|
||||
for (auto i: s) {
|
||||
for (auto i: s) {
|
||||
cerr << i << " ";
|
||||
}
|
||||
|
||||
@ -124,7 +127,7 @@ for (auto i: s) {
|
||||
m["ba"] = 6;
|
||||
m["aabc"] = 7;
|
||||
|
||||
for (auto i: m) {
|
||||
for (auto i: m) {
|
||||
cerr << i.first << "=" << i.second << " ";
|
||||
}
|
||||
|
||||
@ -132,6 +135,6 @@ for (auto i: m) {
|
||||
|
||||
std::string s1 = "x", s2 = "aa", s3 = "bb", s4 = "yy";
|
||||
std::string ssum = adder(s1, s2, s3, s4);
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user