Routine check-in.

This commit is contained in:
Ulrich Germann 2014-03-13 13:41:32 +00:00
parent 94657fd589
commit ce75b58f6f
4 changed files with 73 additions and 36 deletions

View File

@ -46,6 +46,7 @@ lib moses :
[ glob
*.cpp
TranslationModel/*.cpp
TranslationModel/UG/*.cpp
TranslationModel/fuzzy-match/*.cpp
TranslationModel/DynSAInclude/*.cpp
TranslationModel/RuleTable/*.cpp

View File

@ -24,7 +24,7 @@ mmTtrack<SimpleWordId> MCT;
bool sform;
bool have_mtt, have_mct;
bool with_sids;
bool with_positions;
void
interpret_args(int ac, char* av[])
{
@ -34,6 +34,7 @@ interpret_args(int ac, char* av[])
("help,h", "print this message")
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
("sform,s", po::bool_switch(&sform), "sform only")
("with-positions,p", po::bool_switch(&with_positions), "show word positions")
;
po::options_description h("Hidden Options");
@ -68,10 +69,10 @@ printRangeMTT(size_t start, size_t stop)
for (;start < stop; start++)
{
size_t i = 0;
Token const* t = MTT.sntStart(start);
Token const* s = MTT.sntStart(start);
Token const* e = MTT.sntEnd(start);
if (with_sids) cout << start << " ";
for (;t < e; ++t)
for (Token const* t = s; t < e; ++t)
{
#if 0
uchar const* x = reinterpret_cast<uchar const*>(t);
@ -91,7 +92,11 @@ printRangeMTT(size_t start, size_t stop)
cout << i+t->parent << " ";
cout << DT[t->dtype] << endl;
}
else cout << SF[t->id()] << " ";
else
{
if (with_positions) cout << t-s << ":";
cout << SF[t->id()] << " ";
}
}
cout << endl;
}
@ -102,10 +107,15 @@ printRangeMCT(size_t start, size_t stop)
{
for (;start < stop; start++)
{
SimpleWordId const* t = MCT.sntStart(start);
SimpleWordId const* s = MCT.sntStart(start);
SimpleWordId const* t = s;
SimpleWordId const* e = MCT.sntEnd(start);
if (with_sids) cout << start << " ";
while (t < e) cout << SF[(t++)->id()] << " ";
while (t < e)
{
if (with_positions) cout << t-s << ":";
cout << SF[(t++)->id()] << " ";
}
cout << endl;
}
}

View File

@ -526,7 +526,7 @@ namespace Moses {
job(typename TSA<Token>::tree_iterator const& m,
sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd);
};
public:
class
worker
{
@ -535,7 +535,7 @@ namespace Moses {
worker(agenda& a) : ag(a) {}
void operator()();
};
private:
list<sptr<job> > joblist;
vector<sptr<boost::thread> > workers;
bool shutdown;
@ -638,8 +638,9 @@ namespace Moses {
bitvector full_alignment(100*100);
while (j->step(sid,offset))
{
cerr << sid << ":" << offset << " at " << __FILE__<< ":" << __LINE__ << endl;
aln.clear();
int po_fwd=5,po_bwd=5;
int po_fwd=po_other,po_bwd=po_other;
if (j->fwd)
{
if (!ag.bt.find_trg_phr_bounds
@ -1063,11 +1064,12 @@ namespace Moses {
template<typename Token>
bool
Bitext<Token>::
find_trg_phr_bounds(size_t const sid, size_t const start, size_t const stop,
find_trg_phr_bounds
(size_t const sid,
size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
int & po_fwd, int & po_bwd,
vector<uchar>* core_alignment,
bitvector* full_alignment,
vector<uchar>* core_alignment, bitvector* full_alignment,
bool const flip) const
{
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
@ -1076,6 +1078,10 @@ namespace Moses {
// are by definition unaligned, we store only the core alignment in *core_alignment
// it is up to the calling function to shift alignment points over for start positions
// of extracted phrases that start with a fringe word
assert(T1);
assert(T2);
assert(Tx);
bitvector forbidden((flip ? T1 : T2)->sntLen(sid));
size_t slen1 = (*T1).sntLen(sid);
size_t slen2 = (*T2).sntLen(sid);
@ -1098,6 +1104,7 @@ namespace Moses {
{
if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
// cerr << src << "/" << slen1 << " " << trg << "/" << slen2 << endl;
if (src < start || src >= stop)
forbidden.set(trg);
else
@ -1219,22 +1226,23 @@ namespace Moses {
{
ag.reset(new agenda(*this));
// ag->add_workers(1);
ag->add_workers(20);
// ag->add_workers(20);
}
typedef boost::unordered_map<uint64_t,sptr<pstats> > pcache_t;
sptr<pstats> ret;
if (max_sample == this->default_sample_size)
{
#if 0
uint64_t pid = phrase.getPid();
pcache_t & cache(phrase.root == &(*this->I1) ? cache1 : cache2);
pcache_t::value_type entry(pid,sptr<pstats>());
pair<pcache_t::iterator,bool> foo;
{
// boost::lock_guard<boost::mutex>(this->lock);
foo = cache.emplace(entry);
}
if (foo.second) foo.first->second = ag->add_job(phrase, max_sample);
ret = foo.first->second;
#else
ret = ag->add_job(phrase, max_sample);
#endif
}
else ret = ag->add_job(phrase, max_sample);
return ret;
@ -1249,6 +1257,12 @@ namespace Moses {
sptr<pstats> ret;
ret = prep2(phrase, this->default_sample_size);
assert(ret);
// single worker (for debugging)
typename agenda::worker w(*this->ag);
w();
return ret;
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);

View File

@ -74,29 +74,27 @@ namespace Moses
Mmsapt::
load_extra_data(string bname)
{
// TO DO: ADD CHECKS FOR ROBUSTNESS
// - file existence?
// - same number of lines?
// - sane word alignment?
vector<string> text1,text2,symal;
string line;
filtering_istream in1,in2,ina;
open_input_stream(bname+L1+".txt.gz",in1);
cerr << __FILE__ << ":" << __LINE__ << endl;
while(getline(in1,line)) text1.push_back(line);
cerr << __FILE__ << ":" << __LINE__ << endl;
open_input_stream(bname+L2+".txt.gz",in2);
cerr << __FILE__ << ":" << __LINE__ << endl;
while(getline(in2,line)) text2.push_back(line);
cerr << __FILE__ << ":" << __LINE__ << endl;
open_input_stream(bname+L1+"-"+L2+".symal.gz",ina);
cerr << __FILE__ << ":" << __LINE__ << endl;
while(getline(ina,line))
{
cerr << line << endl;
symal.push_back(line);
}
cerr << __FILE__ << ":" << __LINE__ << endl;
while(getline(in1,line)) text1.push_back(line);
while(getline(in2,line)) text2.push_back(line);
while(getline(ina,line)) symal.push_back(line);
// cerr << "Read " << btdyn->T1->size() << " sentence pairs" << endl;
lock_guard<mutex> guard(this->lock);
cerr << __FILE__ << ":" << __LINE__ << endl;
btdyn = btdyn->add(text1,text2,symal);
assert(btdyn);
cerr << __FILE__ << ":" << __LINE__ << endl;
cerr << "Loaded " << btdyn->T1->size() << " sentence pairs" << endl;
}
@ -264,7 +262,7 @@ namespace Moses
parse_pid(a->first, sid, off, len);
if (btb.T2)
{
Token const* x = btb.T2->sntStart(sid) + off;
Token const* x = bta.T2->sntStart(sid) + off;
TSA<Token>::tree_iterator m(btb.I2.get(), x, x+len);
if (m.size() == len)
pp.update(a->first,m.approxOccurrenceCount(),a->second);
@ -469,13 +467,17 @@ namespace Moses
dyn = btdyn;
}
assert(dyn);
vector<id_type> sphrase(src.GetSize());
for (size_t i = 0; i < src.GetSize(); ++i)
{
Factor const* f = src.GetFactor(i,input_factor);
id_type wid = (*btfix.V1)[f->ToString()];
cerr << f->ToString() << " ";
sphrase[i] = wid;
}
cerr << endl;
TSA<Token>::tree_iterator mfix(btfix.I1.get()), mdyn(dyn->I1.get());
for (size_t i = 0; mfix.size() == i && i < sphrase.size(); ++i)
@ -493,10 +495,20 @@ namespace Moses
// do we need this lock here?
// Is it used here to control the total number of running threads???
boost::lock_guard<boost::mutex> guard(this->lock);
sfix = btfix.lookup(mfix);
// sfix = btfix.lookup(mfix);
}
cerr << "Fixed lookup OK mdyn.size() = " << mdyn.size() << endl;
if (mdyn.size() == sphrase.size())
{
cerr << "count: " << mdyn.approxOccurrenceCount() << endl;
sdyn = dyn->lookup(mdyn);
}
cerr << "Dynamic lookup OK" << endl;
if (poolCounts)
{
if (!pool_pstats(src, mfix.getPid(),sfix.get(),btfix,