Changed the implementation of `indoc` (which keeps track of which documents phrases come from) from a vector to a map.

This commit is contained in:
Ulrich Germann 2015-06-25 15:17:26 +01:00
parent 78b2810cfe
commit 22cc22064c
6 changed files with 26 additions and 13 deletions

View File

@ -137,7 +137,10 @@ int Bitext<Token>::agenda::job
float p = (*m_bias)[sid];
id_type docid = m_bias->GetClass(sid);
uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
// uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
std::map<uint32_t,uint32_t>::const_iterator m = stats->indoc.find(docid);
uint32_t k = m != stats->indoc.end() ? m->second : 0 ;
// always consider candidates from dominating documents and
// from documents that have not been considered at all yet
@ -159,11 +162,17 @@ int Bitext<Token>::agenda::job
e = root->getCorpus()->sntEnd(sid);
*log << docid << ":" << sid << " " << size_t(k) << "/" << N
<< " @" << p << " => " << d << " [";
for (size_t i = 0; i < stats->indoc.size(); ++i)
for (std::map<uint32_t, uint32_t>::const_iterator m = stats->indoc.begin();
m != stats->indoc.end(); ++m)
{
if (i) *log << " ";
*log << stats->indoc[i];
if (m != stats->indoc.begin()) *log << " ";
*log << m->first << ":" << m->second;
}
// for (size_t i = 0; i < stats->indoc.size(); ++i)
// {
// if (i) *log << " ";
// *log << stats->indoc[i];
// }
*log << "] ";
for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
if (!ret) *log << "SKIP";

View File

@ -76,7 +76,7 @@ namespace Moses
++obwd[bwd_orient];
if (docid >= 0)
{
while (int(indoc.size()) <= docid) indoc.push_back(0);
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
}
}

View File

@ -27,7 +27,8 @@ namespace Moses
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
public:
vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
std::map<uint32_t,uint32_t> indoc;
// vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
jstats();
jstats(jstats const& other);
uint32_t rcnt() const; // raw joint counts

View File

@ -58,7 +58,7 @@ namespace Moses
++obwd[po_bwd];
if (docid >= 0)
{
while (int(indoc.size()) <= docid) indoc.push_back(0);
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
}
}

View File

@ -33,8 +33,8 @@ namespace Moses
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
std::vector<uint32_t> indoc; // distribution over where samples came from
// std::vector<uint32_t> indoc; // distribution over where samples came from
std::map<uint32_t,uint32_t> indoc;
typedef std::map<uint64_t, jstats> trg_map_t;
trg_map_t trg;
pstats();

View File

@ -30,7 +30,8 @@ namespace Moses
std::vector<uchar> aln;
float score;
bool inverse;
std::vector<uint32_t> indoc;
// std::vector<uint32_t> indoc;
std::map<uint32_t,uint32_t> indoc;
PhrasePair() { };
PhrasePair(PhrasePair const& o);
@ -306,10 +307,12 @@ namespace Moses
out << toString (V1, this->start1, this->len1) << " ::: "
<< toString (V2, this->start2, this->len2) << " "
<< this->joint << " [";
for (size_t i = 0; i < this->indoc.size(); ++i)
// for (size_t i = 0; i < this->indoc.size(); ++i)
for (std::map<uint32_t,uint32_t>::const_iterator m = indoc.begin();
m != indoc.end(); ++m)
{
if (i) out << " ";
out << this->indoc[i];
if (m != indoc.begin()) out << " ";
out << m->first << ":" << m->second;
}
out << "] [";
vector<float> lrscores;