Bug fix and in-line code documentation.

This commit is contained in:
Ulrich Germann 2015-06-01 18:21:52 +01:00
parent 25f98a446e
commit 349163f3fd

View File

@ -16,7 +16,8 @@ namespace ugdiss
TokenIndex:: TokenIndex::
TokenIndex(string unkToken) TokenIndex(string unkToken)
: ridx(0),unkLabel(unkToken),unkId(1),numTokens(0) : ridx(0), unkLabel(unkToken), unkId(1), numTokens(0)
, startIdx(0), endIdx(0)
{ {
lock.reset(new boost::mutex()); lock.reset(new boost::mutex());
}; };
@ -94,15 +95,25 @@ namespace ugdiss
TokenIndex:: TokenIndex::
operator[](char const* p) const operator[](char const* p) const
{ {
if (startIdx==endIdx && !dynamic) return strcmp(p,"NULL") && unkId; if (startIdx != endIdx)
Entry const* bla = lower_bound(startIdx,endIdx,p,comp); {
if (bla != endIdx && !strcmp(comp.base+bla->offset,p)) Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
return bla->id; if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
if (!dynamic) return unkId; return bla->id;
if (!dynamic) return unkId;
}
else if (!dynamic) return strcmp(p,"NULL") && unkId;
boost::lock_guard<boost::mutex> lk(*this->lock); boost::lock_guard<boost::mutex> lk(*this->lock);
// stuff below is new as of 2011-01-30, for dynamic adding of unknown items // stuff below is new as of 2011-01-30, for dynamic adding of
// IMPORTANT: numTokens is currently not changed, it is the number of // unknown items. IMPORTANT: numTokens is currently not
// PRE-EXISTING TOKENS, not including dynamically added Items // changed, it is the number of PRE-EXISTING TOKENS, not including
// dynamically added Items
// if (!str2idExtra)
// {
// this->str2idExtra.reset(new map<string,id_type>());
// this->newWords.reset(new vector<string>());
// }
map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens); map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem); pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
if (foo.second) // it actually is a new item if (foo.second) // it actually is a new item
@ -144,10 +155,13 @@ namespace ugdiss
if (!ridx.size()) if (!ridx.size())
{ {
boost::lock_guard<boost::mutex> lk(*this->lock); boost::lock_guard<boost::mutex> lk(*this->lock);
// Someone else (multi-threading!) may have created the
// reverse index in the meantime, so let's check again
if (!ridx.size()) ridx = reverseIndex(); if (!ridx.size()) ridx = reverseIndex();
} }
if (id < ridx.size()) if (id < ridx.size())
return ridx[id]; return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock); boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size()) if (dynamic && id < ridx.size()+newWords->size())
return (*newWords)[id-ridx.size()].c_str(); return (*newWords)[id-ridx.size()].c_str();