Fixed quite a few nasty bugs.

Fixed overruns in the CollectionRec negative/positive key counting.
This commit is contained in:
Matt Wells 2013-11-06 15:44:50 -08:00
parent afb5a2be64
commit 0655160c26
5 changed files with 39 additions and 26 deletions

View File

@ -671,12 +671,6 @@ class CollectionRec {
class SpiderColl *m_spiderColl;
// each Rdb has a tree, so keep the pos/neg key count here so
// that RdbTree does not have to have its own array limited by
// MAX_COLLS which we did away with because we made this dynamic.
long m_numPosKeysInTree[RDB_END];
long m_numNegKeysInTree[RDB_END];
long m_overflow;
long m_overflow2;
@ -1016,6 +1010,12 @@ class CollectionRec {
RdbList m_timedbList;
// each Rdb has a tree, so keep the pos/neg key count here so
// that RdbTree does not have to have its own array limited by
// MAX_COLLS which we did away with because we made this dynamic.
long m_numPosKeysInTree[RDB_END];
long m_numNegKeysInTree[RDB_END];
//long m_numEventsOnHost;
// do we have the doc:quality var in any url filter?

View File

@ -534,8 +534,10 @@ bool Rdb::addColl ( char *coll ) {
// . set CollectionRec::m_numPos/NegKeysInTree[rdbId]
// . these counts are now stored in the CollectionRec and not
// in RdbTree since the # of collections can be huge!
CollectionRec *cr = g_collectiondb.m_recs[collnum];
m_tree.setNumKeys ( cr );
if ( m_useTree ) {
CollectionRec *cr = g_collectiondb.m_recs[collnum];
m_tree.setNumKeys ( cr );
}
//if ( (long)collnum >= m_numBases ) m_numBases = (long)collnum + 1;
// Success

View File

@ -32,6 +32,10 @@ RdbTree::RdbTree () {
m_useProtection = false;
m_pickRight = false;
m_gettingList = 0;
// before resetting... we have to set this so clear() won't breach buffers
m_rdbId = -1;
reset();
}
@ -125,10 +129,6 @@ bool RdbTree::set ( long fixedDataSize ,
// sanity
if ( rdbId < -1 ) { char *xx=NULL;*xx=0; }
if ( rdbId >= RDB_END ) { char *xx=NULL;*xx=0; }
// is it a valid one
m_isRealTree = true;
if ( m_rdbId <= RDB_NONE ) m_isRealTree = false;
if ( m_rdbId >= RDB_END ) m_isRealTree = false;
// if its doledb, set it
//if ( dbname && strcmp(dbname,"doledb") == 0 ) m_rdbId = RDB_DOLEDB;
// adjust m_maxMem to virtual infinity if it was -1
@ -273,11 +273,12 @@ long RdbTree::clear ( ) {
// clear tree counts for all collections!
long nc = g_collectiondb.m_numRecs;
// BUT only if we are an Rdb::m_tree!!!
if ( ! m_isRealTree ) nc = 0;
if ( m_rdbId == -1 ) nc = 0;
// otherwise, we overwrite stuff in CollectionRec we shouldn't
for ( long i = 0 ; i < nc ; i++ ) {
CollectionRec *cr = g_collectiondb.getRec(i);
if ( ! cr ) continue;
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
}
@ -547,7 +548,8 @@ long RdbTree::addNode ( collnum_t collnum ,
// collections using the same Rdb::m_tree!
// crap, when fixing a tree this will segfault because
// m_recs[collnum] is NULL.
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[collnum]->
m_numNegKeysInTree[(unsigned char)m_rdbId] =0;
g_collectiondb.m_recs[collnum]->
@ -629,7 +631,8 @@ long RdbTree::addNode ( collnum_t collnum ,
// collections using the same Rdb::m_tree!
// crap, when fixing a tree this will segfault because
// m_recs[collnum] is NULL.
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[collnum]->
m_numNegKeysInTree[(unsigned char)m_rdbId]++;
}
@ -639,7 +642,8 @@ long RdbTree::addNode ( collnum_t collnum ,
//m_numPosKeysPerColl[collnum]++;
// crap, when fixing a tree this will segfault because
// m_recs[collnum] is NULL.
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[collnum]->
m_numPosKeysInTree[(unsigned char)m_rdbId]++;
}
@ -834,14 +838,14 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
if ( KEYNEG(m_keys,i,m_ks) ) {
m_numNegativeKeys--;
//m_numNegKeysPerColl[m_collnums[i]]--;
if ( m_isRealTree )
if ( m_rdbId >= 0 )
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
}
else {
m_numPositiveKeys--;
//m_numPosKeysPerColl[m_collnums[i]]--;
if ( m_isRealTree )
if ( m_rdbId >= 0 )
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
}
@ -868,7 +872,8 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
m_numPositiveKeys = 0;
//m_numNegKeysPerColl[m_collnums[i]] = 0;
//m_numPosKeysPerColl[m_collnums[i]] = 0;
if ( m_isRealTree ) {
if ( m_rdbId >= 0 ) {
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
g_collectiondb.m_recs[m_collnums[i]]->
@ -937,16 +942,20 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
if ( KEYNEG(m_keys,i,m_ks) ) {
m_numNegativeKeys--;
//m_numNegKeysPerColl[m_collnums[i]]--;
if ( m_isRealTree )
if ( m_rdbId >= 0 ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numNegKeysInTree[(unsigned char)m_rdbId]--;
}
}
else {
m_numPositiveKeys--;
//m_numPosKeysPerColl[m_collnums[i]]--;
if ( m_isRealTree )
if ( m_rdbId >= 0 ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
}
}
// debug step -- check chain from iparent down making sure that
// all kids don't have -2 for their parent... seems to be a rare bug
@ -3050,14 +3059,14 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
}
long RdbTree::getNumNegativeKeys ( collnum_t collnum ) {
if ( ! m_isRealTree ) { char *xx=NULL;*xx=0; }
if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
CollectionRec *cr = g_collectiondb.m_recs[collnum];
if ( ! cr ) return 0;
return cr->m_numNegKeysInTree[(unsigned char)m_rdbId];
}
long RdbTree::getNumPositiveKeys ( collnum_t collnum ) {
if ( ! m_isRealTree ) { char *xx=NULL;*xx=0; }
if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
CollectionRec *cr = g_collectiondb.m_recs[collnum];
if ( ! cr ) return 0;
return cr->m_numPosKeysInTree[(unsigned char)m_rdbId];
@ -3067,6 +3076,8 @@ void RdbTree::setNumKeys ( CollectionRec *cr ) {
if ( ! cr ) return;
if ( ((unsigned char)m_rdbId) >= RDB_END ) { char *xx=NULL;*xx=0; }
collnum_t collnum = cr->m_collnum;
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;

View File

@ -360,7 +360,7 @@ class RdbTree {
// need to pass this file to the fastSave() thread
//BigFile *m_saveFile;
char m_rdbId;
char m_isRealTree;
//char m_isRealTree;
char m_dir[128];
char m_dbname[32];
char m_memTag[16];

View File

@ -329,12 +329,12 @@
<maxRobotstxtCacheAge>86400</>
# Only spider URLs scheduled to be spidered at this time or after. In UTC.
<spiderStartTime>18 Jan 1970 20:00 UTC</>
<spiderStartTime>19 Jan 1970 04:00 UTC</>
# Only spider URLs scheduled to be spidered at this time or before. If "use
# current time" is true then the current local time is used for this value
# instead. in UTC.
<spiderEndTime>02 Jan 1970 08:00 UTC</>
<spiderEndTime>02 Jan 1970 16:00 UTC</>
# Use the current time as the spider end time?
<useCurrentTime>1</>