mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Fixed quite a few nasty bugs.
Fixed CollectionRec neg/pos key-count array overruns.
This commit is contained in:
parent
afb5a2be64
commit
0655160c26
@ -671,12 +671,6 @@ class CollectionRec {
|
||||
|
||||
class SpiderColl *m_spiderColl;
|
||||
|
||||
// each Rdb has a tree, so keep the pos/neg key count here so
|
||||
// that RdbTree does not have to have its own array limited by
|
||||
// MAX_COLLS which we did away with because we made this dynamic.
|
||||
long m_numPosKeysInTree[RDB_END];
|
||||
long m_numNegKeysInTree[RDB_END];
|
||||
|
||||
long m_overflow;
|
||||
long m_overflow2;
|
||||
|
||||
@ -1016,6 +1010,12 @@ class CollectionRec {
|
||||
RdbList m_timedbList;
|
||||
|
||||
|
||||
// each Rdb has a tree, so keep the pos/neg key count here so
|
||||
// that RdbTree does not have to have its own array limited by
|
||||
// MAX_COLLS which we did away with because we made this dynamic.
|
||||
long m_numPosKeysInTree[RDB_END];
|
||||
long m_numNegKeysInTree[RDB_END];
|
||||
|
||||
//long m_numEventsOnHost;
|
||||
|
||||
// do we have the doc:quality var in any url filter?
|
||||
|
6
Rdb.cpp
6
Rdb.cpp
@ -534,8 +534,10 @@ bool Rdb::addColl ( char *coll ) {
|
||||
// . set CollectionRec::m_numPos/NegKeysInTree[rdbId]
|
||||
// . these counts are now stored in the CollectionRec and not
|
||||
// in RdbTree since the # of collections can be huge!
|
||||
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||
m_tree.setNumKeys ( cr );
|
||||
if ( m_useTree ) {
|
||||
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||
m_tree.setNumKeys ( cr );
|
||||
}
|
||||
|
||||
//if ( (long)collnum >= m_numBases ) m_numBases = (long)collnum + 1;
|
||||
// Success
|
||||
|
41
RdbTree.cpp
41
RdbTree.cpp
@ -32,6 +32,10 @@ RdbTree::RdbTree () {
|
||||
m_useProtection = false;
|
||||
m_pickRight = false;
|
||||
m_gettingList = 0;
|
||||
|
||||
// before resetting... we have to set this so clear() won't breach buffers
|
||||
m_rdbId = -1;
|
||||
|
||||
reset();
|
||||
}
|
||||
|
||||
@ -125,10 +129,6 @@ bool RdbTree::set ( long fixedDataSize ,
|
||||
// sanity
|
||||
if ( rdbId < -1 ) { char *xx=NULL;*xx=0; }
|
||||
if ( rdbId >= RDB_END ) { char *xx=NULL;*xx=0; }
|
||||
// is it a valid one
|
||||
m_isRealTree = true;
|
||||
if ( m_rdbId <= RDB_NONE ) m_isRealTree = false;
|
||||
if ( m_rdbId >= RDB_END ) m_isRealTree = false;
|
||||
// if its doledb, set it
|
||||
//if ( dbname && strcmp(dbname,"doledb") == 0 ) m_rdbId = RDB_DOLEDB;
|
||||
// adjust m_maxMem to virtual infinity if it was -1
|
||||
@ -273,11 +273,12 @@ long RdbTree::clear ( ) {
|
||||
// clear tree counts for all collections!
|
||||
long nc = g_collectiondb.m_numRecs;
|
||||
// BUT only if we are an Rdb::m_tree!!!
|
||||
if ( ! m_isRealTree ) nc = 0;
|
||||
if ( m_rdbId == -1 ) nc = 0;
|
||||
// otherwise, we overwrite stuff in CollectionRec we shouldn't
|
||||
for ( long i = 0 ; i < nc ; i++ ) {
|
||||
CollectionRec *cr = g_collectiondb.getRec(i);
|
||||
if ( ! cr ) continue;
|
||||
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
}
|
||||
@ -547,7 +548,8 @@ long RdbTree::addNode ( collnum_t collnum ,
|
||||
// collections using the same Rdb::m_tree!
|
||||
// crap, when fixing a tree this will segfault because
|
||||
// m_recs[collnum] is NULL.
|
||||
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
|
||||
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
|
||||
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[collnum]->
|
||||
m_numNegKeysInTree[(unsigned char)m_rdbId] =0;
|
||||
g_collectiondb.m_recs[collnum]->
|
||||
@ -629,7 +631,8 @@ long RdbTree::addNode ( collnum_t collnum ,
|
||||
// collections using the same Rdb::m_tree!
|
||||
// crap, when fixing a tree this will segfault because
|
||||
// m_recs[collnum] is NULL.
|
||||
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
|
||||
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
|
||||
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[collnum]->
|
||||
m_numNegKeysInTree[(unsigned char)m_rdbId]++;
|
||||
}
|
||||
@ -639,7 +642,8 @@ long RdbTree::addNode ( collnum_t collnum ,
|
||||
//m_numPosKeysPerColl[collnum]++;
|
||||
// crap, when fixing a tree this will segfault because
|
||||
// m_recs[collnum] is NULL.
|
||||
if ( m_isRealTree && g_collectiondb.m_recs[collnum] ) {
|
||||
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
|
||||
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[collnum]->
|
||||
m_numPosKeysInTree[(unsigned char)m_rdbId]++;
|
||||
}
|
||||
@ -834,14 +838,14 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
|
||||
if ( KEYNEG(m_keys,i,m_ks) ) {
|
||||
m_numNegativeKeys--;
|
||||
//m_numNegKeysPerColl[m_collnums[i]]--;
|
||||
if ( m_isRealTree )
|
||||
if ( m_rdbId >= 0 )
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
|
||||
}
|
||||
else {
|
||||
m_numPositiveKeys--;
|
||||
//m_numPosKeysPerColl[m_collnums[i]]--;
|
||||
if ( m_isRealTree )
|
||||
if ( m_rdbId >= 0 )
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
|
||||
}
|
||||
@ -868,7 +872,8 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
|
||||
m_numPositiveKeys = 0;
|
||||
//m_numNegKeysPerColl[m_collnums[i]] = 0;
|
||||
//m_numPosKeysPerColl[m_collnums[i]] = 0;
|
||||
if ( m_isRealTree ) {
|
||||
if ( m_rdbId >= 0 ) {
|
||||
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
@ -937,16 +942,20 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
|
||||
if ( KEYNEG(m_keys,i,m_ks) ) {
|
||||
m_numNegativeKeys--;
|
||||
//m_numNegKeysPerColl[m_collnums[i]]--;
|
||||
if ( m_isRealTree )
|
||||
if ( m_rdbId >= 0 ) {
|
||||
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
m_numNegKeysInTree[(unsigned char)m_rdbId]--;
|
||||
}
|
||||
}
|
||||
else {
|
||||
m_numPositiveKeys--;
|
||||
//m_numPosKeysPerColl[m_collnums[i]]--;
|
||||
if ( m_isRealTree )
|
||||
if ( m_rdbId >= 0 ) {
|
||||
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
|
||||
g_collectiondb.m_recs[m_collnums[i]]->
|
||||
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
|
||||
}
|
||||
}
|
||||
// debug step -- check chain from iparent down making sure that
|
||||
// no kid has -2 for its parent... seems to be a rare bug
|
||||
@ -3050,14 +3059,14 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
|
||||
}
|
||||
|
||||
long RdbTree::getNumNegativeKeys ( collnum_t collnum ) {
|
||||
if ( ! m_isRealTree ) { char *xx=NULL;*xx=0; }
|
||||
if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
|
||||
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||
if ( ! cr ) return 0;
|
||||
return cr->m_numNegKeysInTree[(unsigned char)m_rdbId];
|
||||
}
|
||||
|
||||
long RdbTree::getNumPositiveKeys ( collnum_t collnum ) {
|
||||
if ( ! m_isRealTree ) { char *xx=NULL;*xx=0; }
|
||||
if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
|
||||
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||
if ( ! cr ) return 0;
|
||||
return cr->m_numPosKeysInTree[(unsigned char)m_rdbId];
|
||||
@ -3067,6 +3076,8 @@ void RdbTree::setNumKeys ( CollectionRec *cr ) {
|
||||
|
||||
if ( ! cr ) return;
|
||||
|
||||
if ( ((unsigned char)m_rdbId) >= RDB_END ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
collnum_t collnum = cr->m_collnum;
|
||||
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
|
@ -360,7 +360,7 @@ class RdbTree {
|
||||
// need to pass this file to the fastSave() thread
|
||||
//BigFile *m_saveFile;
|
||||
char m_rdbId;
|
||||
char m_isRealTree;
|
||||
//char m_isRealTree;
|
||||
char m_dir[128];
|
||||
char m_dbname[32];
|
||||
char m_memTag[16];
|
||||
|
@ -329,12 +329,12 @@
|
||||
<maxRobotstxtCacheAge>86400</>
|
||||
|
||||
# Only spider URLs scheduled to be spidered at this time or after. In UTC.
|
||||
<spiderStartTime>18 Jan 1970 20:00 UTC</>
|
||||
<spiderStartTime>19 Jan 1970 04:00 UTC</>
|
||||
|
||||
# Only spider URLs scheduled to be spidered at this time or before. If "use
|
||||
# current time" is true then the current local time is used for this value
|
||||
# instead. In UTC.
|
||||
<spiderEndTime>02 Jan 1970 08:00 UTC</>
|
||||
<spiderEndTime>02 Jan 1970 16:00 UTC</>
|
||||
|
||||
# Use the current time as the spider end time?
|
||||
<useCurrentTime>1</>
|
||||
|
Loading…
Reference in New Issue
Block a user