mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
fix more bugs.
This commit is contained in:
parent
1fb85db307
commit
978910ca7a
@ -256,8 +256,10 @@ bool Collectiondb::addRec ( char *coll , char *cpc , long cpclen , bool isNew ,
|
||||
// capacity for us already...
|
||||
if ( i >= m_numRecs &&
|
||||
(i+1)*4 > m_recPtrBuf.getCapacity() ) {
|
||||
long need = (i+1-m_numRecs)*sizeof(CollectionRec *);
|
||||
// true here means to clear the new space to zeroes
|
||||
long need = (i+1)*sizeof(CollectionRec *);
|
||||
long have = m_recPtrBuf.getCapacity();
|
||||
need -= have;
|
||||
// true here means to clear the new space to zeroes
|
||||
if ( ! m_recPtrBuf.reserve ( need ,NULL, true ) )
|
||||
return log("admin: error growing rec ptr buf");
|
||||
}
|
||||
@ -411,6 +413,12 @@ bool Collectiondb::addRec ( char *coll , char *cpc , long cpclen , bool isNew ,
|
||||
// reserve it
|
||||
if ( i >= m_numRecs ) m_numRecs = i + 1;
|
||||
|
||||
// sanity
|
||||
for ( long j = 0 ; j < m_numRecs ; j++ ) {
|
||||
if ( ! m_recs[j] ) continue;
|
||||
if ( m_recs[j]->m_collnum == 1 ) continue;
|
||||
}
|
||||
|
||||
// count it
|
||||
m_numRecsUsed++;
|
||||
// update the time
|
||||
|
@ -2670,6 +2670,10 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
, cx->m_collectiveRespiderFrequency
|
||||
, (long)cx->m_diffbotOnlyProcessIfNew
|
||||
);
|
||||
sb.safePrintf("\"pageProcessPattern\":\"");
|
||||
sb.safeUtf8ToJSON ( cx->m_diffbotPageProcessPattern.
|
||||
getBufStart() );
|
||||
sb.safePrintf("\",\n");
|
||||
sb.safePrintf("\"notifyEmail\":\"");
|
||||
sb.safeUtf8ToJSON ( cx->m_notifyEmail.getBufStart() );
|
||||
sb.safePrintf("\",\n");
|
||||
|
@ -3043,6 +3043,8 @@ long RdbTree::getNumPositiveKeys ( collnum_t collnum ) {
|
||||
|
||||
void RdbTree::setNumKeys ( CollectionRec *cr ) {
|
||||
|
||||
if ( ! cr ) return;
|
||||
|
||||
collnum_t collnum = cr->m_collnum;
|
||||
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
|
||||
|
12
Spider.cpp
12
Spider.cpp
@ -3915,9 +3915,9 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
// not while repairing
|
||||
if ( g_repairMode ) return;
|
||||
|
||||
if ( g_conf.m_logDebugSpider )
|
||||
log("spider: trying to get a doledb rec to spider. "
|
||||
"currentnumout=%li",m_numSpidersOut);
|
||||
//if ( g_conf.m_logDebugSpider )
|
||||
// log("spider: trying to get a doledb rec to spider. "
|
||||
// "currentnumout=%li",m_numSpidersOut);
|
||||
|
||||
// when getting a lock we keep a ptr to the SpiderRequest in the
|
||||
// doledb list, so do not try to read more just yet until we know
|
||||
@ -8401,7 +8401,7 @@ long getUrlFilterNum2 ( SpiderRequest *sreq ,
|
||||
if ( isForMsg20 ) continue;
|
||||
// check the extension
|
||||
if ( urlLen<=5 ) continue;
|
||||
ext = url - 4;
|
||||
ext = url + urlLen - 4;
|
||||
if ( ext[0] == '.' ) {
|
||||
if ( to_lower_a(ext[1]) == 'c' &&
|
||||
to_lower_a(ext[2]) == 's' &&
|
||||
@ -8411,6 +8411,10 @@ long getUrlFilterNum2 ( SpiderRequest *sreq ,
|
||||
to_lower_a(ext[2]) == 'p' &&
|
||||
to_lower_a(ext[3]) == 'g' )
|
||||
goto gotOne;
|
||||
if ( to_lower_a(ext[1]) == 'p' &&
|
||||
to_lower_a(ext[2]) == 'n' &&
|
||||
to_lower_a(ext[3]) == 'g' )
|
||||
goto gotOne;
|
||||
if ( to_lower_a(ext[1]) == 'w' &&
|
||||
to_lower_a(ext[2]) == 'm' &&
|
||||
to_lower_a(ext[3]) == 'v' )
|
||||
|
Loading…
Reference in New Issue
Block a user