seems like we can spider multiple urls

from same ip at same time now.
This commit is contained in:
mwells 2013-09-24 09:32:26 -06:00
parent 8461e33b53
commit e594af898a
4 changed files with 45 additions and 16 deletions

View File

@ -620,6 +620,10 @@ bool HttpRequest::set (char *url,long offset,long size,time_t ifModifiedSince,
m_isLocal = true;
#endif
// comcast
//if ( sock && strncmp(iptoa(sock->m_ip),"75.160.49.8",11) == 0)
// m_isLocal = true;
// roadrunner ip
// if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0)
// m_isLocal = true;

21
Rdb.cpp
View File

@ -1951,7 +1951,9 @@ bool Rdb::addRecord ( collnum_t collnum,
if ( KEYNEG(key) ) {
// log debug
logf(LOG_DEBUG,"spflow: removed doledb key "
"for uh48=%llu",
"for pri=%li time=%lu uh48=%llu",
(long)g_doledb.getPriority(&doleKey),
(long)g_doledb.getSpiderTime(&doleKey),
g_doledb.getUrlHash48(&doleKey));
}
else {
@ -1959,9 +1961,14 @@ bool Rdb::addRecord ( collnum_t collnum,
//SpiderColl *sc = g_spiderCache.getSpiderColl(collnum)
// do not overflow!
// log debug
logf(LOG_DEBUG,"spflow: added doledb key "
"for uh48=%llu",
g_doledb.getUrlHash48(&doleKey));
SpiderRequest *sreq = (SpiderRequest *)data;
logf(LOG_DEBUG,"spflow: added doledb key "
"for pri=%li time=%lu uh48=%llu docid=%lli u=%s",
(long)g_doledb.getPriority(&doleKey),
(long)g_doledb.getSpiderTime(&doleKey),
g_doledb.getUrlHash48(&doleKey),
sreq->m_probDocId,
sreq->m_url);
}
}
@ -2198,8 +2205,10 @@ bool Rdb::addRecord ( collnum_t collnum,
(char *)key,
sizeof(key_t) );
// debug log
if ( g_conf.m_logDebugSpider )
log("rdb: cursor reset pri=%li to %s",
if ( g_conf.m_logDebugSpider ||
g_conf.m_logDebugSpiderFlow )
log("spflow: cursor reset pri=%li to "
"%s",
pri,KEYSTR(key,12));
}
// that's it for doledb mods

View File

@ -2659,7 +2659,7 @@ bool SpiderColl::scanSpiderdb ( bool needList ) {
// that scanSpiderdb() repopulates doledb again with that
// "firstIp". this way we can spider multiple urls from the
// same ip at the same time.
if ( g_spiderLoop.m_lockTable.isInTable(&sreq->m_probDocId) )
if ( g_spiderLoop.isInLockTable(sreq->m_probDocId) )
continue;
// ok, we got a new winner
@ -2839,7 +2839,7 @@ bool SpiderColl::scanSpiderdb ( bool needList ) {
return true;
}
if ( g_spiderLoop.m_lockTable.isInTable(&m_bestRequest->m_probDocId)){
if ( g_spiderLoop.isInLockTable ( m_bestRequest->m_probDocId ) ) {
char *xx=NULL;*xx=0; }
// make the doledb key first for this so we can add it
@ -3444,15 +3444,8 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_gettingDoledbList = true;
// log this now
if ( g_conf.m_logDebugSpider ) {
if ( g_conf.m_logDebugSpider )
m_doleStart = gettimeofdayInMillisecondsLocal();
// 12 byte doledb keys
//long pri = g_doledb.getPriority(&m_sc->m_nextDoledbKey);
//logf(LOG_DEBUG,"spider: loading list from doledb startkey=%s"
// " pri=%li",
// KEYSTR(&m_sc->m_nextDoledbKey,12),
// pri);
}
// get a spider rec for us to spider from doledb
if ( ! m_msg5.getList ( RDB_DOLEDB ,
@ -3570,6 +3563,21 @@ bool SpiderLoop::gotDoledbList2 ( ) {
return true;
}
// if debugging the spider flow show the start key if list non-empty
if ( g_conf.m_logDebugSpiderFlow ) {
// 12 byte doledb keys
long pri = g_doledb.getPriority(&m_sc->m_nextDoledbKey);
long stm = g_doledb.getSpiderTime(&m_sc->m_nextDoledbKey);
long long uh48 = g_doledb.getUrlHash48(&m_sc->m_nextDoledbKey);
logf(LOG_DEBUG,"spider: loading list from doledb startkey=%s"
" pri=%li time=%lu uh48=%llu",
KEYSTR(&m_sc->m_nextDoledbKey,12),
pri,
stm,
uh48);
}
//time_t nowGlobal = getTimeGlobal();
// double check
@ -4427,6 +4435,12 @@ void gotLockReplyWrapper ( void *state , UdpSlot *slot ) {
else g_spiderLoop.spiderDoledUrls();
}
// . returns true if the global spider loop's lock table has an entry for
//   the lock key derived from "probDocId"
// . the lock key is whatever g_titledb.getFirstProbableDocId() maps the
//   given probable docid to
bool SpiderLoop::isInLockTable ( long long probDocId ) {
	// convert the probable docid into the key used for the lookup
	unsigned long long firstProbable =
		g_titledb.getFirstProbableDocId ( probDocId );
	// check the lock table directly, no need for a temporary pointer
	return g_spiderLoop.m_lockTable.isInTable ( &firstProbable );
}
// . returns false if blocked, true otherwise.
// . returns true and sets g_errno on error
// . before we can spider for a SpiderRequest we must be granted the lock

View File

@ -1189,6 +1189,8 @@ class SpiderLoop {
~SpiderLoop();
SpiderLoop();
// returns true if the lock table has an entry for the lock key that is
// derived from "probDocId" (via g_titledb.getFirstProbableDocId())
bool isInLockTable ( long long probDocId );
// free all XmlDocs and m_list
void reset();