mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Do not add timestamps to the last-download cache (s_hammerCache)
when m_skipHammerCheck is true. Those requests are for things like robots.txt, redirects, or root files.
This commit is contained in:
parent
0f3374e3f3
commit
57eb231a4e
12
Msg13.cpp
12
Msg13.cpp
@ -679,9 +679,12 @@ void downloadTheDocForReals ( Msg13Request *r ) {
|
||||
// will overwrite it with a timestamp when the download completes
|
||||
// . but if measuring crawldelay from beginning of the download then
|
||||
// store the current time
|
||||
if ( r->m_crawlDelayFromEnd )
|
||||
// . do NOT do this when downloading robots.txt etc. type files
|
||||
// which should have skipHammerCheck set to true
|
||||
if ( r->m_crawlDelayFromEnd && ! r->m_skipHammerCheck ) {
|
||||
s_hammerCache.addLongLong(0,r->m_firstIp, 0LL);//nowms);
|
||||
else {
|
||||
}
|
||||
else if ( ! r->m_skipHammerCheck ) {
|
||||
// get time now
|
||||
long long nowms = gettimeofdayInMilliseconds();
|
||||
s_hammerCache.addLongLong(0,r->m_firstIp, nowms);
|
||||
@ -781,8 +784,9 @@ void gotHttpReply2 ( void *state ,
|
||||
|
||||
// get time now
|
||||
long long nowms = gettimeofdayInMilliseconds();
|
||||
// now store the current time in the cache
|
||||
if ( r->m_crawlDelayFromEnd )
|
||||
// . now store the current time in the cache
|
||||
// . do NOT do this for robots.txt etc. where we skip hammer check
|
||||
if ( r->m_crawlDelayFromEnd && ! r->m_skipHammerCheck )
|
||||
s_hammerCache.addLongLong(0,r->m_firstIp,nowms);
|
||||
// note it
|
||||
if ( g_conf.m_logDebugSpider )
|
||||
|
Loading…
Reference in New Issue
Block a user