mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 04:07:13 +03:00
Fix bug where the hopcount 0 SpiderRequest was lost:
when a page links to itself, the outlink's request (hopcount 1) overrode the original hopcount 0 request.
This commit is contained in:
parent
953d636448
commit
5804e28230
@ -27321,6 +27321,8 @@ char *XmlDoc::addOutlinkSpiderRecsToMetaList ( ) {
|
||||
if ( ! m_hopCountValid ) { char *xx=NULL;*xx=0; }
|
||||
//if ( ! m_spideredTimeValid ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
int64_t myUh48 = m_firstUrl.getUrlHash48();
|
||||
|
||||
// . pre-allocate a buffer to hold the spider recs
|
||||
// . taken from SpiderRequest::store()
|
||||
int32_t size = 0;
|
||||
@ -27781,9 +27783,15 @@ char *XmlDoc::addOutlinkSpiderRecsToMetaList ( ) {
|
||||
strcpy(ksr.m_url,s);
|
||||
// this must be valid
|
||||
if ( ! m_docIdValid ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// set the key, ksr.m_key. isDel = false
|
||||
ksr.setKey ( firstIp, *d , false );
|
||||
|
||||
// we were hopcount 0, so if we link to ourselves we override
|
||||
// our original hopcount of 0 with this guy that has a
|
||||
// hopcount of 1. that sux... so don't do it.
|
||||
if ( ksr.getUrlHash48() == myUh48 ) continue;
|
||||
|
||||
// if we've recently added this url to spiderdb in Spider.cpp, skip it
|
||||
//if ( sc && sc->isInDupCache ( &ksr , false ) )
|
||||
// continue;
|
||||
|
Loading…
Reference in New Issue
Block a user