mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
Add sanity checks to ensure the fake firstip
was successfully converted to a real, valid firstip.
This commit is contained in:
parent
7b4b8b27bd
commit
970eb33a83
@ -3002,7 +3002,7 @@ bool CollectionRec::rebuildUrlFilters ( ) {
|
||||
|
||||
m_regExs[i].set("errorcount>=1 && !hastmperror");
|
||||
m_spiderPriorities [i] = 15;
|
||||
m_spiderFreqs [i] = 0.00; // 86 seconds
|
||||
m_spiderFreqs [i] = 0.0;
|
||||
m_maxSpidersPerRule [i] = 0; // turn off spiders if not tmp error
|
||||
i++;
|
||||
|
||||
@ -3015,7 +3015,7 @@ bool CollectionRec::rebuildUrlFilters ( ) {
|
||||
// and for docs that have temporary errors, respider at the frequency set below
|
||||
m_regExs[i].set("errorcount==2 && hastmperror");
|
||||
m_spiderPriorities [i] = 40;
|
||||
m_spiderFreqs [i] = 0.1; // 2.4 hrs
|
||||
m_spiderFreqs [i] = 0.003; // 3*86 seconds (was 24 hrs)
|
||||
i++;
|
||||
|
||||
// excessive errors? (tcp/dns timed out, etc.) retry once per month?
|
||||
|
29
XmlDoc.cpp
29
XmlDoc.cpp
@ -2080,6 +2080,9 @@ void XmlDoc::getRevisedSpiderRequest ( SpiderRequest *revisedReq ) {
|
||||
// this must be valid for us of course
|
||||
if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// sanity check: a first ip of 0 or -1 is not a real ip and should never reach here
|
||||
if ( m_firstIp == 0 || m_firstIp == -1 ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// store the real ip in there now
|
||||
revisedReq->m_firstIp = m_firstIp;
|
||||
|
||||
@ -2226,6 +2229,16 @@ bool XmlDoc::indexDoc ( ) {
|
||||
m_indexCodeValid = true;
|
||||
goto logErr;
|
||||
}
|
||||
// sanity check: the first ip must be valid at this point
|
||||
if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
|
||||
// sanity log
|
||||
if ( *fip == 0 || *fip == -1 ) {
|
||||
char *url = "unknown";
|
||||
if ( m_sreqValid ) url = m_sreq.m_url;
|
||||
log("build: error2 getting real firstip of %li for "
|
||||
"%s. Not adding new spider req", (long)*fip,url);
|
||||
goto skipNewAdd1;
|
||||
}
|
||||
// store the new request (store reply for this below)
|
||||
m_metaList2.pushChar(RDB_SPIDERDB);
|
||||
// store it here
|
||||
@ -2238,6 +2251,7 @@ bool XmlDoc::indexDoc ( ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
skipNewAdd1:
|
||||
|
||||
////
|
||||
//
|
||||
@ -22086,6 +22100,19 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
SpiderRequest revisedReq;
|
||||
// this fills it in
|
||||
getRevisedSpiderRequest ( &revisedReq );
|
||||
|
||||
// sanity check: the first ip must be valid at this point
|
||||
if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// sanity log
|
||||
if ( m_firstIp == 0 || m_firstIp == -1 ) {
|
||||
char *url = "unknown";
|
||||
if ( m_sreqValid ) url = m_sreq.m_url;
|
||||
log("build: error3 getting real firstip of %li for "
|
||||
"%s. not adding new request.",(long)m_firstIp,url);
|
||||
goto skipNewAdd2;
|
||||
}
|
||||
|
||||
// store it back
|
||||
memcpy ( m_p , &revisedReq , revisedReq.getRecSize() );
|
||||
// skip over it
|
||||
@ -22094,6 +22121,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
if ( m_p - saved > needSpiderdb3 ) { char *xx=NULL;*xx=0; }
|
||||
}
|
||||
|
||||
skipNewAdd2:
|
||||
|
||||
//
|
||||
// ADD SPIDERDB RECORDS of outlinks
|
||||
//
|
||||
|
Loading…
Reference in New Issue
Block a user