diff --git a/Spider.cpp b/Spider.cpp
index a29d19a7..436107b1 100644
--- a/Spider.cpp
+++ b/Spider.cpp
@@ -3029,6 +3029,12 @@ void SpiderColl::populateDoledbFromWaitingTree ( ) { // bool reentry ) {
if ( m_isPopulating ) return;
// skip if in repair mode
if ( g_repairMode ) return;
+
+ // let's skip if spiders are off so we can inject/populate the index
+ // quickly, since addSpiderRequest() calls addToWaitingTree() which then
+ // calls this.
+ if ( ! g_conf.m_spideringEnabled ) return;
+
// try skipping!!!!!!!!!!!
// yeah, this makes us scream. in addition to calling
// Doledb::m_rdb::addRecord() below
diff --git a/XmlDoc.cpp b/XmlDoc.cpp
index cf75a918..144fb425 100644
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@@ -1873,10 +1873,10 @@ void XmlDoc::setStatus ( char *s ) {
if ( s == s_last ) return;
bool timeIt = false;
- if ( m_sreqValid &&
- m_sreq.m_isInjecting &&
- m_sreq.m_isPageInject )
- timeIt = true;
+ // if ( m_sreqValid &&
+ // m_sreq.m_isInjecting &&
+ // m_sreq.m_isPageInject )
+ // timeIt = true;
if ( g_conf.m_logDebugBuildTime )
timeIt = true;
@@ -1885,7 +1885,7 @@ void XmlDoc::setStatus ( char *s ) {
int64_t now = gettimeofdayInMillisecondsLocal();
if ( s_lastTimeStart == 0LL ) s_lastTimeStart = now;
int32_t took = now - s_lastTimeStart;
- if ( took > 100 )
+ //if ( took > 100 )
log("xmldoc: %s (xd=0x%"PTRFMT" "
"u=%s) took %"INT32"ms",
s_last,
@@ -13724,6 +13724,12 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
if ( m_linkInfo1Valid && ptr_linkInfo1 )
return ptr_linkInfo1;
+ // at least get our firstip so that, if cr->m_getLinkInfo is false,
+ // getRevisedSpiderReq() will not core because the firstip is invalid
+ int32_t *ip = getFirstIp();
+ if ( ! ip || ip == (int32_t *)-1 ) return (LinkInfo *)ip;
+
+
// just return nothing if not doing link voting
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
@@ -13755,8 +13761,6 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
if ( ! sni || sni == (int32_t *)-1 ) return (LinkInfo *)sni;
//int32_t *fip = getFirstIp();
//if ( ! fip || fip == (int32_t *)-1 ) return (LinkInfo *)fip;
- int32_t *ip = getFirstIp();
- if ( ! ip || ip == (int32_t *)-1 ) return (LinkInfo *)ip;
int64_t *d = getDocId();
if ( ! d || d == (int64_t *)-1 ) return (LinkInfo *)d;
// sanity check. error?
@@ -30582,47 +30586,50 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
// breathe
QUICKPOLL( m_niceness );
- //if ( cr->m_doLinkSpamCheck ) {
- // reset to NULL to avoid gbstrlen segfault
- char *note = NULL;
- // need this
- if ( ! m_xmlValid ) { char *xx=NULL;*xx=0; }
- // time it
- //int64_t start = gettimeofdayInMilliseconds();
+ if ( ! m_req->m_doLinkSpamCheck )
+ reply->m_isLinkSpam = false;
- Url linkeeUrl;
- linkeeUrl.set ( m_req->ptr_linkee );
+ if ( m_req->m_doLinkSpamCheck ) {
+ // reset to NULL to avoid gbstrlen segfault
+ char *note = NULL;
+ // need this
+ if ( ! m_xmlValid ) { char *xx=NULL;*xx=0; }
+ // time it
+ //int64_t start = gettimeofdayInMilliseconds();
- // get it. does not block.
- reply->m_isLinkSpam = ::isLinkSpam ( linker ,
- m_ip ,
- ptr_indCatIds ,
- size_indCatIds / 4 ,
- m_siteNumInlinks,
- &m_xml,
- links,
- MAXDOCLEN,//150000,//maxDocLen ,
- &note ,
- &linkeeUrl , // url ,
- linkNode ,
- cr->m_coll ,
- m_niceness );
- // store it
- if ( note ) {
- // include the \0
- reply->ptr_note = note;
- reply->size_note = gbstrlen(note)+1;
+ Url linkeeUrl;
+ linkeeUrl.set ( m_req->ptr_linkee );
+
+ // get it. does not block.
+ reply->m_isLinkSpam = ::isLinkSpam ( linker ,
+ m_ip ,
+ ptr_indCatIds ,
+ size_indCatIds / 4 ,
+ m_siteNumInlinks,
+ &m_xml,
+ links,
+ MAXDOCLEN,//150000,
+ &note ,
+ &linkeeUrl , // url ,
+ linkNode ,
+ cr->m_coll ,
+ m_niceness );
+ // store it
+ if ( note ) {
+ // include the \0
+ reply->ptr_note = note;
+ reply->size_note = gbstrlen(note)+1;
+ }
+ // log the reason why it is a link spam page
+ if ( reply->m_isLinkSpam )
+ log(LOG_DEBUG,"build: linker %s: %s.",
+ linker->getUrl(),note);
+ // sanity
+ if ( reply->m_isLinkSpam && ! note )
+ log("linkspam: missing note for d=%"INT64"!",m_docId);
+ // store times... nah, might have yielded cpu!
+ reply->m_timeLinkSpam = 0;
}
- // log the reason why it is a log page
- if ( reply->m_isLinkSpam )
- log(LOG_DEBUG,"build: linker %s: %s.",
- linker->getUrl(),note);
- // sanity
- if ( reply->m_isLinkSpam && ! note )
- log("linkspam: missing note for d=%"INT64"!",m_docId);
- // store times... nah, might have yielded cpu!
- reply->m_timeLinkSpam = 0;
- //}
// breathe
QUICKPOLL(m_niceness);
diff --git a/html/faq.html b/html/faq.html
index 54264dbb..8ae897e2 100644
--- a/html/faq.html
+++ b/html/faq.html
@@ -1251,9 +1251,10 @@ override that switch.
-Spider Optimizations:
+Spidering and Indexing Optimizations: