From 584d48edc7e6393b7b010508c96571f7e9acfc58 Mon Sep 17 00:00:00 2001 From: Matt Wells Date: Sat, 13 Dec 2014 13:17:26 -0800 Subject: [PATCH] be able to turn off getting of link info for faster rebuild of GI. --- Parms.cpp | 9 +++++++-- XmlDoc.cpp | 18 ++++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Parms.cpp b/Parms.cpp index 8940b9ee..50d4753b 100644 --- a/Parms.cpp +++ b/Parms.cpp @@ -17011,12 +17011,17 @@ void Parms::init ( ) { m->m_title = "enable link voting"; m->m_desc = "If this is true Gigablast will " "index hyper-link text and use hyper-link " - "structures to boost the quality of indexed documents."; + "structures to boost the quality of indexed documents. " + "You can disable this when doing a ton of injections to " + "keep things fast. Then do a posdb (index) rebuild " + "after re-enabling this when you are done injecting. Or " + "if you simply do not want link voting this will speed up" + "your injections and spidering a bit."; m->m_cgi = "glt"; m->m_off = (char *)&cr.m_getLinkInfo - x; m->m_type = TYPE_BOOL; m->m_def = "1"; - m->m_flags = PF_HIDDEN | PF_NOSAVE; + m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE; m->m_page = PAGE_SPIDER; m->m_obj = OBJ_COLL; m++; diff --git a/XmlDoc.cpp b/XmlDoc.cpp index 71504593..b4776f4e 100644 --- a/XmlDoc.cpp +++ b/XmlDoc.cpp @@ -13658,12 +13658,25 @@ LinkInfo s_dummy2; // . returns -1 if blocked, will re-call m_callback LinkInfo *XmlDoc::getLinkInfo1 ( ) { + if ( m_linkInfo1Valid && ptr_linkInfo1 ) + return ptr_linkInfo1; + + // just return nothing if not doing link voting + CollectionRec *cr = getCollRec(); + if ( ! cr ) return NULL; + // to keep things fast we avoid getting link info for some collections + if ( ! m_linkInfo1Valid && ! cr->m_getLinkInfo ) { + ptr_linkInfo1 = NULL; + m_linkInfo1Valid = true; + } + // sometimes it is NULL in title rec when setting from title rec if ( m_linkInfo1Valid && ! ptr_linkInfo1 ) { memset ( &s_dummy2 , 0 , sizeof(LinkInfo) ); s_dummy2.m_lisize = sizeof(LinkInfo); ptr_linkInfo1 = &s_dummy2; size_linkInfo1 = sizeof(LinkInfo); + return ptr_linkInfo1; } // return if we got it @@ -13673,9 +13686,6 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) { // change status setStatus ( "getting local inlinkers" ); - CollectionRec *cr = getCollRec(); - if ( ! cr ) return NULL; - XmlDoc **od = getOldXmlDoc ( ); if ( ! od || od == (XmlDoc **)-1 ) return (LinkInfo *)od; int32_t *sni = getSiteNumInlinks(); @@ -13805,7 +13815,7 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) { // onlyNeedGoodInlinks = false; //} - // call it + // call it. this is defined in Linkdb.cpp char *url = getFirstUrl()->getUrl(); if ( ! getLinkInfo ( &m_tmpBuf12, &m_mcast12,