when rebuilding posdb do not rebuild for spiderdb

This commit is contained in:
mwells 2014-12-03 11:32:22 -07:00
parent 12d1477135
commit ca8194d9b0
2 changed files with 48 additions and 27 deletions

12
Rdb.h
View File

@ -44,18 +44,18 @@ enum {
// . secondary rdbs for rebuilding done in PageRepair.cpp
// . we add new recs into these guys and then make the original rdbs
// point to them when we are done.
RDB2_INDEXDB2,
RDB2_TITLEDB2, //21
RDB2_INDEXDB2,//21
RDB2_TITLEDB2,
RDB2_SECTIONDB2,
RDB2_SPIDERDB2,
RDB2_TFNDB2,
RDB2_CLUSTERDB2,
RDB2_DATEDB2, // 26
RDB2_CLUSTERDB2, // 26
RDB2_DATEDB2,
RDB2_LINKDB2,
RDB2_PLACEDB2,
RDB2_REVDB2,
RDB2_TAGDB2,
RDB2_POSDB2, // 31
RDB2_TAGDB2, // 31
RDB2_POSDB2,
RDB2_CATDB2,
RDB_END
};

View File

@ -2403,7 +2403,9 @@ bool XmlDoc::indexDoc ( ) {
goto skipNewAdd1;
}
// store the new request (store reply for this below)
m_metaList2.pushChar(RDB_SPIDERDB);
char rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
m_metaList2.pushChar(rd);
// store it here
SpiderRequest revisedReq;
// this fills it in
@ -2416,22 +2418,32 @@ bool XmlDoc::indexDoc ( ) {
skipNewAdd1:
////
//
// make these fake so getNewSpiderReply() below does not block
//
////
SpiderReply *nsr = getFakeSpiderReply ( );
// this can be NULL and g_errno set to ENOCOLLREC or something
if ( ! nsr )
return true;
SpiderReply *nsr = NULL;
//SafeBuf metaList;
if ( ! m_metaList2.pushChar(RDB_SPIDERDB) )
return true;
// if only rebuilding posdb do not rebuild spiderdb
if ( m_useSpiderdb ) {
////
//
// make these fake so getNewSpiderReply() below does not block
//
////
nsr = getFakeSpiderReply ( );
// this can be NULL and g_errno set to ENOCOLLREC or something
if ( ! nsr )
return true;
//SafeBuf metaList;
char rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
if ( ! m_metaList2.pushChar( rd ) )
return true;
if ( ! m_metaList2.safeMemcpy ( (char *)nsr,nsr->getRecSize()))
return true;
}
if ( ! m_metaList2.safeMemcpy ( (char *)nsr , nsr->getRecSize() ) )
return true;
m_msg4Launched = true;
@ -2439,9 +2451,11 @@ bool XmlDoc::indexDoc ( ) {
logIt();
// log this for debug now
SafeBuf tmp;
nsr->print(&tmp);
log("xmldoc: added reply %s",tmp.getBufStart());
if ( nsr ) {
SafeBuf tmp;
nsr->print(&tmp);
log("xmldoc: added reply %s",tmp.getBufStart());
}
// clear g_errno
g_errno = 0;
@ -20756,6 +20770,7 @@ bool XmlDoc::hashMetaList ( HashTableX *ht ,
p += dataSize;
// ignore spiderdb recs for parsing consistency check
if ( rdbId == RDB_SPIDERDB ) continue;
if ( rdbId == RDB2_SPIDERDB2 ) continue;
// ignore tagdb as well!
if ( rdbId == RDB_TAGDB || rdbId == RDB2_TAGDB2 ) continue;
// skip revdb for now too
@ -21356,7 +21371,9 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
// request is not used again
//SpiderReply srep;
// store the rdbid
if ( ! m_zbuf.pushChar(RDB_SPIDERDB) )
char rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
if ( ! m_zbuf.pushChar(rd) )
return NULL;
// store that reply to indicate this spider request has
// been fulfilled!
@ -21399,7 +21416,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
pd = g_titledb.getProbableDocId(parentUrl);
ksr.setKey ( m_sreq.m_firstIp, pd , false );
// store this
if ( ! m_zbuf.pushChar(RDB_SPIDERDB) )
if ( ! m_zbuf.pushChar(rd) )
return NULL;
// then the request
if ( ! m_zbuf.safeMemcpy(&ksr,ksr.getRecSize() ) )
@ -21692,7 +21709,9 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
// now add the new rescheduled time
setStatus ( "adding SpiderReply to spiderdb" );
// rdbid first
*m_p++ = RDB_SPIDERDB;
char rd = RDB_SPIDERDB;
if ( m_useSecondaryRdbs ) rd = RDB2_SPIDERDB2;
*m_p++ = rd;
// get this
if ( ! m_srepValid ) { char *xx=NULL;*xx=0; }
// store the spider rec
@ -22677,6 +22696,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
m_sreq.m_isInjecting &&
m_sreq.m_fakeFirstIp &&
! m_sreq.m_forceDelete &&
// do not rebuild spiderdb if only rebuilding posdb
m_useSpiderdb &&
/// don't add requests like http://xyz.com/xxx-diffbotxyz0 though
! m_isDiffbotJSONObject )
needSpiderdb3 = m_sreq.getRecSize() + 1;