fix sitelist update logic.

This commit is contained in:
mwells 2014-04-05 18:26:00 -07:00
parent ac5cf7971b
commit b0dbf833a7
3 changed files with 59 additions and 8 deletions

View File

@ -2277,7 +2277,7 @@ bool CollectionRec::hasSearchPermission ( TcpSocket *s , long encapIp ) {
}
bool expandRegExShortcuts ( SafeBuf *sb ) ;
bool updateSiteList ( collnum_t collnum , bool addSeeds );
bool updateSiteListTables ( collnum_t collnum,bool addSeeds,char *siteListArg);
void nukeDoledb ( collnum_t collnum );
// . anytime the url filters are updated, this function is called
@ -2340,10 +2340,14 @@ bool CollectionRec::rebuildUrlFilters ( ) {
// maybe this is good enough
//if ( sc ) sc->m_waitingTreeNeedsRebuild = true;
CollectionRec *cr = sc->m_cr;
// . rebuild sitetable? in PageBasic.cpp.
// . re-adds seed spiderrequests using msg4
// . true = addSeeds
updateSiteList ( m_collnum , true );
updateSiteListTables ( m_collnum ,
true ,
cr->m_siteListBuf.getBufStart() );
}

View File

@ -73,7 +73,9 @@ public:
// . uses msg4 to add seeds to spiderdb if necessary
// . only adds seeds for the shard we are on iff we are responsible for
// the fake firstip!!!
bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
bool updateSiteListTables ( collnum_t collnum ,
bool addSeeds ,
char *siteListArg ) {
CollectionRec *cr = g_collectiondb.getRec ( collnum );
if ( ! cr ) return true;
@ -142,10 +144,10 @@ bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
// use this so it will be free automatically when msg4 completes!
SafeBuf *spiderReqBuf = &sc->m_msg4x.m_tmpBuf;
char *siteList = cr->m_siteListBuf.getBufStart();
//char *siteList = cr->m_siteListBuf.getBufStart();
// scan the list
char *pn = siteList;
char *pn = siteListArg;
// completely empty?
if ( ! pn ) return true;
@ -391,10 +393,15 @@ char *getMatchingUrlPattern ( SpiderColl *sc , SpiderRequest *sreq ) {
// check domain specific tables
HashTableX *dt = &sc->m_siteListDomTable;
// get this
CollectionRec *cr = sc->m_cr;
// need to build dom table for pattern matching?
if ( dt->getNumSlotsUsed() == 0 ) {
if ( dt->getNumSlotsUsed() == 0 && cr ) {
// do not add seeds, just make siteListDomTable, etc.
updateSiteList ( sc->m_collnum , false );
updateSiteListTables ( sc->m_collnum ,
false , // add seeds?
cr->m_siteListBuf.getBufStart() );
}
if ( dt->getNumSlotsUsed() == 0 ) {

View File

@ -122,6 +122,40 @@ bool printUrlExpressionExamples ( SafeBuf *sb ) ;
//
////////
// from PageBasic.cpp:
bool updateSiteListTables(collnum_t collnum,bool addSeeds,char *siteListArg);
// . parm command handler used to update a collection's site list
// . "rec" is the parm record; the collnum, the data size and the data (the
//   entire new site list) are all extracted from it
// . calls updateSiteListTables() with the NEW site list while the collection
//   rec still holds the OLD one, then copies the new list into
//   cr->m_siteListBuf
// . always returns true; on failure g_errno is set (ENOCOLLREC for a bad or
//   missing collection, EBADENGINEER for a bad data size)
bool CommandUpdateSiteList ( char *rec ) {
	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	if ( collnum < 0 ) {
		log("parms: bad collnum for update site list");
		g_errno = ENOCOLLREC;
		return true;
	}
	// sanity
	long dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize < 0 ) {
		log("parms: bad site list size = %li bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	// the collection may no longer exist. bail before we update any
	// tables or dereference a NULL rec for the final copy below.
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		return true;
	}
	// get the sitelist
	char *data = getDataFromParmRec ( rec );
	// update it
	// NOTE(review): the return value is ignored here; confirm that
	// updateSiteListTables() does not need "data" to outlive this call
	updateSiteListTables ( collnum ,
			       true , // add NEW seeds?
			       data   // entire sitelist
			       );
	// now that we deduped the old site list with the new one for
	// purposes of adding NEW seeds, we can do the final copy
	cr->m_siteListBuf.set ( data );
	return true;
}
// . require the user to manually execute this to prevent us from corrupting
// the data initially because of a bad hosts.conf file!!!
// . maybe put a red 'A' in the hosts table on the web page to indicate
@ -7610,6 +7644,7 @@ void Parms::init ( ) {
m->m_page = PAGE_BASIC_SETTINGS;
m->m_obj = OBJ_COLL;
m->m_type = TYPE_SAFEBUF;
m->m_func = CommandUpdateSiteList;
m->m_def = "";
// rebuilding the url filters will now nuke doledb and call updateSiteListTables()
m->m_flags = PF_TEXTAREA | PF_DUP | PF_REBUILDURLFILTERS;
@ -7680,6 +7715,7 @@ void Parms::init ( ) {
m->m_page = PAGE_SITES;
m->m_obj = OBJ_COLL;
m->m_type = TYPE_SAFEBUF;
m->m_func = CommandUpdateSiteList;
m->m_def = "";
// rebuilding the url filters will now nuke doledb and call updateSiteListTables()
m->m_flags = PF_TEXTAREA | PF_REBUILDURLFILTERS;
@ -18056,7 +18092,11 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) {
}
// cmd to execute?
if ( parm->m_type == TYPE_CMD ) {
if ( parm->m_type == TYPE_CMD ||
// sitelist is a safebuf but it requires special deduping
// logic to update it so it uses CommandUpdateSiteList() to
// do the updating
parm->m_func ) {
// all parm rec data for TYPE_CMD should be ascii/utf8 chars
// and should be \0 terminated
char *data = getDataFromParmRec ( rec );