mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
fix sitelist update logic.
This commit is contained in:
parent
ac5cf7971b
commit
b0dbf833a7
@ -2277,7 +2277,7 @@ bool CollectionRec::hasSearchPermission ( TcpSocket *s , long encapIp ) {
|
||||
}
|
||||
|
||||
bool expandRegExShortcuts ( SafeBuf *sb ) ;
|
||||
bool updateSiteList ( collnum_t collnum , bool addSeeds );
|
||||
bool updateSiteListTables ( collnum_t collnum,bool addSeeds,char *siteListArg);
|
||||
void nukeDoledb ( collnum_t collnum );
|
||||
|
||||
// . anytime the url filters are updated, this function is called
|
||||
@ -2340,10 +2340,14 @@ bool CollectionRec::rebuildUrlFilters ( ) {
|
||||
// maybe this is good enough
|
||||
//if ( sc ) sc->m_waitingTreeNeedsRebuild = true;
|
||||
|
||||
CollectionRec *cr = sc->m_cr;
|
||||
|
||||
// . rebuild sitetable? in PageBasic.cpp.
|
||||
// . re-adds seed spiderrequests using msg4
|
||||
// . true = addSeeds
|
||||
updateSiteList ( m_collnum , true );
|
||||
updateSiteListTables ( m_collnum ,
|
||||
true ,
|
||||
cr->m_siteListBuf.getBufStart() );
|
||||
}
|
||||
|
||||
|
||||
|
@ -73,7 +73,9 @@ public:
|
||||
// . uses msg4 to add seeds to spiderdb if necessary
|
||||
// . only adds seeds for the shard we are on iff we are responsible for
|
||||
// the fake firstip!!!
|
||||
bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
|
||||
bool updateSiteListTables ( collnum_t collnum ,
|
||||
bool addSeeds ,
|
||||
char *siteListArg ) {
|
||||
|
||||
CollectionRec *cr = g_collectiondb.getRec ( collnum );
|
||||
if ( ! cr ) return true;
|
||||
@ -142,10 +144,10 @@ bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
|
||||
// use this so it will be free automatically when msg4 completes!
|
||||
SafeBuf *spiderReqBuf = &sc->m_msg4x.m_tmpBuf;
|
||||
|
||||
char *siteList = cr->m_siteListBuf.getBufStart();
|
||||
//char *siteList = cr->m_siteListBuf.getBufStart();
|
||||
|
||||
// scan the list
|
||||
char *pn = siteList;
|
||||
char *pn = siteListArg;
|
||||
|
||||
// completely empty?
|
||||
if ( ! pn ) return true;
|
||||
@ -391,10 +393,15 @@ char *getMatchingUrlPattern ( SpiderColl *sc , SpiderRequest *sreq ) {
|
||||
// check domain specific tables
|
||||
HashTableX *dt = &sc->m_siteListDomTable;
|
||||
|
||||
// get this
|
||||
CollectionRec *cr = sc->m_cr;
|
||||
|
||||
// need to build dom table for pattern matching?
|
||||
if ( dt->getNumSlotsUsed() == 0 ) {
|
||||
if ( dt->getNumSlotsUsed() == 0 && cr ) {
|
||||
// do not add seeds, just make siteListDomTable, etc.
|
||||
updateSiteList ( sc->m_collnum , false );
|
||||
updateSiteListTables ( sc->m_collnum ,
|
||||
false , // add seeds?
|
||||
cr->m_siteListBuf.getBufStart() );
|
||||
}
|
||||
|
||||
if ( dt->getNumSlotsUsed() == 0 ) {
|
||||
|
42
Parms.cpp
42
Parms.cpp
@ -122,6 +122,40 @@ bool printUrlExpressionExamples ( SafeBuf *sb ) ;
|
||||
//
|
||||
////////
|
||||
|
||||
|
||||
// from PageBasic.cpp:
|
||||
bool updateSiteListTables(collnum_t collnum,bool addSeeds,char *siteListArg);
|
||||
|
||||
bool CommandUpdateSiteList ( char *rec ) {
|
||||
// caller must specify collnum
|
||||
collnum_t collnum = getCollnumFromParmRec ( rec );
|
||||
if ( collnum < 0 ) {
|
||||
log("parms: bad collnum for update site list");
|
||||
g_errno = ENOCOLLREC;
|
||||
return true;
|
||||
}
|
||||
// sanity
|
||||
long dataSize = getDataSizeFromParmRec ( rec );
|
||||
if ( dataSize < 0 ) {
|
||||
log("parms: bad site list size = %li bad!",dataSize);
|
||||
g_errno = EBADENGINEER;
|
||||
return true;
|
||||
}
|
||||
// need this
|
||||
CollectionRec *cr = g_collectiondb.getRec ( collnum );
|
||||
// get the sitelist
|
||||
char *data = getDataFromParmRec ( rec );
|
||||
// update it
|
||||
updateSiteListTables ( collnum ,
|
||||
true , // add NEW seeds?
|
||||
data // entire sitelist
|
||||
);
|
||||
// now that we deduped the old site list with the new one for
|
||||
// purposes of adding NEW seeds, we can do the final copy
|
||||
cr->m_siteListBuf.set ( data );
|
||||
return true;
|
||||
}
|
||||
|
||||
// . require user manually execute this to prevent us fucking up the data
|
||||
// at first initially because of a bad hosts.conf file!!!
|
||||
// . maybe put a red 'A' in the hosts table on the web page to indicate
|
||||
@ -7610,6 +7644,7 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_BASIC_SETTINGS;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_func = CommandUpdateSiteList;
|
||||
m->m_def = "";
|
||||
// rebuild urlfilters now will nuke doledb and call updateSiteList()
|
||||
m->m_flags = PF_TEXTAREA | PF_DUP | PF_REBUILDURLFILTERS;
|
||||
@ -7680,6 +7715,7 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_SITES;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_func = CommandUpdateSiteList;
|
||||
m->m_def = "";
|
||||
// rebuild urlfilters now will nuke doledb and call updateSiteList()
|
||||
m->m_flags = PF_TEXTAREA | PF_REBUILDURLFILTERS;
|
||||
@ -18056,7 +18092,11 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) {
|
||||
}
|
||||
|
||||
// cmd to execute?
|
||||
if ( parm->m_type == TYPE_CMD ) {
|
||||
if ( parm->m_type == TYPE_CMD ||
|
||||
// sitelist is a safebuf but it requires special deduping
|
||||
// logic to update it so it uses CommandUpdateSiteList() to
|
||||
// do the updating
|
||||
parm->m_func ) {
|
||||
// all parm rec data for TYPE_CMD should be ascii/utf8 chars
|
||||
// and should be \0 terminated
|
||||
char *data = getDataFromParmRec ( rec );
|
||||
|
Loading…
Reference in New Issue
Block a user