mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
fix injection stuff
This commit is contained in:
parent
8157c5be14
commit
a7462ed1f4
@ -258,7 +258,7 @@ bool sendReply ( void *state ) {
|
||||
|
||||
// . if we're talking w/ a robot he doesn't care about this crap
|
||||
// . send him back the error code (0 means success)
|
||||
if ( url && gr->m_int16_tReply ) {
|
||||
if ( url && gr->m_shortReply ) {
|
||||
char buf[1024*32];
|
||||
char *p = buf;
|
||||
// return docid and hostid
|
||||
@ -634,7 +634,11 @@ bool Msg7::inject ( void *state ,
|
||||
gr->m_newOnly, // index iff new
|
||||
|
||||
this ,
|
||||
doneInjectingWrapper9 ) )
|
||||
doneInjectingWrapper9 ,
|
||||
|
||||
// extra shit
|
||||
gr->m_firstIndexed,
|
||||
gr->m_lastSpidered ) )
|
||||
// we blocked...
|
||||
return false;
|
||||
|
||||
@ -1302,7 +1306,7 @@ bool ImportState::importLoop ( ) {
|
||||
// gr->m_diffbotReply = NULL;
|
||||
// gr->m_injectLinks = false;
|
||||
// gr->m_spiderLinks = true;
|
||||
// gr->m_int16_tReply = false;
|
||||
// gr->m_shortReply = false;
|
||||
// gr->m_newOnly = false;
|
||||
// gr->m_deleteUrl = false;
|
||||
// gr->m_recycle = true; // recycle content? or sitelinks?
|
||||
|
24
Parms.cpp
24
Parms.cpp
@ -14816,7 +14816,7 @@ void Parms::init ( ) {
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN;
|
||||
m->m_page = PAGE_INJECT;
|
||||
m->m_off = (char *)&gr.m_int16_tReply - (char *)&gr;
|
||||
m->m_off = (char *)&gr.m_shortReply - (char *)&gr;
|
||||
m++;
|
||||
|
||||
m->m_title = "only inject content if new";
|
||||
@ -14889,6 +14889,28 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&gr.m_hopCount - (char *)&gr;
|
||||
m++;
|
||||
|
||||
m->m_title = "last spider time";
|
||||
m->m_desc = "Override last time spidered";
|
||||
m->m_cgi = "lastspidered";
|
||||
m->m_obj = OBJ_GBREQUEST;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN; // | PF_API
|
||||
m->m_page = PAGE_INJECT;
|
||||
m->m_off = (char *)&gr.m_lastSpidered - (char *)&gr;
|
||||
m++;
|
||||
|
||||
m->m_title = "first indexed";
|
||||
m->m_desc = "Override first indexed time";
|
||||
m->m_cgi = "firstindexed";
|
||||
m->m_obj = OBJ_GBREQUEST;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN; // | PF_API
|
||||
m->m_page = PAGE_INJECT;
|
||||
m->m_off = (char *)&gr.m_firstIndexed - (char *)&gr;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "content has mime";
|
||||
m->m_desc = "If the content of the url is provided below, does "
|
||||
|
10
Parms.h
10
Parms.h
@ -129,7 +129,7 @@ class GigablastRequest {
|
||||
char *m_diffbotReply; // secret thing from dan
|
||||
char m_injectLinks;
|
||||
char m_spiderLinks;
|
||||
char m_int16_tReply;
|
||||
char m_shortReply;
|
||||
char m_newOnly;
|
||||
char m_deleteUrl;
|
||||
char m_recycle;
|
||||
@ -139,7 +139,13 @@ class GigablastRequest {
|
||||
char m_getSections;
|
||||
char m_gotSections;
|
||||
int32_t m_charset;
|
||||
int32_t m_hopCount;
|
||||
int32_t m_hopCount; // hopcount
|
||||
// older ones
|
||||
uint32_t m_firstIndexed; // firstindexed
|
||||
uint32_t m_lastSpidered; // lastspidered
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
///////////
|
||||
|
32
XmlDoc.cpp
32
XmlDoc.cpp
@ -1973,7 +1973,10 @@ bool XmlDoc::injectDoc ( char *url ,
|
||||
bool newOnly, // index iff new
|
||||
|
||||
void *state,
|
||||
void (*callback)(void *state) ) {
|
||||
void (*callback)(void *state) ,
|
||||
|
||||
uint32_t firstIndexed,
|
||||
uint32_t lastSpidered ) {
|
||||
|
||||
// wait until we are synced with host #0
|
||||
if ( ! isClockInSync() ) {
|
||||
@ -2007,6 +2010,9 @@ bool XmlDoc::injectDoc ( char *url ,
|
||||
SpiderRequest sreq;
|
||||
sreq.setFromInject ( cleanUrl );
|
||||
|
||||
if ( lastSpidered )
|
||||
sreq.m_addedTime = lastSpidered;
|
||||
|
||||
if ( deleteUrl )
|
||||
sreq.m_forceDelete = 1;
|
||||
|
||||
@ -2035,7 +2041,7 @@ bool XmlDoc::injectDoc ( char *url ,
|
||||
deleteUrl, // false, // deleteFromIndex ,
|
||||
0,//forcedIp ,
|
||||
contentType ,
|
||||
0,//lastSpidered ,
|
||||
lastSpidered,//lastSpidered override
|
||||
contentHasMime )) {
|
||||
// g_errno should be set if that returned false
|
||||
if ( ! g_errno ) { char *xx=NULL;*xx=0; }
|
||||
@ -2058,15 +2064,15 @@ bool XmlDoc::injectDoc ( char *url ,
|
||||
//if ( recycleContent ) m_recycleContent = true;
|
||||
|
||||
// othercrap
|
||||
//if ( firstIndexed ) {
|
||||
// m_firstIndexedDate = firstIndexed;
|
||||
// m_firstIndexedDateValid = true;
|
||||
//}
|
||||
if ( firstIndexed ) {
|
||||
m_firstIndexedDate = firstIndexed;
|
||||
m_firstIndexedDateValid = true;
|
||||
}
|
||||
|
||||
//if ( lastSpidered ) {
|
||||
// m_spideredTime = lastSpidered;
|
||||
// m_spideredTimeValid = true;
|
||||
//}
|
||||
if ( lastSpidered ) {
|
||||
m_spideredTime = lastSpidered;
|
||||
m_spideredTimeValid = true;
|
||||
}
|
||||
|
||||
if ( hopCount != -1 ) {
|
||||
m_hopCount = hopCount;
|
||||
@ -22697,7 +22703,9 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
m_sreq.m_fakeFirstIp &&
|
||||
! m_sreq.m_forceDelete &&
|
||||
// do not rebuild spiderdb if only rebuilding posdb
|
||||
m_useSpiderdb &&
|
||||
// this is explicitly for injecting so we need to add
|
||||
// the spider request to spiderdb...
|
||||
//m_useSpiderdb &&
|
||||
/// don't add requests like http://xyz.com/xxx-diffbotxyz0 though
|
||||
! m_isDiffbotJSONObject )
|
||||
needSpiderdb3 = m_sreq.getRecSize() + 1;
|
||||
@ -23229,7 +23237,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
if ( m_useSecondaryRdbs )
|
||||
getRebuiltSpiderRequest ( &revisedReq );
|
||||
|
||||
// this fills it in
|
||||
// this fills it in for doing injections
|
||||
if ( ! m_useSecondaryRdbs ) {
|
||||
getRevisedSpiderRequest ( &revisedReq );
|
||||
// sanity log
|
||||
|
Loading…
Reference in New Issue
Block a user