do not set indexcode to EFAKEFIRSTIP

for INJECTED urls, only for urls added via add url.
fix add url page to not always use the 'main'
collection. added reset/restart cmds to spider page.
This commit is contained in:
Matt Wells 2014-01-18 11:09:30 -08:00
parent 178af5f781
commit 22aa13e34d
3 changed files with 43 additions and 23 deletions

View File

@ -391,8 +391,15 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<form method=get action=/addurl name=f>\n");
//CollectionRec *cr = g_collectiondb.getRec ( "main" );
//sb.safePrintf("<input type=hidden name=c value=\"%s\">",cr->m_coll);
// the collection we want to add the url to
char *coll = r->getString("c");
if ( ! coll ) coll = "";
if ( coll )
sb.safePrintf("<input type=hidden name=c value=\"%s\">",coll);
sb.safePrintf("<input name=u type=text size=60 value=\"");
if ( url ) {
SafeBuf tmp;
@ -453,11 +460,12 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
// msg7 needs an explicit collection for /addurl for injecting
// in PageInject.cpp. it does not use defaults for safety.
sb.safePrintf("&id=%lu&c=main&rand=%llu';\n"
sb.safePrintf("&id=%lu&c=%s&rand=%llu';\n"
"client.open('GET', url );\n"
"client.send();\n"
"</script>\n"
, h32
, coll
, rand64
);
sb.safePrintf("</div>\n");
@ -1552,6 +1560,8 @@ void doneInjectingWrapper3 ( void *st ) {
//CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
// collection name
char *coll = st1->m_coll;
if ( ! coll ) coll = "";
//char tt [ 128 ];
//tt[0] = '\0';
@ -1658,8 +1668,10 @@ void doneInjectingWrapper3 ( void *st ) {
unsigned long rand32 = rand();
// in the mime to 0 seconds!
sb.safePrintf("<b>Url successfully added. "
"<a href=/search?rand=%lu&q=url%%3A",
rand32);
"<a href=/search?rand=%lu&"
"c=%s&q=url%%3A",
rand32,
coll);
sb.urlEncode(url);
sb.safePrintf(">Check it</a> or "
"<a href=http://www.gigablast.com/seo?u=");

View File

@ -9271,24 +9271,6 @@ void Parms::init ( ) {
m->m_cast = 1;
m++;
m->m_title = "reset collection";
m->m_desc = "reset collection";
m->m_cgi = "reset";
m->m_type = TYPE_CMD;
m->m_page = PAGE_NONE;
m->m_func2 = CommandResetColl;
m->m_cast = 1;
m++;
m->m_title = "restart collection";
m->m_desc = "restart collection";
m->m_cgi = "restart";
m->m_type = TYPE_CMD;
m->m_page = PAGE_NONE;
m->m_func2 = CommandRestartColl;
m->m_cast = 1;
m++;
m->m_title = "in sync";
m->m_desc = "signify in sync with host 0";
m->m_cgi = "insync";
@ -9321,6 +9303,26 @@ void Parms::init ( ) {
m->m_def = "1";
m++;
m->m_title = "reset collection";
m->m_desc = "Remove all documents from the collection and turn "
"spiders off.";
m->m_cgi = "reset";
m->m_type = TYPE_CMD;
m->m_page = PAGE_SPIDER;
m->m_func2 = CommandResetColl;
m->m_cast = 1;
m++;
m->m_title = "restart collection";
m->m_desc = "Remove all documents from the collection and start "
"spidering over again.";
m->m_cgi = "restart";
m->m_type = TYPE_CMD;
m->m_page = PAGE_SPIDER;
m->m_func2 = CommandRestartColl;
m->m_cast = 1;
m++;
/*
m->m_title = "new spidering enabled";
m->m_desc = "When enabled the spider adds NEW "

View File

@ -2106,7 +2106,13 @@ bool XmlDoc::indexDoc2 ( ) {
// do this before we increment pageDownloadAttempts below so that
// john's smoke tests, which use those counts, are not affected
if ( m_oldsrValid && m_oldsr.m_fakeFirstIp &&
if ( m_oldsrValid &&
m_oldsr.m_fakeFirstIp &&
// only do for add url, not for injects. injects expect
// the doc to be indexed while the browser waits. add url
// is really just adding the spider request and returning
// to the browser without delay.
! m_oldsr.m_isInjecting &&
// diffbot requests are ok though!
! strstr(m_oldsr.m_url,"-diffbotxyz") ) {
m_indexCodeValid = true;