basic controls code checkpoint.

This commit is contained in:
Matt Wells 2014-02-08 15:10:06 -07:00
parent dabd691626
commit e593b6e1de
5 changed files with 372 additions and 73 deletions

View File

@ -59,7 +59,8 @@ OBJS = Tfndb.o UdpSlot.o Rebalance.o \
Users.o Images.o Wiki.o Wiktionary.o Scraper.o \
Dates.o Sections.o SiteGetter.o Syncdb.o \
Placedb.o Address.o Test.o GeoIP.o GeoIPCity.o Synonyms.o \
Cachedb.o Monitordb.o dlstubs.o PageCrawlBot.o Json.o
Cachedb.o Monitordb.o dlstubs.o PageCrawlBot.o Json.o PageBasic.o
CHECKFORMATSTRING = -D_CHECK_FORMAT_STRING_

310
PageBasic.cpp Normal file
View File

@ -0,0 +1,310 @@
#include "SafeBuf.h"
#include "HttpRequest.h"
bool printSiteListBox ( SafeBuf *sb , HttpRequest *hr ) ;
//
// main > Basic > Settings
//
// . handler for the basic settings admin page
// . builds the page in a SafeBuf backed by a stack buffer and sends it
//   back on "socket" with g_httpServer.sendDynamicPage()
// . the "format" cgi parm selects FMT_HTML (the default), FMT_JSON or
//   FMT_XML
// . if no collection can be resolved from the request an error reply is
//   sent and true is returned; otherwise returns whatever
//   sendDynamicPage() returns
bool sendPageBasicSettings ( TcpSocket *socket , HttpRequest *hr ) {
	char buf [ 128000 ];
	SafeBuf sb(buf,128000);

	// unknown or missing format names fall back to html
	char *fs = hr->getString("format",NULL,NULL);
	char fmt = FMT_HTML;
	if ( fs && strcmp(fs,"html") == 0 ) fmt = FMT_HTML;
	if ( fs && strcmp(fs,"json") == 0 ) fmt = FMT_JSON;
	if ( fs && strcmp(fs,"xml") == 0 ) fmt = FMT_XML;

	// print standard header
	if ( fmt == FMT_HTML )
		g_pages.printAdminTop ( &sb , socket , hr );

	CollectionRec *cr = getCollRecFromHttpRequest ( hr );
	if ( ! cr ) {
		g_httpServer.sendErrorReply(socket,500,"invalid collection");
		return true;
	}

	// . no action attribute on purpose: the browser then posts back to
	//   the url that served this page
	// . the old tag used "submit=/basic/settings" which is not an html
	//   attribute (so it was ignored) and did not match the path this
	//   handler is registered under
	// . NOTE(review): the form is not closed in this function;
	//   presumably printAdminBottom() wraps it up -- confirm
	sb.safePrintf("<form method=POST>\n");

	// . print pause or resume button
	// . use <button> so the visible caption can differ from the value
	//   that gets submitted. an <input type=submit> shows its "value"
	//   as the caption and has no "text" attribute.
	if ( cr->m_spideringEnabled )
		sb.safePrintf("<button type=submit "
			      "style=\""
			      "font-family:'Helvetica Neue',Helvetica,Arial;"
			      "\" "
			      "name=pause value=1>"
			      "Pause Spidering"
			      "</button>");
	else
		sb.safePrintf("<button type=submit "
			      "style=\""
			      "font-family:'Helvetica Neue',Helvetica,Arial;"
			      "\" "
			      "name=pause value=0>"
			      "Resume Spidering"
			      "</button>");

	sb.safePrintf(" &nbsp; &nbsp; ");

	// the restart button
	sb.safePrintf("<button type=submit "
		      "name=restart value=1 title=\"Reset "
		      "the current collection's index and start spidering "
		      "over, but keep all the settings and "
		      "the site list below.\">"
		      "Restart Collection"
		      "</button>");

	sb.safePrintf("<br><br>");

	// also used in the advanced controls under the "add url" tab i guess
	printSiteListBox ( &sb , hr );

	if ( fmt == FMT_HTML ) sb.safePrintf ( "<br><br>\n" );

	// . wrap up the form, print a submit button
	// . NOTE(review): the top is only printed for html but the bottom
	//   is printed for xml as well; looks unintended but kept as-is
	//   until the non-html output of this page is defined
	if ( fmt != FMT_JSON )
		g_pages.printAdminBottom ( &sb );

	return g_httpServer.sendDynamicPage ( socket,
					      sb.getBufStart() ,
					      sb.length() ,
					      -1 ,
					      false,//POSTReply ,
					      NULL , // contType
					      -1 , // httpstatus
					      NULL,//cookie ,
					      NULL );// charset
}
// . print the site list editing box, the file upload control and the
//   help/examples table into "sb"
// . if the request carries a "sitelist" parm it is checked with
//   validateSiteList() and, when valid, handed to
//   g_parms.broadcastParm(); a failure of either step is reported in red
//   above the box
// . the box itself always shows cr->m_siteListBuf, not the submitted text
// . returns true always, including when no collection can be resolved
//   from the request (nothing is printed in that case)
bool printSiteListBox ( SafeBuf *sb , HttpRequest *hr ) {

	CollectionRec *cr = getCollRecFromHttpRequest ( hr );
	if ( ! cr ) return true;

	char *submittedSiteList = hr->getString("sitelist",NULL,NULL);

	// we do not automatically set this parm so that we can verify it
	// before setting cr->m_siteListBuf
	bool valid = true;
	SafeBuf validMsg;
	if ( submittedSiteList )
		valid = validateSiteList (submittedSiteList,&validMsg);

	// if it is a valid list of sites... broadcast it to all hosts
	// so they can update cr->m_siteList with it. when they get it
	// they will have to update their siteListTable hashtable, which
	// we use to quickly determine if we should spider a url or not
	// in Spider.cpp
	if ( valid && submittedSiteList &&
	     // if it was too big this might say oom i guess
	     ! g_parms.broadcastParm( submittedSiteList , "sitelist" ) ) {
		// tell the browser why we failed
		validMsg.safePrintf("Error distributing site list: %s",
				    mstrerror(g_errno));
		valid = false;
	}

	// print if submitted site list is valid or not
	if ( ! valid ) {
		// the message buffer can be empty if the validator gave no
		// reason. never hand NULL to %s.
		char *err = validMsg.getBufStart();
		if ( ! err ) err = "";
		// NOTE(review): err is inserted without html encoding and
		// may echo parts of the submitted list -- confirm whether
		// validateSiteList() already escapes it
		sb->safePrintf("<br><font color=red><b>"
			       "%s"
			       "</b></font>"
			       "<br>"
			       , err );
	}

	// beyond this many entries the list is not shown in the browser
	long max = 100000;
	bool tooMany = ( cr->m_numSiteEntries > max );

	SafeBuf msgBuf;
	char *status = "";
	if ( tooMany ) {
		// report the actual number of entries, not the limit
		msgBuf.safePrintf( "<font color=red><b>"
				   "There are %li site entries, too many to "
				   "display on this web page. Please use the "
				   "file upload feature only for now."
				   "</b></font>"
				   , (long)cr->m_numSiteEntries );
		status = " disabled";
	}

	char *msg2 = msgBuf.getBufStart();
	if ( ! msg2 ) msg2 = "";

	// . now list of sites to include, or exclude
	// . the textarea is named "sitelist" so that its contents come back
	//   as the parm read at the top of this function
	sb->safePrintf ( "List of sites to spider, one per line:"
			 "<br>"
			 "%s"
			 "<br>"
			 "<textarea name=sitelist cols=80 rows=40%s>"
			 , msg2
			 , status
			 );

	// . print sites. it is a safebuf parm.
	// . skipped when there are too many, as the message above says
	// . NOTE(review): the list is copied in without html encoding, so
	//   an entry containing markup would break the page
	if ( ! tooMany )
		sb->safeMemcpy ( &cr->m_siteListBuf );

	sb->safePrintf("</textarea>\n");

	sb->safePrintf("<br>"
		       "<br>"
		       //"Alternatively you can edit the local "
		       //"file %s/coll.%s.%li/sitelist.txt and "
		       //"then click this link: <a>reload file</a>. "
		       //"Or you can <a>upload a file</a> "
		       "Alternatively, you can "
		       "<input "
		       "size=20 "
		       "type=file "
		       "name=\"Upload a File\"> of "
		       "urls "
		       "to REPLACE all the urls in here now. If there "
		       "is an error with your submission then "
		       "Gigablast will tell you and not "
		       "perform the replacement. "
		       "<br><br>"
		       "On the command line you can issue a command like "
		       "<i>"
		       "gb addurls &lt; fileofurls.txt"
		       "</i> or "
		       "<i>"
		       "gb addfile &lt; *.html"
		       "</i> or "
		       "<i>"
		       "gb injecturls &lt; fileofurls.txt"
		       "</i> or "
		       "<i>"
		       "gb injectfile &lt; *.html"
		       "</i> "
		       "to schedule downloads or inject content directly "
		       "into Gigablast."
		       );

	sb->safePrintf("<br><br>");

	// example table
	sb->safePrintf("<table>"
		       "<tr><td colspan=2><center>Examples"
		       "</center></td></tr>" );

	sb->safePrintf(
		       "<tr>"
		       "<td>*</td>"
		       "<td>Spider all urls encountered. If you just enter "
		       "this by itself, then Gigablast will initiate spidering "
		       "automatically at dmoz.org, an internet "
		       "directory of good sites.</td>"
		       "</tr>"

		       "<tr>"
		       "<td>goodstuff.com</td>"
		       "<td>"
		       "Spider urls on goodstuff.com and on "
		       "any subdomain of goodstuff.com"
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>http://goodstuff.com</td>"
		       "<td>"
		       "Only spider urls beginning with http://goodstuff.com/ "
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>https://goodstuff.com</td>"
		       "<td>"
		       "Only spider urls beginning with https://goodstuff.com/ "
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>http://*.goodstuff.com</td>"
		       "<td>"
		       "Only spider urls from a subdomain of goodstuff.com "
		       "and only using the http, not https, protocol."
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>http://xyz.goodstuff.com/$</td>"
		       "<td>"
		       "Only spider the single url http://xyz.goodstuff.com/"
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>goodstuff.com/mydir/</td>"
		       "<td>"
		       "Spider urls on any subdomain of goodstuff.com AND "
		       "in the /mydir/ directory or subdirectory thereof."
		       "</td>"
		       "</tr>"

		       /*
		       "<tr>"
		       "<td>goodstuff.com/mydir/*boots*</td>"
		       "<td>"
		       "Spider urls on any subdomain of goodstuff.com AND "
		       "in the /mydir/ directory or subdirectory thereof "
		       "AND with the word boots somewhere in the url."
		       "</td>"
		       "</tr>"
		       "<tr>"
		       "<td>goodstuff.com/mydir/*boots$</td>"
		       "<td>"
		       "Spider urls on any subdomain of goodstuff.com AND "
		       "in the /mydir/ directory or subdirectory thereof "
		       "AND ENDING in the word boots."
		       "</td>"
		       "</tr>"
		       */

		       "<tr>"
		       "<td>file://C/mydir/mysubdir/</td>"
		       "<td>"
		       "Spider all local files in the specified directory."
		       "</td>"
		       "</tr>"

		       "<tr>"
		       "<td>-badstuff.com</td>"
		       "<td>Exclude all pages from badstuff.com</td>"
		       "</tr>"

		       "<tr>"
		       "<td>mytag goodstuff.com</td>"
		       "<td>"
		       "Advanced users only. "
		       "Tag all urls from goodstuff.com with <i>mytag</i> "
		       "which can be used like <i>tag:mytag</i> in the "
		       "<a href=/scheduler>spider scheduler</a> for more "
		       "precise spidering control over url subsets."
		       "</td>"
		       "</tr>"

		       "</table>"
		       );

	// the function is declared bool; falling off the end would be
	// undefined behavior for any caller that looks at the result
	return true;
}

View File

@ -4508,45 +4508,3 @@ bool setSpiderParmsFromHtmlRequest ( TcpSocket *socket ,
//////////
//bool sendPageLast100Urls ( TcpSocket *socket , HttpRequest *hr ) {
//
// BASIC admin controls
//
// . handler for the placeholder basic controls page
// . the "format" cgi parm picks html (default), json or xml
// . returns the result of g_httpServer.sendDynamicPage()
bool sendPageBasic ( TcpSocket *socket , HttpRequest *hr ) {
	// the page is assembled in a SafeBuf backed by this stack buffer
	char stackBuf [ 128000 ];
	SafeBuf page ( stackBuf , 128000 );

	// html unless the request explicitly names json or xml
	char format = FMT_HTML;
	char *formatStr = hr->getString("format",NULL,NULL);
	if ( formatStr ) {
		if      ( strcmp(formatStr,"json") == 0 ) format = FMT_JSON;
		else if ( strcmp(formatStr,"xml" ) == 0 ) format = FMT_XML;
	}
	bool isHtml = ( format == FMT_HTML );

	// standard admin header, html only
	if ( isHtml ) {
		g_pages.printAdminTop ( &page , socket , hr );
	}

	// the body of the page
	page.safePrintf("BASIC CONTROLS");
	if ( isHtml ) {
		page.safePrintf ( "<br><br>\n" );
	}

	// wrap up the form and print a submit button for everything
	// except json
	if ( format != FMT_JSON ) {
		g_pages.printAdminBottom ( &page );
	}

	char *content    = page.getBufStart();
	long  contentLen = page.length();
	return g_httpServer.sendDynamicPage ( socket ,
					      content ,
					      contentLen ,
					      -1 ,
					      false , // POSTReply
					      NULL ,  // contType
					      -1 ,    // httpstatus
					      NULL ,  // cookie
					      NULL ); // charset
}

View File

@ -94,9 +94,15 @@ static WebPage s_pages[] = {
// "word vectors page",
// sendPageWordVec , 0 } ,
{ PAGE_BASIC , "admin/basic" , 0 , "basic controls" , 1 , 0 ,
"basic controls page",
sendPageBasic , 0 } ,
{ PAGE_BASIC_SETTINGS, "admin/settings", 0 , "settings",1, 0 ,
"Basic settings page.", sendPageBasicSettings , 0 } ,
{ PAGE_BASIC_STATUS, "admin/status", 0 , "status",1, 0 ,
"Basic status page.", sendPageBasicStatus , 0 } ,
{ PAGE_BASIC_DIFFBOT, "admin/diffbot", 0 , "diffbot",1, 0 ,
"Basic diffbot page.", sendPageBasicDiffbot , 0 } ,
{ PAGE_BASIC_PASSWORDS, "admin/passwords", 0 , "passwords",1, 0 ,
"Basic passwords page.", sendPageBasicPasswords , 0 } ,
{ PAGE_MASTER , "admin/master" , 0 , "master controls" , 1 , 0 ,
//USER_MASTER | USER_PROXY ,
@ -1029,8 +1035,8 @@ bool Pages::printAdminTop (SafeBuf *sb ,
sb->safePrintf("<input type=hidden name=master value=0>\n");
}
// should any changes be broadcasted to all hosts?
sb->safePrintf ("<input type=hidden name=cast value=\"%li\">\n",
(long)s_pages[page].m_cast);
//sb->safePrintf ("<input type=hidden name=cast value=\"%li\">\n",
// (long)s_pages[page].m_cast);
@ -1051,6 +1057,11 @@ bool Pages::printAdminTop (SafeBuf *sb ,
if ( adds )
sb->safePrintf("<br>%s",mb.getBufStart());
// print breadcrumb. main > Basic > Settings
char *menu = "Advanced";
if ( page == PAGE_BASIC ) menu = "Basic";
sb->safePrintf("<b>%s > %s > %s", coll, menu, s_pages[page].m_name);
// print Basic | Advanced links
if ( page == PAGE_BASIC ) {
sb->safePrintf ( "<b><font color=red>Basic</font></b>"
@ -1361,6 +1372,7 @@ void Pages::printFormData( SafeBuf *sb, TcpSocket *s, HttpRequest *r ) {
}
/*
char *Pages::printAdminBottom ( char *p , char *pend , HttpRequest *r ) {
return printAdminBottom ( p , pend );
}
@ -1376,6 +1388,7 @@ char *Pages::printAdminBottom ( char *p , char *pend ) {
p += gbstrlen ( p );
return p;
}
*/
bool Pages::printAdminBottom ( SafeBuf *sb, HttpRequest *r ) {
return printAdminBottom ( sb );
@ -1404,6 +1417,7 @@ bool Pages::printAdminBottom2 ( SafeBuf *sb ) {
return status;
}
/*
char *Pages::printTail ( char *p , char *pend , bool isLocal ) {
// don't breech the buffer
if ( p + 2000 >= pend ) return p;
@ -1455,6 +1469,7 @@ char *Pages::printTail ( char *p , char *pend , bool isLocal ) {
// return length of bytes we stored
return p ;
}
*/
bool Pages::printTail ( SafeBuf* sb, bool isLocal ) {
// now print the tail
@ -1516,7 +1531,7 @@ bool Pages::printColors ( SafeBuf *sb, char* bodyJavascript ) {
bodyJavascript);
return true;
}
/*
char *Pages::printColors ( char *p , char *pend, char* bodyJavascript ) {
// print font and color stuff
sprintf ( p ,
@ -1550,7 +1565,7 @@ char *Pages::printColors2 ( char *p , char *pend ) {
"a:active,.fl:active{color:#f00}"
"//--></style>\n"
/*
"<style><!--"
"body,td,div,.p,a{font-family:arial,sans-serif }"
"div,td{color:#000}"
@ -1573,11 +1588,12 @@ char *Pages::printColors2 ( char *p , char *pend ) {
".ch{cursor:pointer;cursor:hand}"
"//-->"
"</style>"
*/
);
p += gbstrlen ( p );
return p;
}
*/
bool Pages::printColors3 ( SafeBuf *sb ) {
// print font and color stuff
@ -1590,7 +1606,7 @@ bool Pages::printColors3 ( SafeBuf *sb ) {
);
return true;
}
/*
char *Pages::printFocus ( char *p , char *pend ) {
// print the logo in upper right corner
sprintf ( p ,
@ -1600,7 +1616,7 @@ char *Pages::printFocus ( char *p , char *pend ) {
p += gbstrlen ( p );
return p;
}
*/
bool Pages::printLogo ( SafeBuf *sb, char *coll ) {
// print the logo in upper right corner
@ -1613,7 +1629,7 @@ bool Pages::printLogo ( SafeBuf *sb, char *coll ) {
return true;
}
/*
char *Pages::printLogo ( char *p , char *pend , char *coll ) {
// print the logo in upper right corner
if ( ! coll ) coll = "";
@ -1625,6 +1641,7 @@ char *Pages::printLogo ( char *p , char *pend , char *coll ) {
p += gbstrlen ( p );
return p;
}
*/
bool Pages::printHostLinks ( SafeBuf* sb ,
long page ,
@ -1727,6 +1744,7 @@ bool Pages::printHostLinks ( SafeBuf* sb ,
return status;
}
/*
char *Pages::printHostLinks ( char *p ,
char *pend ,
long page ,
@ -1780,7 +1798,7 @@ char *Pages::printHostLinks ( char *p ,
}
return p;
}
*/
// . print the master admin links if "user" is USER_MASTER
// . print the collection admin links if "user" is USER_ADMIN
@ -1829,7 +1847,7 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
//if ( (s_pages[i].m_perm & user) == 0 ) continue;
//if ( ! g_users.hasPermission(username,i) ) continue;
// do not print Sync link if only one host
//if ( i == PAGE_SYNC && g_hostdb.getNumHosts() == 1 ) continue;
//if ( i == PAGE_SYNC && g_hostdb.getNumHosts() == 1) continue;
// top or bottom
if ( top && i >= PAGE_CGIPARMS ) continue;
if ( ! top && i < PAGE_CGIPARMS ) continue;
@ -2480,7 +2498,8 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
"<tr bgcolor=#%s><td><b>CGI</b></td>"
"<td><b>Page</b></td>"
"<td><b>Type</b></td>"
"<td><b>Name</b></td><td><b>Description</b></td></tr>\n",
"<td><b>Name</b></td>"
"<td><b>Description</b></td></tr>\n",
TABLE_STYLE , DARK_BLUE);
for ( long i = 0; i < g_parms.m_numParms; i++ ) {
Parm *parm = &g_parms.m_parms[i];

43
Pages.h
View File

@ -26,7 +26,13 @@ extern char *g_msg;
// . declare all dynamic functions here
// . these are all defined in Page*.cpp files
// . these are called to send a dynamic page
bool sendPageBasic ( TcpSocket *s , HttpRequest *r );
bool sendPageBasicSettings ( TcpSocket *s , HttpRequest *r );
bool sendPageBasicStatus ( TcpSocket *s , HttpRequest *r );
bool sendPageBasicDiffbot ( TcpSocket *s , HttpRequest *r );
bool sendPageBasicPasswords ( TcpSocket *s , HttpRequest *r );
bool sendPageRoot ( TcpSocket *s , HttpRequest *r );
bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie );
bool sendPageResults ( TcpSocket *s , HttpRequest *r );
@ -178,8 +184,8 @@ class Pages {
void printFormTop( SafeBuf *sb, HttpRequest *r );
void printFormData( SafeBuf *sb, TcpSocket *s, HttpRequest *r );
char *printAdminBottom ( char *p, char *pend, HttpRequest *r );
char *printAdminBottom ( char *p, char *pend);
//char *printAdminBottom ( char *p, char *pend, HttpRequest *r );
//char *printAdminBottom ( char *p, char *pend);
bool printAdminBottom ( SafeBuf *sb, HttpRequest *r );
bool printAdminBottom ( SafeBuf *sb);
bool printAdminBottom2 ( SafeBuf *sb, HttpRequest *r );
@ -189,21 +195,21 @@ class Pages {
//long user ,
//char *username,
//char *pwd );
char *printTail ( char *p ,
char *pend ,
bool isLocal );
//char *printTail ( char *p ,
// char *pend ,
// bool isLocal );
//long user ,
//char *username,
//char *pwd ) ;
bool printColors ( SafeBuf *sb , char* bodyJavascript = "" ) ;
char *printColors ( char *p , char *pend ,
char* bodyJavascript = "");
//char *printColors ( char *p , char *pend ,
// char* bodyJavascript = "");
char *printColors2 ( char *p , char *pend ) ;
//char *printColors2 ( char *p , char *pend ) ;
bool printColors3 ( SafeBuf *sb ) ;
char *printFocus ( char *p , char *pend ) ;
//char *printFocus ( char *p , char *pend ) ;
bool printLogo ( SafeBuf *sb, char *coll ) ;
char *printLogo ( char *p , char *pend , char *coll ) ;
//char *printLogo ( char *p , char *pend , char *coll ) ;
bool printHostLinks ( SafeBuf *sb ,
long page ,
char *username ,
@ -212,7 +218,7 @@ class Pages {
char *pwd ,
long fromIp ,
char *qs = NULL ) ;
/*
char *printHostLinks ( char *p ,
char *pend ,
long page ,
@ -220,6 +226,7 @@ class Pages {
char *pwd ,
long fromIp ,
char *qs = NULL ) ;
*/
bool printAdminLinks ( SafeBuf *sb,
long page ,
//long user ,
@ -227,7 +234,7 @@ class Pages {
char *coll ,
char *pwd ,
bool top ) ;
/*
char *printAdminLinks ( char *p ,
char *pend ,
long page ,
@ -236,6 +243,7 @@ class Pages {
char *coll ,
char *pwd ,
bool top ) ;
*/
bool printCollectionNavBar ( SafeBuf *sb ,
long page ,
//long user ,
@ -243,7 +251,7 @@ class Pages {
char *coll ,
char *pwd ,
char *qs );
/*
char *printCollectionNavBar ( char *p ,
char *pend ,
long page ,
@ -252,7 +260,7 @@ class Pages {
char *coll ,
char *pwd ,
char *qs = NULL );
*/
/*
bool printRulesetDropDown ( SafeBuf *sb ,
long user ,
@ -295,7 +303,10 @@ enum {
//PAGE_WORDVECTOR ,
// basic controls page /admin/basic
PAGE_BASIC ,
PAGE_BASIC_SETTINGS ,
PAGE_BASIC_STATUS ,
PAGE_BASIC_DIFFBOT ,
PAGE_BASIC_PASSWORDS ,
// master admin pages
PAGE_MASTER ,