2014-02-08 16:24:33 -07:00

314 lines
8.0 KiB

#include "SafeBuf.h"
#include "HttpRequest.h"
#include "SearchInput.h"
#include "Pages.h"
#include "Parms.h"
bool printSiteListBox ( SafeBuf *sb , HttpRequest *hr ) ;
// main > Basic > Settings
// Serves the "main > Basic > Settings" admin page.
//
// The page is built into a stack-backed SafeBuf and handed to
// g_httpServer.sendDynamicPage(). The output format is chosen by the
// "format" request parameter ("json" or "xml"); anything else, including
// a missing parameter, selects html.
//
// Returns true after sending a 500 error reply when the request does not
// resolve to a collection; otherwise returns whatever
// sendDynamicPage() returns.
bool sendPageBasicSettings ( TcpSocket *socket , HttpRequest *hr ) {

	char buf [ 128000 ];
	SafeBuf sb(buf,128000);

	// html is the default, so only the non-default formats need a test
	char *fs = hr->getString("format",NULL,NULL);
	char fmt = FORMAT_HTML;
	if      ( fs && strcmp(fs,"json") == 0 ) fmt = FORMAT_JSON;
	else if ( fs && strcmp(fs,"xml" ) == 0 ) fmt = FORMAT_XML;

	// print standard header
	if ( fmt == FORMAT_HTML )
		g_pages.printAdminTop ( &sb , socket , hr );

	// bail out before touching cr if the request names no valid collection
	CollectionRec *cr = getCollRecFromHttpRequest ( hr );
	if ( ! cr ) {
		g_httpServer.sendErrorReply(socket,500,"invalid collection");
		return true;
	}

	// NOTE(review): the form markup below is emitted for every format,
	// not just html -- confirm that is intended for json/xml replies.
	sb.safePrintf("<form method=POST submit=/basic/settings>\n");

	// print pause or resume button, never both: which one depends on
	// whether spidering is currently enabled for this collection
	if ( cr->m_spideringEnabled ) {
		sb.safePrintf("<input type=submit "
			      "style=\""
			      "font:Helvetica Neue,Helvetica Arial;"
			      "\" "
			      "text=\"Pause Spidering\" "
			      "name=pause value=1>");
	}
	else {
		sb.safePrintf("<input type=submit "
			      "style=\""
			      "font:Helvetica Neue,Helvetica Arial;"
			      "\" "
			      "text=\"Resume Spidering\" "
			      "name=pause value=0>");
	}

	sb.safePrintf(" &nbsp; &nbsp; ");

	// the restart button
	sb.safePrintf("<input type=submit text=\"Restart Collection\" "
		      "name=restart value=1 title=\"Reset "
		      "the current collection's index and start spidering "
		      "over, but keep all the settings and "
		      "the site list below.\">");

	// also used in the advanced controls under the "add url" tab i guess
	printSiteListBox ( &sb , hr );

	if ( fmt == FORMAT_HTML ) sb.safePrintf ( "<br><br>\n" );

	// wrap up the form, print a submit button
	if ( fmt != FORMAT_JSON )
		g_pages.printAdminBottom ( &sb );

	return g_httpServer.sendDynamicPage ( socket,
					      sb.getBufStart() ,
					      sb.length() ,
					      -1 ,
					      false,//POSTReply ,
					      NULL , // contType
					      -1 , // httpstatus
					      NULL,//cookie ,
					      NULL );// charset
}
bool printSiteListBox ( SafeBuf *sb , HttpRequest *hr ) {
CollectionRec *cr = getCollectionRec ( hr );
if ( ! cr ) return true;
char *submittedSiteList = hr->getString("sitelist" );
// we do not automatically set this parm so that we can verify it
// before setting cr->m_siteListBuf
bool valid = true;
SafeBuf validMsg;
if ( submittedSiteList )
valid = validateSiteList (submittedSiteList,&validMsg);
// if it is a valid list of sites... broadcast it to all hosts
// so they can update cr->m_siteList with it. when they get it
// they will have to update their siteListTable hashtable, which
// we use to quickly determine if we should spider a url or not
// in Spider.cpp
if ( valid && submittedSiteList &&
// if it was too big this might say oom i guess
! g_parms.broadcastParm( submittedSiteList , "sitelist" ) ) {
// tell the browser why we failed
validMsg.safePrintf("Error distributing site list: %s",
valid = false;
// print if submitted site list is valid or not
if ( ! valid )
sb.safePrintf("<br><font color=red><b>"
, validMsg.getBufStart() );
// it is a safebuf parm
char *siteList = cr->m_siteListBuf.getBufStart();
SafeBuf msgBuf;
char *status = "";
long max = 100000;
if ( cr->m_numSiteEntries > max ) {
msgBuf.safePrintf( "<font color=red><b>"
"There are %li site entries, too many to "
"display on this web page. Please use the "
"file upload feature only for now."
, max );
status = " disabled";
char *msg2 = msgBuf.getBufStart();
if ( ! msg2 ) msg2 = "";
// now list of sites to include, or exclude
sb->safePrintf ( "List of sites to spider, one per line:"
"<textarea cols=80 rows=40%s>"
, msg2
, status
// print sites
sb->safeMemcpy ( &cr->m_siteListBuf );
//"Alternatively you can edit the local "
//"file %s/coll.%s.%li/sitelist.txt and "
//"then click this link: <a>reload file</a>. "
//"Or you can <a>upload a file</a> "
"Alternatively, you can "
"<input "
"size=20 "
"type=file "
"name=\"Upload a File\"> of "
"urls "
"to REPLACE all the urls in here now. If there "
"is an error with your submission then "
"Gigablast will tell you and not "
"perform the replacement. "
"On the command like you can issue a command like "
"gb addurls &lt; fileofurls.txt"
"</i> or "
"gb addfile &lt; *.html"
"</i> or "
"gb injecturls &lt; fileofurls.txt"
"</i> or "
"gb injectfile &lt; *.html"
"</i> or "
"to schedule downloads or inject content directly "
"into Gigablast."
// example table
"<tr><td colspan=2><center>Examples"
"</center></td></tr>" );
"<td>Spider all urls encountered. If you just enter "
"this by itself, then Gigablast will initiate spidering "
"automatically at, an internet "
"directory of good sites.</td>"
"Spider urls on and on "
"any subdomain of"
"Only spider urls beginning with "
"Only spider urls beginning with "
"Only spider urls from a subdomain of "
"and only using the http, not https, protocol."
"Only spider the single url"
"Spider urls on any subdomain of AND "
"in the /mydir/ directory or subdirectory thereof."
"<td> *boots*</td>"
"Spider urls on any subdomain of AND "
"in the /mydir/ directory or subdirectory thereof "
"AND with the word boots somewhere in the url."
"<td> *boots$</td>"
"Spider urls on any subdomain of AND "
"in the /mydir/ directory or subdirectory thereof "
"AND ENDING in the word boots."
"Spider all local files in the specified directory."
"<td>Exclude all pages from</td>"
"Advanced users only. "
"Tag all urls from with <i>mytag</i> "
"which can be used like <i>tag:mytag</i> in the "
"<a href=/scheduler>spider scheduler</a> for more "
"precise spidering control over url subsets."