more misc updates.

This commit is contained in:
mwells 2014-04-05 18:09:04 -07:00
parent bd82145626
commit ac5cf7971b
12 changed files with 435 additions and 680 deletions

View File

@ -138,6 +138,19 @@ bool Collectiondb::loadAllCollRecs ( ) {
if ( ! addExistingColl ( coll , collnum ) )
return false;
}
// if no existing recs added... add coll.main.0 always at startup
if ( m_numRecs == 0 ) {
log("admin: adding main collection.");
addNewColl ( "main",
0 , // customCrawl ,
NULL,
0 ,
true , // bool saveIt ,
// Parms.cpp reserves this so it can be sure
// to add the same collnum to every shard
0 );
}
// note it
//log(LOG_INFO,"db: Loaded data for %li collections. Ranging from "
// "collection #0 to #%li.",m_numRecsUsed,m_numRecs-1);
@ -1840,31 +1853,183 @@ void CollectionRec::setUrlFiltersToDefaults ( ) {
long n = 0;
//strcpy(m_regExs [n],"default");
/*
m_regExs[n].set("default");
m_regExs[n].nullTerm();
m_numRegExs++;
m_spiderFreqs [n] = 30; // 30 days default
m_numRegExs2++;
m_spiderPriorities[n] = 0;
m_numRegExs3++;
m_maxSpidersPerRule[n] = 99;
m_numRegExs10++;
m_spiderIpWaits[n] = 1000;
m_numRegExs5++;
m_spiderIpMaxSpiders[n] = 7;
m_numRegExs6++;
//m_spidersEnabled[n] = 1;
//m_numRegExs7++;
m_harvestLinks[n] = 1;
m_numRegExs8++;
*/
m_regExs[n].set("isdocidbased");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 0; // 30 days default
m_maxSpidersPerRule [n] = 99; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 80;
n++;
m_regExs[n].set("ismedia");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 0; // 30 days default
m_maxSpidersPerRule [n] = 99; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = -3; // delete!
n++;
m_regExs[n].set("errorcount>=3 && hastmperror");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 1; // 30 days default
m_maxSpidersPerRule [n] = 1; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 3;
n++;
m_regExs[n].set("errorcount>=1 && hastmperror");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 1; // 30 days default
m_maxSpidersPerRule [n] = 1; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 45;
n++;
m_regExs[n].set("isaddurl");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 7; // 30 days default
m_maxSpidersPerRule [n] = 99; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 85;
n++;
m_regExs[n].set("hopcount==0 && iswww && isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 7; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 50;
n++;
m_regExs[n].set("hopcount==0 && iswww");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 7; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 48;
n++;
m_regExs[n].set("hopcount==0 && isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 7; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 49;
n++;
m_regExs[n].set("hopcount==0");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 10; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 47;
n++;
m_regExs[n].set("hopcount==1 && isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 20; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 40;
n++;
m_regExs[n].set("hopcount==1");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 20; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 39;
n++;
m_regExs[n].set("hopcount==2 && isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 40; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 30;
n++;
m_regExs[n].set("hopcount==2");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 40; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 29;
n++;
m_regExs[n].set("hopcount>=3 && isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 60; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 20;
n++;
m_regExs[n].set("hopcount>=3");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 60; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 19;
n++;
m_regExs[n].set("isnew");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 30; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 2;
n++;
m_regExs[n].set("default");
m_harvestLinks [n] = 1;
m_spiderFreqs [n] = 30; // 30 days default
m_maxSpidersPerRule [n] = 9; // max spiders
m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
m_spiderIpWaits [n] = 1000; // same ip wait
m_spiderPriorities [n] = 1;
n++;
m_numRegExs = n;
m_numRegExs2 = n;
m_numRegExs3 = n;
m_numRegExs10 = n;
m_numRegExs5 = n;
m_numRegExs6 = n;
m_numRegExs8 = n;
// more rules
//m_spiderDiffbotApiNum[n] = 1;
//m_numRegExs11++;
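These defaults form a parallel-array rule table: each rule fills one slot in m_regExs, m_harvestLinks, m_spiderFreqs, m_maxSpidersPerRule, m_spiderIpMaxSpiders, m_spiderIpWaits and m_spiderPriorities, and the counters m_numRegExs, m_numRegExs2, m_numRegExs3, m_numRegExs10, m_numRegExs5, m_numRegExs6 and m_numRegExs8 must all end up equal to the same n. At spider time a url takes the settings of the first expression it matches (see the url filters description in the deleted coll.conf further down). A minimal sketch of that first-match lookup, assuming a hypothetical matchesExpression() helper:

// hedged sketch; matchesExpression() is illustrative, not a real gb function
long pickUrlFilterRow ( CollectionRec *cr , SpiderRequest *sreq ) {
	for ( long i = 0 ; i < cr->m_numRegExs ; i++ )
		// the first matching expression is the one and only matching row
		if ( matchesExpression ( cr->m_regExs[i].getBufStart() , sreq ) )
			return i;
	// the trailing "default" rule normally matches everything
	return cr->m_numRegExs - 1;
}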

View File

@ -28,10 +28,14 @@
#include "TcpSocket.h"
// values for HttpRequest::m_replyFormat
#define FORMAT_HTML 0
#define FORMAT_XML 1
#define FORMAT_JSON 2
#define FORMAT_CSV 3
#define FORMAT_HTML 1
#define FORMAT_XML 2
#define FORMAT_JSON 3
#define FORMAT_CSV 4
#define FORMAT_TXT 5
#define FORMAT_PROCOG 6
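The reply-format constants are renumbered to start at 1 (presumably so 0 can act as an unspecified value) and FORMAT_TXT and FORMAT_PROCOG are added; the old FMT_* defines in PageCrawlbot.cpp are retired below in favor of these. A minimal sketch of mapping the &format= cgi parameter onto the new constants, mirroring the sendPageCrawlbot logic later in this commit (the helper name and the csv branch are assumptions):

// sketch only; getReplyFormat() is not part of the codebase
char getReplyFormat ( HttpRequest *hr , char def ) {
	char  fmt = def;
	char *fs  = hr->getString ( "format" , NULL , NULL );
	if ( fs && strcmp ( fs , "html" ) == 0 ) fmt = FORMAT_HTML;
	if ( fs && strcmp ( fs , "json" ) == 0 ) fmt = FORMAT_JSON;
	if ( fs && strcmp ( fs , "xml"  ) == 0 ) fmt = FORMAT_XML;
	if ( fs && strcmp ( fs , "csv"  ) == 0 ) fmt = FORMAT_CSV; // assumed; not shown in this diff
	return fmt;
}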
class HttpRequest {

View File

@ -156,7 +156,7 @@ bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
Url u;
for ( ; *pn ; pn++ , lineNum++ ) {
for ( ; *pn ; lineNum++ ) {
// get end
char *s = pn;
@ -169,6 +169,9 @@ bool updateSiteList ( collnum_t collnum , bool addSeeds ) {
char *pe = pn;
for ( ; pe > s && is_wspace_a(pe[-1]) ; pe-- );
// advance over '\n' for next line
if ( *pn && *pn == '\n' ) pn++;
// make hash of the line
long h32 = hash32 ( s , pe - s );
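The increment moved out of the for header: the line is scanned, trailing whitespace is trimmed, and only then is the '\n' stepped over, so pn advances exactly once per line and the hash covers the trimmed line. A simplified standalone sketch of that pattern (the buffer name is illustrative, and the elided end-of-line scan is assumed to stop at '\n'):

// standalone sketch of the per-line scan; siteListBuf is an illustrative name
char *pn = siteListBuf;                        // NUL-terminated, one site per line
for ( long lineNum = 0 ; *pn ; lineNum++ ) {
	char *s = pn;                          // start of line
	while ( *pn && *pn != '\n' ) pn++;     // assumed: advance to end of line
	char *pe = pn;
	for ( ; pe > s && is_wspace_a(pe[-1]) ; pe-- ); // trim trailing whitespace
	if ( *pn == '\n' ) pn++;               // advance over '\n' for next line
	long h32 = hash32 ( s , pe - s );      // hash of the trimmed line
	// ... dedup and seeding continue as in updateSiteList() above, using h32 ...
}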
@ -728,6 +731,7 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
char buf [ 128000 ];
SafeBuf sb(buf,128000);
sb.reset();
char *fs = hr->getString("format",NULL,NULL);
char fmt = FORMAT_HTML;
@ -761,7 +765,7 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
//
// show stats
//
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
char *seedStr = cr->m_diffbotSeeds.getBufStart();
if ( ! seedStr ) seedStr = "";
@ -773,43 +777,17 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
long sentAlert = (long)ci->m_sentCrawlDoneAlert;
if ( sentAlert ) sentAlert = 1;
sb.safePrintf(
//sb.safePrintf(
// "<form method=get action=/crawlbot>"
// "%s"
// , sb.getBufStart() // hidden input token/name/..
// );
"<form method=get action=/crawlbot>"
"%s"
, sb.getBufStart() // hidden input token/name/..
);
sb.safePrintf("<TABLE border=0>"
"<TR><TD valign=top>"
"<table border=0 cellpadding=5>"
//
"<tr>"
"<td><b>Crawl Name:</td>"
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Crawl Type:</td>"
"<td>%li</td>"
"</tr>"
//"<tr>"
//"<td><b>Collection Alias:</td>"
//"<td>%s%s</td>"
//"</tr>"
"<tr>"
"<td><b>Token:</td>"
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Seeds:</td>"
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Crawl Status:</td>"
"<td>%li</td>"
@ -820,10 +798,10 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Rounds Completed:</td>"
"<td>%li</td>"
"</tr>"
//"<tr>"
//"<td><b>Rounds Completed:</td>"
//"<td>%li</td>"
//"</tr>"
"<tr>"
"<td><b>Has Urls Ready to Spider:</td>"
@ -837,11 +815,6 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
//"<td>%lli</td>"
//"</tr>"
"<tr>"
"<td><b>Objects Found</b></td>"
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>URLs Harvested</b> (inc. dups)</td>"
"<td>%lli</td>"
@ -868,37 +841,11 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Process Attempts</b></td>"
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Process Successes</b></td>"
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Process Successes This Round</b></td>"
"<td>%lli</td>"
"</tr>"
, cr->m_diffbotCrawlName.getBufStart()
, (long)cr->m_isCustomCrawl
, cr->m_diffbotToken.getBufStart()
, seedStr
, crawlStatus
, tmp.getBufStart()
, cr->m_spiderRoundNum
//, cr->m_spiderRoundNum
, cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider
, cr->m_globalCrawlInfo.m_objectsAdded -
cr->m_globalCrawlInfo.m_objectsDeleted
, cr->m_globalCrawlInfo.m_urlsHarvested
//, cr->m_globalCrawlInfo.m_urlsConsidered
@ -906,16 +853,13 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
, cr->m_globalCrawlInfo.m_pageDownloadSuccesses
, cr->m_globalCrawlInfo.m_pageDownloadSuccessesThisRound
, cr->m_globalCrawlInfo.m_pageProcessAttempts
, cr->m_globalCrawlInfo.m_pageProcessSuccesses
, cr->m_globalCrawlInfo.m_pageProcessSuccessesThisRound
);
}
if ( fmt != FORMAT_JSON )
// wrap up the form, print a submit button
g_pages.printAdminBottom ( &sb );
//if ( fmt != FORMAT_JSON )
// // wrap up the form, print a submit button
// g_pages.printAdminBottom ( &sb );
return g_httpServer.sendDynamicPage (socket,
sb.getBufStart(),

View File

@ -25,11 +25,11 @@
#include "Parms.h"
// so user can specify the format of the reply/output
#define FMT_HTML 1
#define FMT_XML 2
#define FMT_JSON 3
#define FMT_CSV 4
#define FMT_TXT 5
//#define FMT_HTML 1
//#define FMT_XML 2
//#define FMT_JSON 3
//#define FMT_CSV 4
//#define FMT_TXT 5
void doneSendingWrapper ( void *state , TcpSocket *sock ) ;
bool sendBackDump ( TcpSocket *s,HttpRequest *hr );
@ -158,25 +158,25 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
if ( ( xx = strstr ( path , "_data.json" ) ) ) {
rdbId = RDB_TITLEDB;
fmt = FMT_JSON;
fmt = FORMAT_JSON;
downloadJSON = true;
}
else if ( ( xx = strstr ( path , "_data.csv" ) ) ) {
rdbId = RDB_TITLEDB;
downloadJSON = true;
fmt = FMT_CSV;
fmt = FORMAT_CSV;
}
else if ( ( xx = strstr ( path , "_urls.csv" ) ) ) {
rdbId = RDB_SPIDERDB;
fmt = FMT_CSV;
fmt = FORMAT_CSV;
}
else if ( ( xx = strstr ( path , "_urls.txt" ) ) ) {
rdbId = RDB_SPIDERDB;
fmt = FMT_TXT;
fmt = FORMAT_TXT;
}
else if ( ( xx = strstr ( path , "_pages.txt" ) ) ) {
rdbId = RDB_TITLEDB;
fmt = FMT_TXT;
fmt = FORMAT_TXT;
}
// sanity, must be one of 3 download calls
@ -213,7 +213,7 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// . if doing download of csv, make it search results now!
// . make an httprequest on stack and call it
if ( fmt == FMT_CSV && rdbId == RDB_TITLEDB ) {
if ( fmt == FORMAT_CSV && rdbId == RDB_TITLEDB ) {
char tmp2[5000];
SafeBuf sb2(tmp2,5000);
long dr = 1;
@ -247,7 +247,7 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// . if doing download of json, make it search results now!
// . make an httprequest on stack and call it
if ( fmt == FMT_JSON && rdbId == RDB_TITLEDB ) {
if ( fmt == FORMAT_JSON && rdbId == RDB_TITLEDB ) {
char tmp2[5000];
SafeBuf sb2(tmp2,5000);
long dr = 1;
@ -514,13 +514,13 @@ bool StateCD::sendList ( ) {
//sb.setLabel("dbotdmp");
char *ct = "text/csv";
if ( m_fmt == FMT_JSON )
if ( m_fmt == FORMAT_JSON )
ct = "application/json";
if ( m_fmt == FMT_XML )
if ( m_fmt == FORMAT_XML )
ct = "text/xml";
if ( m_fmt == FMT_TXT )
if ( m_fmt == FORMAT_TXT )
ct = "text/plain";
if ( m_fmt == FMT_CSV )
if ( m_fmt == FORMAT_CSV )
ct = "text/csv";
// . if we haven't yet sent an http mime back to the user
@ -545,13 +545,13 @@ bool StateCD::sendList ( ) {
//CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
if ( ! m_printedFirstBracket && m_fmt == FMT_JSON ) {
if ( ! m_printedFirstBracket && m_fmt == FORMAT_JSON ) {
sb.safePrintf("[\n");
m_printedFirstBracket = true;
}
// these are csv files not xls
//if ( ! m_printedFirstBracket && m_fmt == FMT_CSV ) {
//if ( ! m_printedFirstBracket && m_fmt == FORMAT_CSV ) {
// sb.safePrintf("sep=,\n");
// m_printedFirstBracket = true;
//}
@ -638,7 +638,7 @@ bool StateCD::sendList ( ) {
// use this for printing out urls.csv as well...
m_printedEndingBracket = true;
// end array of json objects. might be empty!
if ( m_rdbId == RDB_TITLEDB && m_fmt == FMT_JSON )
if ( m_rdbId == RDB_TITLEDB && m_fmt == FORMAT_JSON )
sb.safePrintf("\n]\n");
//log("adding ]. len=%li",sb.length());
// i'd like to exit streaming mode here. i fixed tcpserver.cpp
@ -853,7 +853,7 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
}
// "csv" is default if json not specified
if ( m_fmt == FMT_JSON )
if ( m_fmt == FORMAT_JSON )
sb->safePrintf("[{"
"{\"url\":"
"\"%s\"},"
@ -997,7 +997,7 @@ void StateCD::printTitledbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
// if not json, just print the json item out in csv
// moved into PageResults.cpp...
//if ( m_fmt == FMT_CSV ) {
//if ( m_fmt == FORMAT_CSV ) {
// printJsonItemInCsv ( json , sb );
// continue;
//}
@ -1337,7 +1337,7 @@ bool sendReply2 (TcpSocket *socket , long fmt , char *msg ) {
// send this back to browser
SafeBuf sb;
if ( fmt == FMT_JSON ) {
if ( fmt == FORMAT_JSON ) {
sb.safePrintf("{\n\"response\":\"success\",\n"
"\"message\":\"%s\"\n}\n"
, msg );
@ -1368,7 +1368,7 @@ bool sendErrorReply2 ( TcpSocket *socket , long fmt , char *msg ) {
// send this back to browser
SafeBuf sb;
if ( fmt == FMT_JSON ) {
if ( fmt == FORMAT_JSON ) {
sb.safePrintf("{\"error\":\"%s\"}\n"
, msg );
ct = "application/json";
@ -1476,7 +1476,7 @@ void injectedUrlWrapper ( void *state ) {
// send back the html or json response?
SafeBuf *response = &sb;
if ( st->m_fmt == FMT_JSON ) response = &js;
if ( st->m_fmt == FORMAT_JSON ) response = &js;
// . this will call g_httpServer.sendReply()
// . pass it in the injection response, "sb"
@ -1673,7 +1673,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// . now show stats for the current crawl
// . put in xml or json if format=xml or format=json or
// xml=1 or json=1 ...
char fmt = FMT_JSON;
char fmt = FORMAT_JSON;
// token is always required. get from json or html form input
//char *token = getInputString ( "token" );
@ -1693,21 +1693,21 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
name++;
}
// change default formatting to html
fmt = FMT_HTML;
fmt = FORMAT_HTML;
}
char *fs = hr->getString("format",NULL,NULL);
// give john a json api
if ( fs && strcmp(fs,"html") == 0 ) fmt = FMT_HTML;
if ( fs && strcmp(fs,"json") == 0 ) fmt = FMT_JSON;
if ( fs && strcmp(fs,"xml") == 0 ) fmt = FMT_XML;
if ( fs && strcmp(fs,"html") == 0 ) fmt = FORMAT_HTML;
if ( fs && strcmp(fs,"json") == 0 ) fmt = FORMAT_JSON;
if ( fs && strcmp(fs,"xml") == 0 ) fmt = FORMAT_XML;
// if we got json as input, give it as output
//if ( JS.getFirstItem() ) fmt = FMT_JSON;
//if ( JS.getFirstItem() ) fmt = FORMAT_JSON;
if ( ! token && fmt == FMT_JSON ) { // (cast==0|| fmt == FMT_JSON ) ) {
if ( ! token && fmt == FORMAT_JSON ) { // (cast==0|| fmt == FORMAT_JSON ) ) {
char *msg = "invalid token";
return sendErrorReply2 (socket,fmt,msg);
}
@ -1772,7 +1772,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
//}
// just send back a list of all the collections after the delete
//if ( delColl && cast && fmt == FMT_JSON ) {
//if ( delColl && cast && fmt == FORMAT_JSON ) {
// char *msg = "Collection deleted.";
// return sendReply2 (socket,fmt,msg);
//}
@ -2263,7 +2263,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
/*
bool printUrlFilters ( SafeBuf &sb , CollectionRec *cr , long fmt ) {
if ( fmt == FMT_JSON )
if ( fmt == FORMAT_JSON )
sb.safePrintf("\"urlFilters\":[");
// skip first filters that are:
@ -2303,7 +2303,7 @@ bool printUrlFilters ( SafeBuf &sb , CollectionRec *cr , long fmt ) {
// urls higher spider priority, so skip it
if ( strncmp(expression,"ismanualadd && ",15) == 0 )
continue;
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
sb.safePrintf("<tr>"
"<td>Expression "
"<input type=text "
@ -2328,7 +2328,7 @@ bool printUrlFilters ( SafeBuf &sb , CollectionRec *cr , long fmt ) {
sb.pushChar('\n');
}
if ( fmt == FMT_JSON ) {
if ( fmt == FORMAT_JSON ) {
// remove trailing comma
sb.removeLastChar('\n');
sb.removeLastChar(',');
@ -2519,7 +2519,7 @@ bool printCrawlDetailsInJson ( SafeBuf *sb , CollectionRec *cx ) {
true // isJSON?
);
*/
//printUrlFilters ( sb , cx , FMT_JSON );
//printUrlFilters ( sb , cx , FORMAT_JSON );
// end that collection rec
sb->safePrintf("}\n");
@ -2537,7 +2537,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// store output into here
SafeBuf sb;
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
sb.safePrintf(
"<html>"
"<title>Crawlbot - "
@ -2573,7 +2573,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
lb.urlEncode(name);
lb.safePrintf ("&token=");
lb.urlEncode(token);
if ( fmt == FMT_HTML ) lb.safePrintf("&format=html");
if ( fmt == FORMAT_HTML ) lb.safePrintf("&format=html");
lb.nullTerm();
@ -2590,7 +2590,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//}
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
sb.safePrintf("<table border=0>"
"<tr><td>"
"<b><font size=+2>"
@ -2645,7 +2645,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// print list of collections controlled by this token
//
for ( long i = 0 ; fmt == FMT_HTML && i<g_collectiondb.m_numRecs;i++ ){
for ( long i = 0 ; fmt == FORMAT_HTML && i<g_collectiondb.m_numRecs;i++ ){
CollectionRec *cx = g_collectiondb.m_recs[i];
if ( ! cx ) continue;
// get its token if any
@ -2677,19 +2677,19 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
sb.safePrintf("</font></b>");
}
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
sb.safePrintf ( "</center><br/>" );
// the ROOT JSON [
if ( fmt == FMT_JSON )
if ( fmt == FORMAT_JSON )
sb.safePrintf("{\n");
// injection is currently not in use, so this is an artifact:
if ( fmt == FMT_JSON && injectionResponse )
if ( fmt == FORMAT_JSON && injectionResponse )
sb.safePrintf("\"response\":\"%s\",\n\n"
, injectionResponse->getBufStart() );
if ( fmt == FMT_JSON && urlUploadResponse )
if ( fmt == FORMAT_JSON && urlUploadResponse )
sb.safePrintf("\"response\":\"%s\",\n\n"
, urlUploadResponse->getBufStart() );
@ -2702,14 +2702,14 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// the items in the array now have type:bulk or type:crawl
// so call them 'jobs'
if ( fmt == FMT_JSON )
if ( fmt == FORMAT_JSON )
sb.safePrintf("\"jobs\":[");//\"collections\":");
long summary = hr->getLong("summary",0);
// enter summary mode for json
if ( fmt != FMT_HTML ) summary = 1;
if ( fmt != FORMAT_HTML ) summary = 1;
// start the table
if ( summary && fmt == FMT_HTML ) {
if ( summary && fmt == FORMAT_HTML ) {
sb.safePrintf("<table border=1 cellpadding=5>"
"<tr>"
"<td><b>Collection</b></td>"
@ -2740,11 +2740,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// just print out single crawl info for json
if ( fmt != FMT_HTML && cx != cr && name3 )
if ( fmt != FORMAT_HTML && cx != cr && name3 )
continue;
// if json, print each collectionrec
if ( fmt == FMT_JSON ) {
if ( fmt == FORMAT_JSON ) {
if ( ! firstOne )
sb.safePrintf(",\n\t");
firstOne = false;
@ -2786,7 +2786,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
, cx->m_globalCrawlInfo.m_pageProcessSuccessesThisRound
);
}
if ( summary && fmt == FMT_HTML ) {
if ( summary && fmt == FORMAT_HTML ) {
sb.safePrintf("</table></html>" );
return g_httpServer.sendDynamicPage (socket,
sb.getBufStart(),
@ -2794,7 +2794,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
0); // cachetime
}
if ( fmt == FMT_JSON )
if ( fmt == FORMAT_JSON )
// end the array of collection objects
sb.safePrintf("\n]\n");
@ -2808,7 +2808,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// show urls being crawled (ajax) (from Spider.cpp)
//
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
sb.safePrintf ( "<table width=100%% cellpadding=5 "
"style=border-width:1px;border-style:solid;"
"border-color:black;>"
@ -2879,7 +2879,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
rand64 |= r2;
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
sb.safePrintf("<br>"
"<table border=0 cellpadding=5>"
@ -2952,12 +2952,12 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
);
}
if ( injectionResponse && fmt == FMT_HTML )
if ( injectionResponse && fmt == FORMAT_HTML )
sb.safePrintf("<br><font size=-1>%s</font>\n"
,injectionResponse->getBufStart()
);
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
sb.safePrintf(//"<input type=hidden name=c value=\"%s\">"
//"<input type=hidden name=crawlbotapi value=1>"
"</td>"
@ -2996,7 +2996,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// show stats
//
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
char *seedStr = cr->m_diffbotSeeds.getBufStart();
if ( ! seedStr ) seedStr = "";
@ -3654,7 +3654,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// xml or json does not show the input boxes
//if ( format != FMT_HTML )
//if ( format != FORMAT_HTML )
// return g_httpServer.sendDynamicPage ( s,
// sb.getBufStart(),
// sb.length(),
@ -3677,7 +3677,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
s2 = "";
}
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
sb.safePrintf(
"<a onclick="
@ -3721,7 +3721,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// print url filters. HACKy...
//
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
g_parms.sendPageGeneric ( socket ,
hr ,
PAGE_FILTERS ,
@ -3732,7 +3732,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// end HACKy hack
//
if ( fmt == FMT_HTML )
if ( fmt == FORMAT_HTML )
sb.safePrintf(
"</form>"
"</div>"
@ -3760,7 +3760,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// show simpler url filters table
//
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
/*
sb.safePrintf ( "<table>"
"<tr><td colspan=2>"
@ -3796,7 +3796,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
// show reset and delete crawl buttons
//
if ( fmt == FMT_HTML ) {
if ( fmt == FORMAT_HTML ) {
sb.safePrintf(
"<table cellpadding=5>"
"<tr>"
@ -3859,13 +3859,13 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// the ROOT JSON }
if ( fmt == FMT_JSON )
if ( fmt == FORMAT_JSON )
sb.safePrintf("}\n");
char *ct = "text/html";
if ( fmt == FMT_JSON ) ct = "application/json";
if ( fmt == FMT_XML ) ct = "text/xml";
if ( fmt == FMT_CSV ) ct = "text/csv";
if ( fmt == FORMAT_JSON ) ct = "application/json";
if ( fmt == FORMAT_XML ) ct = "text/xml";
if ( fmt == FORMAT_CSV ) ct = "text/csv";
// this could be in html json or xml
return g_httpServer.sendDynamicPage ( socket,
@ -4142,7 +4142,7 @@ bool setSpiderParmsFromJSONPost ( TcpSocket *socket ,
char *json = hr->getString("json");
if ( ! json )
return sendReply2 ( socket,
FMT_JSON,
FORMAT_JSON,
"No &json= provided in request.");
@ -4151,12 +4151,12 @@ bool setSpiderParmsFromJSONPost ( TcpSocket *socket ,
// wtf?
if ( ! status )
return sendReply2 ( socket, FMT_JSON,
return sendReply2 ( socket, FORMAT_JSON,
"Error with JSON parser.");
// error adding it?
if ( ! cr )
return sendReply2 ( socket,FMT_JSON,
return sendReply2 ( socket,FORMAT_JSON,
"Failed to create new collection.");
ji = JP.getFirstItem();

View File

@ -169,6 +169,11 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {
sb.safePrintf("</form>\n");
sb.safePrintf("<br>\n");
sb.safePrintf("\n");
// print any red boxes we might need to
if ( printRedBox2 ( &sb , true ) )
sb.safePrintf("<br>\n");
sb.safePrintf("<table cellpadding=3>\n");
sb.safePrintf("\n");

152
Pages.cpp
View File

@ -50,6 +50,9 @@ static WebPage s_pages[] = {
"dummy page - if set in the users row then user will have master=0 and "
" collection links will be highlighted in red",
NULL, 0 },
//{ PAGE_QUALITY , "quality", 0, "quality", 0, 0,
// "dummy page - if set in the users row then \"Quality Control\""
// " will be printed besides the logo for certain pages",
@ -102,12 +105,66 @@ static WebPage s_pages[] = {
// "Basic diffbot page.", sendPageBasicDiffbot , 0 } ,
{ PAGE_BASIC_SECURITY, "admin/security", 0 , "security",1, 0 ,
"Basic security page.", sendPageGeneric , 0 } ,
{ PAGE_BASIC_SEARCH, "", 0 , "search",1, 0 ,
"Basic search page.", sendPageRoot , 0 } ,
{ PAGE_MASTER , "admin/master" , 0 , "master controls" , 1 , 0 ,
//USER_MASTER | USER_PROXY ,
"master controls page",
sendPageGeneric , 0 } ,
{ PAGE_SEARCH , "admin" , 0 , "search controls" , 1 , 1,
//USER_ADMIN | USER_MASTER ,
"search controls page",
sendPageGeneric , 0 } ,
{ PAGE_SPIDER , "admin/spider" , 0 , "spider controls" , 1 , 0,
//USER_ADMIN | USER_MASTER | USER_PROXY ,
"spider controls page",
sendPageGeneric , 0 } ,
{ PAGE_LOG , "admin/log" , 0 , "log controls" , 1 , 0 ,
//USER_MASTER | USER_PROXY,
"log page",
sendPageGeneric , 0 } ,
{ PAGE_SECURITY, "admin/security2", 0 , "security" , 1 , 0 ,
//USER_MASTER | USER_PROXY ,
"advanced security page",
sendPageGeneric , 0 } ,
{ PAGE_ADDCOLL , "admin/addcoll" , 0 , "add collection" , 1 , 0 ,
//USER_MASTER ,
"add a new collection using this page",
sendPageAddColl , 0 } ,
{ PAGE_DELCOLL , "admin/delcoll" , 0 , "delete collections" , 1 ,0,
//USER_MASTER ,
"delete a collection using this page",
sendPageDelColl , 0 } ,
{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
//USER_MASTER ,
"repair page",
sendPageGeneric , 0 },
{ PAGE_SITES , "admin/sites", 0 , "site list" , 1 , 1,
"what sites can be spidered",
sendPageGeneric , 0 } , // sendPageBasicSettings
{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 , 1,
//USER_ADMIN | USER_MASTER ,
"prioritize urls for spidering",
sendPageGeneric , 0 } ,
{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0 , 1 ,
//USER_ADMIN | USER_MASTER ,
"inject url in the index here",
sendPageInject , 2 } ,
// this is the addurl page for the admin!

{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
"add url page for admin",
sendPageAddUrl2 , 0 } ,
{ PAGE_REINDEX , "admin/reindex" , 0 , "query reindex" , 0 , 0 ,
//USER_ADMIN | USER_MASTER,
"reindex url page",
sendPageReindex , 0 } ,
{ PAGE_HOSTS , "admin/hosts" , 0 , "hosts" , 0 , 0 ,
//USER_MASTER | USER_PROXY,
@ -134,10 +191,7 @@ static WebPage s_pages[] = {
//USER_MASTER | USER_PROXY,
"sockets page",
sendPageSockets , 0 } ,
{ PAGE_LOG , "admin/log" , 0 , "log controls" , 1 , 0 ,
//USER_MASTER | USER_PROXY,
"log page",
sendPageGeneric , 0 } ,
{ PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 ,
//USER_MASTER ,
"logview page",
@ -147,18 +201,6 @@ static WebPage s_pages[] = {
// "sync page",
// sendPageGeneric , 0 } ,
{ PAGE_SECURITY, "admin/security2", 0 , "security" , 1 , 0 ,
//USER_MASTER | USER_PROXY ,
"advanced security page",
sendPageGeneric , 0 } ,
{ PAGE_ADDCOLL , "admin/addcoll" , 0 , "add collection" , 1 , 0 ,
//USER_MASTER ,
"add a new collection using this page",
sendPageAddColl , 0 } ,
{ PAGE_DELCOLL , "admin/delcoll" , 0 , "delete collections" , 1 ,0,
//USER_MASTER ,
"delete a collection using this page",
sendPageDelColl , 0 } ,
{ PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , 1 ,
//USER_MASTER | USER_PROXY ,
"autobanned ips",
@ -175,10 +217,6 @@ static WebPage s_pages[] = {
//USER_MASTER ,
"threads page",
sendPageThreads , 0 },
{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
//USER_MASTER ,
"repair page",
sendPageGeneric , 0 },
//{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 ,
// //USER_MASTER ,
// "thesaurus page",
@ -207,14 +245,6 @@ static WebPage s_pages[] = {
"titledb page",
sendPageTitledb , 2 } ,
// 1 = usePost
{ PAGE_SEARCH , "admin" , 0 , "search controls" , 1 , 1,
//USER_ADMIN | USER_MASTER ,
"search controls page",
sendPageGeneric , 0 } ,
{ PAGE_SPIDER , "admin/spider" , 0 , "spider controls" , 1 , 0,
//USER_ADMIN | USER_MASTER | USER_PROXY ,
"spider controls page",
sendPageGeneric , 0 } ,
{ PAGE_CRAWLBOT , "crawlbot" , 0 , "crawlbot" , 1 , 0,
"simplified spider controls page",
@ -229,30 +259,6 @@ static WebPage s_pages[] = {
// "spider priorities page",
// sendPageGeneric , 0 } ,
{ PAGE_SITES , "admin/sites", 0 , "site list" , 1 , 1,
"what sites can be spidered",
sendPageGeneric , 0 } , // sendPageBasicSettings
{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 , 1,
//USER_ADMIN | USER_MASTER ,
"prioritize urls for spidering",
sendPageGeneric , 0 } ,
{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0 , 1 ,
//USER_ADMIN | USER_MASTER ,
"inject url in the index here",
sendPageInject , 2 } ,
// this is the addurl page the the admin!
{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
"add url page for admin",
sendPageAddUrl2 , 0 } ,
{ PAGE_REINDEX , "admin/reindex" , 0 , "query reindex" , 0 , 0 ,
//USER_ADMIN | USER_MASTER,
"reindex url page",
sendPageReindex , 0 } ,
//{ PAGE_KEYWORDS, "admin/queries",0,"queries" , 0 , 1 ,
// "get queries a url matches",
// sendPageMatchingQueries , 2 } ,
@ -893,8 +899,6 @@ bool Pages::getNiceness ( long page ) {
return s_pages[page].m_niceness;
}
bool printRedBox ( SafeBuf *mb ) ;
///////////////////////////////////////////////////////////
//
// Convenient html printing routines
@ -1056,6 +1060,7 @@ bool Pages::printAdminTop (SafeBuf *sb ,
//if ( page == PAGE_BASIC_DIFFBOT ) isBasic = true;
//if ( page == PAGE_BASIC_SEARCH ) isBasic = true;
if ( page == PAGE_BASIC_SECURITY ) isBasic = true;
if ( page == PAGE_BASIC_SEARCH ) isBasic = true;
//
// print breadcrumb. main > Basic > Settings
@ -1791,7 +1796,7 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
// is this page basic?
bool pageBasic = false;
if ( i >= PAGE_BASIC_SETTINGS &&
i <= PAGE_BASIC_SECURITY )
i <= PAGE_BASIC_SEARCH )
pageBasic = true;
// print basic pages under the basic menu, advanced pages
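This range test is why the enum in Pages.h is reshuffled below: the basic pages must form a contiguous block that now ends at PAGE_BASIC_SEARCH, and the s_pages[] table in Pages.cpp has to list its entries in the same order as that enum. A one-line restatement of the invariant:

// the basic/advanced menu split assumes a contiguous PAGE_BASIC_* range;
// a new basic page has to land inside that range in both Pages.h and s_pages[]
bool pageBasic = ( i >= PAGE_BASIC_SETTINGS && i <= PAGE_BASIC_SEARCH );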
@ -2627,9 +2632,18 @@ bool sendPageLogin ( TcpSocket *socket , HttpRequest *hr ) {
NULL);// cookie
}
bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage ) {
SafeBuf mb;
// return false if no red box
if ( ! printRedBox ( &mb , isRootWebPage ) ) return false;
// otherwise, print it
sb->safeStrcpy ( mb.getBufStart() );
// return true since we printed one
return true;
}
// emergency message box
bool printRedBox ( SafeBuf *mb ) {
bool printRedBox ( SafeBuf *mb , bool isRootWebPage ) {
PingServer *ps = &g_pingServer;
@ -2649,11 +2663,33 @@ bool printRedBox ( SafeBuf *mb ) {
char *boxEnd =
"</td></tr></table>";
bool adds = false;
long adds = 0;
mb->safePrintf("<div style=max-width:500px;>");
// are we just starting off? give them a little help.
CollectionRec *cr = g_collectiondb.getRec("main");
if ( g_collectiondb.m_numRecs == 1 &&
cr &&
isRootWebPage &&
cr->m_globalCrawlInfo.m_pageDownloadAttempts == 0 ) {
if ( adds ) mb->safePrintf("<br>");
adds++;
mb->safePrintf("%s",box);
mb->safePrintf("Welcome to Gigablast. The most powerful "
"search engine you can legally download. "
"Please add the websites you want to spider "
"<a href=/admin/settings?c=main>here</a>."
);
mb->safePrintf("%s",boxEnd);
}
if ( isRootWebPage ) {
mb->safePrintf("</div>");
return (bool)adds;
}
if ( g_conf.m_numConnectIps == 0 && g_conf.m_numMasterPwds == 0 ) {
if ( adds ) mb->safePrintf("<br>");
adds++;
@ -2738,5 +2774,5 @@ bool printRedBox ( SafeBuf *mb ) {
mb->safePrintf("</div>");
return adds;
return (bool)adds;
}

31
Pages.h
View File

@ -5,6 +5,9 @@
#ifndef _PAGES_H_
#define _PAGES_H_
bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage = false ) ;
bool printRedBox ( SafeBuf *mb , bool isRootWebPage = false ) ;
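The defaulted isRootWebPage argument keeps existing printRedBox() call sites compiling unchanged, while the root web page opts in explicitly through the new wrapper; for example (the second call is the one added to PageRoot.cpp in this commit):

printRedBox  ( &mb );         // admin pages: same as printRedBox ( &mb , false )
printRedBox2 ( &sb , true );  // root web page: prints the welcome box when warranted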
// for PageEvents.cpp and Accessdb.cpp
//#define RESULTSWIDTHSTR "550px"
@ -304,25 +307,36 @@ enum {
//PAGE_BASIC_SEARCH , // TODO
//PAGE_BASIC_DIFFBOT , // TODO
PAGE_BASIC_SECURITY ,
PAGE_BASIC_SEARCH ,
// master admin pages
PAGE_MASTER ,
PAGE_SEARCH ,
PAGE_SPIDER ,
PAGE_LOG ,
PAGE_SECURITY ,
PAGE_ADDCOLL ,
PAGE_DELCOLL ,
PAGE_REPAIR ,
PAGE_SITES , // site filters
PAGE_FILTERS ,
PAGE_INJECT ,
PAGE_ADDURL2 ,
PAGE_REINDEX ,
PAGE_HOSTS ,
PAGE_STATS , // 10
PAGE_STATSDB ,
PAGE_PERF ,
PAGE_SOCKETS ,
PAGE_LOG ,
PAGE_LOGVIEW ,
// PAGE_SYNC ,
PAGE_SECURITY ,
PAGE_ADDCOLL ,
PAGE_DELCOLL ,
PAGE_AUTOBAN , // 20
//PAGE_SPIDERLOCKS ,
PAGE_PROFILER ,
PAGE_THREADS ,
PAGE_REPAIR ,
// PAGE_THESAURUS ,
// . non master-admin pages (collection controls)
@ -335,16 +349,9 @@ enum {
PAGE_TITLEDB ,
//PAGE_STATSDB ,
PAGE_SEARCH ,
PAGE_SPIDER ,
PAGE_CRAWLBOT , // 35
PAGE_SPIDERDB ,
//PAGE_PRIORITIES , // priority queue controls
PAGE_SITES , // site filters
PAGE_FILTERS ,
PAGE_INJECT ,
PAGE_ADDURL2 ,
PAGE_REINDEX ,
//PAGE_KEYWORDS ,
PAGE_SEO ,
PAGE_ACCESS , //40

View File

@ -1888,7 +1888,7 @@ bool Parms::printParm ( SafeBuf* sb,
"value=\"%f\" "
// 3 was ok on firefox but need 6
// on chrome
"size=6>",cgi,*(float *)s);
"size=7>",cgi,*(float *)s);
}
else if ( t == TYPE_IP ) {
if ( m->m_max > 0 && j == jend )
@ -1896,7 +1896,7 @@ bool Parms::printParm ( SafeBuf* sb,
"size=12>",cgi);
else
sb->safePrintf ("<input type=text name=%s value=\"%s\" "
"size=6>",cgi,iptoa(*(long *)s));
"size=12>",cgi,iptoa(*(long *)s));
}
else if ( t == TYPE_LONG ) {
// just show the parm name and value if printing in json
@ -7534,6 +7534,7 @@ void Parms::init ( ) {
m->m_flags = PF_TEXTAREA;
m++;
/*
// the new upload post submit button
m->m_title = "upload urls";
m->m_desc = "Upload your file of urls.";
@ -7542,6 +7543,7 @@ void Parms::init ( ) {
m->m_obj = OBJ_NONE;
m->m_type = TYPE_FILEUPLOADBUTTON;
m++;
*/
m->m_title = "strip sessionids";
m->m_desc = "Strip added urls of their session ids.";
@ -7591,6 +7593,7 @@ void Parms::init ( ) {
m->m_title = "site list";
m->m_xml = "siteList";
m->m_desc = "List of sites to spider, one per line. "
"See <a href=#examples>example site list</a> below. "
"Gigablast uses the "
"<a href=/admin/filters#insitelist>insitelist</a> "
"directive on "
@ -7599,8 +7602,7 @@ void Parms::init ( ) {
"that match the site patterns you specify here, other than "
"urls you add individually via the add urls or inject url "
"tools. "
"See <a href=#examples>example site list</a> below. "
"Limit list to 300MB. If you have a lot of INDIVIDUAL URLS "
"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
"to add then consider using the <a href=/admin/addurl>add "
"urls</a> interface.";
m->m_cgi = "sitelist";
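The description points at an example site list further down the admin page; a short illustrative list of the kind of patterns meant here, one per line (the entries and their exact matching semantics are assumptions, not taken from this commit):

example.com
www.example.org
http://blogs.example.net/tech/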
@ -7629,6 +7631,7 @@ void Parms::init ( ) {
m++;
*/
/*
// the new upload post submit button
m->m_title = "upload site list";
m->m_desc = "Upload your file of site patterns. Completely replaces "
@ -7640,12 +7643,13 @@ void Parms::init ( ) {
m->m_type = TYPE_FILEUPLOADBUTTON;
m->m_flags = PF_NOSAVE | PF_DUP;
m++;
*/
m->m_title = "restart collection";
m->m_desc = "Remove all documents from this collection and starts "
"spidering over again. If you do this accidentally there "
"is a <a href=/admin.html#recover>recovery procedure</a> to "
"get back the trashed data.";
m->m_desc = "Remove all documents from this collection and restart "
"spidering.";// If you do this accidentally there "
//"is a <a href=/admin.html#recover>recovery procedure</a> to "
// "get back the trashed data.";
m->m_cgi = "restart";
m->m_page = PAGE_BASIC_SETTINGS;
m->m_obj = OBJ_COLL;
@ -7659,6 +7663,7 @@ void Parms::init ( ) {
m->m_title = "site list";
m->m_xml = "siteList";
m->m_desc = "List of sites to spider, one per line. "
"See <a href=#examples>example site list</a> below. "
"Gigablast uses the "
"<a href=/admin/filters#insitelist>insitelist</a> "
"directive on "
@ -7667,8 +7672,7 @@ void Parms::init ( ) {
"that match the site patterns you specify here, other than "
"urls you add individually via the add urls or inject url "
"tools. "
"See <a href=#examples>example site list</a> below. "
"Limit list to 300MB. If you have a lot of INDIVIDUAL URLS "
"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
"to add then consider using the <a href=/admin/addurl>addurl"
"</a> interface.";
m->m_cgi = "sitelist";
@ -8762,11 +8766,11 @@ void Parms::init ( ) {
m++;
m->m_title = "max robots.txt cache age";
m->m_desc = "How many second to cache a robots.txt file for. "
m->m_desc = "How many seconds to cache a robots.txt file for. "
"86400 is 1 day. 0 means Gigablast will not read from the "
"cache at all and will download the robots.txt before every "
"page if robots.txt use is enabled above. However, if this is "
"0 then Gigablast will still store robots.txt files into the "
"0 then Gigablast will still store robots.txt files in the "
"cache.";
m->m_cgi = "mrca";
m->m_off = (char *)&cr.m_maxRobotsCacheAge - x;
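A value of 0 is deliberately asymmetric: robots.txt files are still written to the cache but never read back from it. A hedged sketch of that policy (cacheGet, cachePut, downloadRobotsTxt and the RobotsTxt type are illustrative names only; the real logic lives in the spider download path):

// illustrative sketch; the helpers named here are not real gb calls
bool getRobotsTxt ( Url *u , CollectionRec *cr , RobotsTxt *out ) {
	long maxAge = cr->m_maxRobotsCacheAge;              // in seconds; 86400 = 1 day
	// a max age of 0 disables cache reads...
	if ( maxAge > 0 && cacheGet ( u , maxAge , out ) ) return true;
	if ( ! downloadRobotsTxt ( u , out ) ) return false;
	cachePut ( u , out );                               // ...but not cache writes
	return true;
}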
@ -10639,8 +10643,9 @@ void Parms::init ( ) {
m++;
m->m_title = "do query expansion";
m->m_desc = "Query expansion will include word stems and synonyms in "
"its search results.";
m->m_desc = "If enabled, query expansion will expand your query "
"to include word stems and "
"synonyms of the query terms.";
m->m_def = "1";
m->m_off = (char *)&cr.m_queryExpansion - x;
m->m_soff = (char *)&si.m_queryExpansion - y;
@ -10653,7 +10658,7 @@ void Parms::init ( ) {
// more general parameters
m->m_title = "max search results";
m->m_desc = "What is the limit to the total number "
m->m_desc = "What is the maximum total number "
"of returned search results.";
m->m_cgi = "msr";
m->m_off = (char *)&cr.m_maxSearchResults - x;
@ -12457,7 +12462,7 @@ void Parms::init ( ) {
m++;
m->m_title = "max summary line width";
m->m_desc = "<br> tags are inserted to keep the number "
m->m_desc = "&lt;br&gt; tags are inserted to keep the number "
"of chars in the summary per line at or below this width. "
"Strings without spaces that exceed this "
"width are not split.";

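The wrapping rule described above (insert <br> once a line reaches the width limit, but never split a token that contains no spaces) can be sketched as follows; this only illustrates the parameter's semantics and is not the code the summary generator uses:

// illustrative sketch of the max-summary-line-width rule
void wrapSummaryLine ( SafeBuf *out , char *sum , long maxWidth ) {
	long col = 0;
	for ( char *p = sum ; *p ; ) {
		char *e = p;
		while ( *e && ! is_wspace_a(*e) ) e++;        // find end of word
		long wlen = e - p;
		if ( col > 0 && col + 1 + wlen > maxWidth ) { // word would overflow this line
			out->safePrintf ( "<br>" );
			col = 0;
		}
		else if ( col > 0 ) {
			out->pushChar ( ' ' );
			col++;
		}
		out->safeMemcpy ( p , wlen );                 // an over-long word is copied unsplit
		col += wlen;
		for ( p = e ; *p && is_wspace_a(*p) ; p++ );  // skip whitespace between words
	}
}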
View File

@ -15163,7 +15163,7 @@ bool Sections::printVotingInfoInJSON ( SafeBuf *sb ) {
// breathe
QUICKPOLL ( m_niceness );
// print this section
printSectionDiv ( sk , FMT_JSON ); // forProCog );
printSectionDiv ( sk , FORMAT_JSON ); // forProCog );
// advance
long b = sk->m_b;
// stop if last
@ -15190,7 +15190,8 @@ bool Sections::print2 ( SafeBuf *sbuf ,
HashTableX *st2 ,
HashTableX *tt ,
Addresses *aa ,
char format ) { // bool forProCog ){//FMT_PROCOG FMT_JSON HTML
char format ) { // bool forProCog ){
//FORMAT_PROCOG FORMAT_JSON HTML
//sbuf->safePrintf("<b>Sections in Document</b>\n");
@ -15244,7 +15245,7 @@ bool Sections::print2 ( SafeBuf *sbuf ,
sk = m_sectionPtrs[b];
}
if ( format != FMT_HTML ) return true; // forProCog
if ( format != FORMAT_HTML ) return true; // forProCog
// print header
char *hdr =
@ -15553,7 +15554,7 @@ bool Sections::printSectionDiv ( Section *sk , char format ) { // bool forProCog
// m_sbuf->safePrintf("A=%li ",sk->m_a);
if ( format == FMT_PROCOG && sk->m_stats.m_numUniqueSites >= 2 ) {
if ( format == FORMAT_PROCOG && sk->m_stats.m_numUniqueSites >= 2 ) {
// do not count our own site!
m_sbuf->safePrintf("<i>"
"<font size=-1>"
@ -15573,7 +15574,7 @@ bool Sections::printSectionDiv ( Section *sk , char format ) { // bool forProCog
m_sbuf->safePrintf("<i>");
if ( format == FMT_PROCOG && (sk->m_flags & SEC_SENTENCE) ) {
if ( format == FORMAT_PROCOG && (sk->m_flags & SEC_SENTENCE) ) {
sec_t f = sk->m_flags;
//if ( f & SEC_SENTENCE )
// m_sbuf->safePrintf("sentence " );
@ -15598,7 +15599,7 @@ bool Sections::printSectionDiv ( Section *sk , char format ) { // bool forProCog
// m_sbuf->safePrintf("notdupvotes=%li ",
// sk->m_votesForNotDup);
if ( format != FMT_PROCOG ) {
if ( format != FORMAT_PROCOG ) {
// print the flags
m_sbuf->safePrintf("A=%li ",sk->m_a);

View File

@ -12414,7 +12414,7 @@ bool getSpiderStatusMsg ( CollectionRec *cx , SafeBuf *msg , long *status ) {
if ( cx->m_isCustomCrawl )
return msg->safePrintf("Job is in progress.");
else
return true;
return msg->safePrintf("Spider is in progress.");
}
// pattern is a ||-separted list of substrings

View File

@ -1,416 +0,0 @@
# List of sites to spider, one per line. Gigablast uses the <a
# href=/admin/filters#insitelist>insitelist</a> directive on the <a
# href=/admin/filters>url filters</a> page to make sure that the spider only
# indexes urls that match the site patterns you specify here, other than urls
# you add individually via the add urls or inject url tools. See <a
# href=#examples>example site list</a> below. Limit list to 300MB. If you have
# a lot of INDIVIDUAL URLS to add then consider using the <a
# href=/admin/addurl>addurl</a> interface.
<siteList><![CDATA[]]></>
# All <, >, " and # characters that are values for a field contained herein
# must be represented as &lt;, &gt;, &#34; and &#035; respectively.
# Controls just the spiders for this collection.
<spideringEnabled>1</>
# What is the maximum number of web pages the spider is allowed to download
# simultaneously PER HOST for THIS collection?
<maxSpiders>100</>
# make each spider wait this many milliseconds before getting the ip and
# downloading the page.
<spiderDelayInMilliseconds>0</>
# If this is true Gigablast will respect the robots.txt convention.
<useRobotstxt>1</>
# How many seconds to cache a robots.txt file for. 86400 is 1 day. 0 means
# Gigablast will not read from the cache at all and will download the
# robots.txt before every page if robots.txt use is enabled above. However, if
# this is 0 then Gigablast will still store robots.txt files into the cache.
<maxRobotstxtCacheAge>86400</>
# Do a tight merge on posdb and titledb at this time every day. This is
# expressed in MINUTES past midnight UTC. UTC is 5 hours ahead of EST and 7
# hours ahead of MST. Leave this as -1 to NOT perform a daily merge. To merge
# at midnight EST use 60*5=300 and midnight MST use 60*7=420.
<dailyMergeTime>-1</>
# Comma separated list of days to merge on. Use 0 for Sunday, 1 for Monday,
# ... 6 for Saturday. Leaving this parameter empty or without any numbers will
# make the daily merge happen every day.
<dailyMergeDays><![CDATA[0]]></>
# When the daily merge was last kicked off. Expressed in UTC in seconds since
# the epoch.
<dailyMergeLastStarted>-1</>
# If this is true, users will have to pass a simple Turing test to add a url.
# This prevents automated url submission.
<turingTestEnabled>0</>
# Maximum number of urls that can be submitted via the addurl interface, per
# IP domain, per 24 hour period. A value less than or equal to zero implies no
# limit.
<maxAddUrls>0</>
# When the spider round started
<spiderRoundStartTime>0</>
# The spider round number.
<spiderRoundNum>0</>
# When enabled, the spider will discard web pages which are identical to other
# web pages that are already in the index. However, root urls, urls that have
# no path, are never discarded. It most likely has to hit disk to do these
# checks so it does cause some slowdown. Only use it if you need it.
<dedupingEnabled>0</>
# When enabled, the spider will discard web pages which, when a www is
# prepended to the page's url, result in a url already in the index.
<dedupingEnabledForWww>1</>
# Detect and do not index pages which have a 200 status code, but are likely
# to be error pages.
<detectCustomErrorPages>1</>
# Should pages be removed from the index if they are no longer accessible on
# the web?
<delete404s>1</>
# If this is true, the spider, when a url redirects to a "simpler" url, will
# add that simpler url into the spider queue and abandon the spidering of the
# current url.
<useSimplifiedRedirects>1</>
# If this is true, the spider, when updating a web page that is already in the
# index, will not even download the whole page if it hasn't been updated since
# the last time Gigablast spidered it. This is primarily a bandwidth saving
# feature. It relies on the remote webserver's returned Last-Modified-Since
# field being accurate.
<useIfModifiedSince>0</>
# If this is true, do not allow spammy inlinks to vote. This check is too
# aggressive for some collections, i.e. it does not allow pages with cgi in
# their urls to vote.
<doLinkSpamChecking>1</>
# If this is true Gigablast will only allow one vote per the top 2 significant
# bytes of the IP address. Otherwise, multiple pages from the same top IP can
# contribute to the link text and link-based quality ratings of a particular
# URL. Furthermore, no votes will be accepted from IPs that have the same top
# 2 significant bytes as the IP of the page being indexed.
<restrictLinkVotingByIp>1</>
# How often should Gigablast recompute the link info for a url. Also applies
# to getting the quality of a site or root url, which is based on the link
# info. In days. Can use decimals. 0 means to update the link info every time
# the url's content is re-indexed. If the content is not reindexed because it
# is unchanged then the link info will not be updated. When getting the link
# info or quality of the root url from an external cluster, Gigablast will
# tell the external cluster to recompute it if its age is this or higher.
<updateLinkInfoFrequency>60.000000</>
# If this is enabled the spider will not allow any docs which are determined to
# be serps.
<doSerpDetection>1</>
# If this is false then the filter will not be used on html or text pages.
<applyFilterToTextPages>0</>
# Program to spawn to filter all HTTP replies the spider receives. Leave blank
# for none.
<filterName><![CDATA[]]></>
# Kill filter shell after this many seconds. Assume it stalled permanently.
<filterTimeout>40</>
# Retrieve pages from the proxy at this IP address.
<proxyIp>0.0.0.0</>
# Retrieve pages from the proxy on this port.
<proxyPort>0</>
# Index the body of the documents so you can search it. Required for searching
# that. You will pretty much always want to keep this enabled.
<indexBody>1</>
# Send every spidered url to this diffbot.com by appending a &url=<url> to it
# before trying to download it. We expect to get back a JSON reply which
# we index. You will need to supply your token to this as well.
<diffbotApiUrl><![CDATA[]]></>
# Get scoring information for each result so you can see how each result is
# scored? You must explicitly request this using &scores=1 for the XML feed
# because it is not included by default.
<getDocidScoringInfo>1</>
# Query expansion will include word stems and synonyms in its search results.
<doQueryExpansion>1</>
# What is the limit to the total number of returned search results.
<maxSearchResults>1000</>
# What is the limit to the total number of returned search results per query?
<maxSearchResultsPerQuery>100</>
# What is the maximum number of characters allowed in titles displayed in the
# search results?
<maxTitleLen>80</>
# Should search results be site clustered by default?
<siteClusterByDefault>1</>
# Hide all clustered results instead of displaying two results from each site.
<hideAllClusteredResults>0</>
# Should duplicate search results be removed by default?
<dedupResultsByDefault>1</>
# Should we dedup URLs with case insensitivity? This is mainly to correct
# duplicate wiki pages.
<dedupURLs>0</>
# If document summary is this percent similar to a document summary above it,
# then remove it from the search results. 100 means only to remove if exactly
# the same. 0 means no summary deduping.
<percentSimilarDedupSummary>90</>
# Sets the number of lines to generate for summary deduping. This is to help
# the deduping process not throw out valid summaries when normally displayed
# summaries are smaller values. Requires percent similar dedup summary to be
# enabled.
<numberOfLinesToUseInSummaryToDedup>4</>
# Default language to use for ranking results. Value should be any language
# abbreviation, for example "en" for English.
<sortLanguagePreference><![CDATA[en]]></>
# Default country to use for ranking results. Value should be any country code
# abbreviation, for example "us" for United States.
<sortCountryPreference><![CDATA[us]]></>
# What is the maximum number of characters displayed in a summary for a search
# result?
<maxSummaryLen>512</>
# What is the maximum number of excerpts displayed in the summary of a search
# result?
<maxSummaryExcerpts>4</>
# What is the maximum number of characters allowed per summary excerpt?
<maxSummaryExcerptLength>300</>
# What is the default number of summary excerpts displayed per search result?
<defaultNumberOfSummaryExcerpts>3</>
# <br> tags are inserted to keep the number of chars in the summary per line
# at or below this width. Strings without spaces that exceed this width are
# not split.
<maxSummaryLineWidth>80</>
# Truncating this will miss out on good summaries, but performance will
# increase.
<bytesOfDocToScanForSummaryGeneration>70000</>
# Front html tag used for highlighting query terms in the summaries displayed
# in the search results.
<frontHighlightTag><![CDATA[&lt;b style=&#34;color:black;background-color:&#035;ffff66&#34;&gt;]]></>
# Back html tag used for highlighting query terms in the summaries displayed
# in the search results.
<backHighlightTag><![CDATA[&lt;/b&gt;]]></>
# How many search results should we scan for related topics (gigabits) per
# query?
<docsToScanForTopics>300</>
# Should Gigablast only get one document per IP domain and per domain for
# topic (gigabit) generation?
<ipRestrictionForTopics>0</>
# Should Gigablast remove overlapping topics (gigabits)?
<removeOverlappingTopics>1</>
# What is the number of related topics (gigabits) displayed per query? Set to
# 0 to save CPU time.
<numberOfRelatedTopics>11</>
# Related topics (gigabits) with scores below this will be excluded. Scores
# range from 0% to over 100%.
<minTopicsScore>5</>
# How many documents must contain the topic (gigabit) for it to be displayed.
<minTopicDocCount>2</>
# If a document is this percent similar to another document with a higher
# score, then it will not contribute to the topic (gigabit) generation.
<dedupDocPercentForTopics>80</>
# Maximum number of words a topic (gigabit) can have. Affects raw feeds, too.
<maxWordsPerTopic>6</>
# Max chars to sample from each doc for topics (gigabits).
<topicMaxSampleSize>4096</>
# If enabled, results in dmoz will display their categories on the results
# page.
<displayDmozCategoriesInResults>1</>
# If enabled, results in dmoz will display their indirect categories on the
# results page.
<displayIndirectDmozCategoriesInResults>0</>
# If enabled, a link will appear next to each category on each result allowing
# the user to perform their query on that entire category.
<displaySearchCategoryLinkToQueryCategoryOfResult>0</>
# Yes to use DMOZ given title when a page is untitled but is in DMOZ.
<useDmozForUntitled>1</>
# Yes to always show DMOZ summaries with search results that are in DMOZ.
<showDmozSummaries>1</>
# Yes to display the Adult category in the Top category
<showAdultCategoryOnTop>0</>
# Before downloading the contents of a URL, Gigablast first chains down this
# list of expressions, starting with expression #0. The first expression
# it matches is the ONE AND ONLY matching row for that url. It then uses the
# respider frequency, spider priority, etc. on the MATCHING ROW when spidering
# that URL. If you specify the <i>expression</i> as <i><b>default</b></i> then
# that MATCHES ALL URLs. URLs with high spider priorities take spidering
# precedence over URLs with lower spider priorities. The respider frequency
# dictates how often a URL will be respidered. See the help table below for
# examples of all the supported expressions. Use the <i>&&</i> operator to
# string multiple expressions together in the same expression text box. A
# <i>spider priority</i> of <i>DELETE</i> will cause the URL to not be
# spidered, or if it has already been indexed, it will be deleted when it is
# respidered.<br><br>
<filterExpression><![CDATA[isdocidbased]]></>
<filterExpression><![CDATA[ismedia]]></>
<filterExpression><![CDATA[errorcount&gt;=3 &amp;&amp; hastmperror]]></>
<filterExpression><![CDATA[errorcount&gt;=1 &amp;&amp; hastmperror]]></>
<filterExpression><![CDATA[isaddurl]]></>
<filterExpression><![CDATA[hopcount==0 &amp;&amp; iswww &amp;&amp; isnew]]></>
<filterExpression><![CDATA[hopcount==0 &amp;&amp; iswww]]></>
<filterExpression><![CDATA[hopcount==0 &amp;&amp; isnew]]></>
<filterExpression><![CDATA[hopcount==0]]></>
<filterExpression><![CDATA[hopcount==1 &amp;&amp; isnew]]></>
<filterExpression><![CDATA[hopcount==1]]></>
<filterExpression><![CDATA[hopcount==2 &amp;&amp; isnew]]></>
<filterExpression><![CDATA[hopcount==2]]></>
<filterExpression><![CDATA[hopcount&gt;=3 &amp;&amp; isnew]]></>
<filterExpression><![CDATA[hopcount&gt;=3]]></>
<filterExpression><![CDATA[isnew]]></>
<filterExpression><![CDATA[default]]></>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
<filterFrequency>0.000000</>
<filterFrequency>0.000000</>
<filterFrequency>1.000000</>
<filterFrequency>1.000000</>
<filterFrequency>1.000000</>
<filterFrequency>7.000000</>
<filterFrequency>7.000000</>
<filterFrequency>7.000000</>
<filterFrequency>10.000000</>
<filterFrequency>20.000000</>
<filterFrequency>20.000000</>
<filterFrequency>40.000000</>
<filterFrequency>40.000000</>
<filterFrequency>60.000000</>
<filterFrequency>60.000000</>
<filterFrequency>30.000000</>
<filterFrequency>30.000000</>
# Do not allow more than this many outstanding spiders for all urls in this
# priority.
<maxSpidersPerRule>99</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>4</>
<maxSpidersPerRule>2</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>2</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>99</>
# Allow this many spiders per IP.
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
# Wait at least this long before downloading urls from the same IP address.
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<filterPriority>80</>
<filterPriority>-3</>
<filterPriority>3</>
<filterPriority>45</>
<filterPriority>85</>
<filterPriority>50</>
<filterPriority>48</>
<filterPriority>49</>
<filterPriority>47</>
<filterPriority>40</>
<filterPriority>39</>
<filterPriority>30</>
<filterPriority>29</>
<filterPriority>20</>
<filterPriority>19</>
<filterPriority>1</>
<filterPriority>0</>

View File

@ -127,11 +127,14 @@ a{cursor:hand;cursor:pointer;text-decoration:none;color:blue;}
<td style="padding-bottom:12px">&nbsp;</td>
<td style="padding-bottom:12px">&nbsp;</td>
</tr>
<!--
<tr bgcolor="#006699">
<th><a name="boolean" id="boolean"></a><font color="#FFFFFF">Boolean Search</font></th>
<th><font color="#FFFFFF">Description</font></th>
<tr bgcolor="#0340fd">
<th><font color=33dcff>Boolean Search</font></th>
<th><font color=33dcff>Description</font></th>
</tr>
<tr>
<td colspan="2" bgcolor="#FFFFCC"><center>
Note: boolean operators must be in UPPER CASE.
@ -214,16 +217,17 @@ a{cursor:hand;cursor:pointer;text-decoration:none;color:blue;}
expressions and can be optionally enclosed in parentheses. A NOT
operator can optionally preceed the left or the right operand.</td>
</tr>
-->
</table>
</td></tr>
</table>
<br>
<center>
Copyright &copy; 2013. All rights reserved.
Copyright &copy; 2014. All rights reserved.
</center>
</body>
</html>