update crawlbot api.

Matt Wells 2013-09-18 17:13:36 -07:00
parent 022caeec04
commit a3ea867305
2 changed files with 10 additions and 9 deletions

View File

@@ -1740,12 +1740,12 @@ bool printCrawlBotPage ( TcpSocket *s ,
  for ( long i = 0 ; i < (long)MAX_SPIDERS ; i++ ) {
  // get it
  XmlDoc *xd = docs[i];
- // skip if not our coll rec!
- if ( xd->m_cr != cr ) continue;
  // skip if empty
  if ( ! xd ) continue;
  // sanity check
  if ( ! xd->m_oldsrValid ) { char *xx=NULL;*xx=0; }
+ // skip if not our coll rec!
+ if ( xd->m_cr != cr ) continue;
  // grab it
  SpiderRequest *oldsr = &xd->m_oldsr;
  // get status
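
The hunk above appears to reorder the checks so that the collection-record test runs only after the NULL test on xd; dereferencing xd->m_cr on an empty slot would otherwise crash. A minimal standalone sketch of that ordering follows, with invented CollectionRec fields and scanDocs/main scaffolding, so it is illustration only, not Gigablast's real code.

#include <stdio.h>

struct CollectionRec { int m_collnum; };

struct XmlDoc {
	CollectionRec *m_cr;        // collection this doc belongs to
	bool           m_oldsrValid;
};

#define MAX_SPIDERS 4

// hypothetical scan helper; only the check ordering matters here
static void scanDocs ( XmlDoc **docs , CollectionRec *cr ) {
	for ( long i = 0 ; i < (long)MAX_SPIDERS ; i++ ) {
		// get it
		XmlDoc *xd = docs[i];
		// skip if empty -- must come BEFORE any use of xd->m_cr
		if ( ! xd ) continue;
		// skip if not our coll rec!
		if ( xd->m_cr != cr ) continue;
		printf ( "doc #%ld is in collection %d\n" , i , cr->m_collnum );
	}
}

int main ( ) {
	CollectionRec cr = { 7 };
	XmlDoc d = { &cr , true };
	// docs[] may legitimately contain NULL slots
	XmlDoc *docs[MAX_SPIDERS] = { &d , NULL , NULL , &d };
	scanDocs ( docs , &cr );
	return 0;
}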

View File

@@ -1083,7 +1083,8 @@ bool printDropDown ( long n , SafeBuf* sb, char *name, long select,
  if ( i == select ) s = " selected";
  else s = "";
  if ( i == -3 )
- sb->safePrintf ("<option value=%li%s>FILTERED",i,s);
+ //sb->safePrintf ("<option value=%li%s>FILTERED",i,s);
+ sb->safePrintf ("<option value=%li%s>IGNORED",i,s);
  else if ( i == -2 )
  sb->safePrintf ("<option value=%li%s>BANNED",i,s);
  else if ( i == -1 )
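
A small sketch of how the special negative priority values would render in that drop-down after this change; only the -3 ("IGNORED", formerly "FILTERED") and -2 ("BANNED") labels are visible in the hunk, so the priorityLabel helper and everything else below are assumptions for illustration.

#include <stdio.h>

// hypothetical helper mirroring the labels visible in the hunk above
static const char *priorityLabel ( long i ) {
	if ( i == -3 ) return "IGNORED"; // was "FILTERED" before this commit
	if ( i == -2 ) return "BANNED";
	return NULL;                     // ordinary numeric priority
}

int main ( ) {
	for ( long i = -3 ; i <= 2 ; i++ ) {
		const char *label = priorityLabel ( i );
		if ( label ) printf ( "<option value=%li>%s\n" , i , label );
		else         printf ( "<option value=%li>%li\n" , i , i );
	}
	return 0;
}
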
@@ -12391,17 +12392,17 @@ void Parms::init ( ) {
  "starting with expression #0. "
  //"This table is also consulted "
  //"for every outlink added to spiderdb. "
- "When it finds an "
- "expression that "
- "matches that URL, it assigns the corresponding "
+ "The first expression it matches is the ONE AND ONLY "
+ "matching row for that url. "
+ "It then uses the "
  //"<a href=/overview.html#spiderfreq>"
  "respider frequency, "
  //"<a href=/overview.html#spiderpriority>"
- "spider priority, etc. to "
+ "spider priority, etc. on the MATCHING ROW when spidering "
  //"and <a href=/overview.html#ruleset>ruleset</a> to "
  "that URL. "
- "If no expression is matched, then the "
- "<i><b>default</b></i> line is used. "
+ "The "
+ "<i><b>default</b></i> line MATCHES ALL URLs. "
  "URLs with high spider priorities take spidering "
  "precedence over "
  "URLs with lower spider priorities. "
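
Since the reworded help text above describes first-match evaluation of the URL filter table, here is a minimal sketch of that rule under the assumption of substring-style expressions; the Rule struct and the matchesUrl/pickRule helpers are invented for illustration and are not the actual parm code.

#include <string.h>
#include <stdio.h>

struct Rule {
	const char *expression;      // "default" matches every URL
	long        spiderPriority;  // higher priority is spidered first
	long        respiderFreqDays;
};

static bool matchesUrl ( const Rule &r , const char *url ) {
	if ( strcmp ( r.expression , "default" ) == 0 ) return true;
	return strstr ( url , r.expression ) != NULL;
}

// the first matching row is the ONE AND ONLY matching row for the url
static const Rule *pickRule ( const Rule *rules , long n , const char *url ) {
	for ( long i = 0 ; i < n ; i++ )
		if ( matchesUrl ( rules[i] , url ) ) return &rules[i];
	return NULL; // unreachable if the last row is "default"
}

int main ( ) {
	Rule rules[] = {
		{ ".pdf"    ,  0 , 90 },  // expression #0
		{ "news"    , 50 ,  1 },  // expression #1
		{ "default" , 10 , 30 },  // matches ALL urls not matched above
	};
	const Rule *r = pickRule ( rules , 3 , "http://example.com/news/today" );
	printf ( "priority=%li respiderFreqDays=%li\n" ,
	         r->spiderPriority , r->respiderFreqDays );
	return 0;
}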