diff --git a/Collectiondb.cpp b/Collectiondb.cpp
index b15e1ffe..7aa6859a 100644
--- a/Collectiondb.cpp
+++ b/Collectiondb.cpp
@@ -636,6 +636,23 @@ bool Collectiondb::deleteRec ( char *coll , WaitEntry *we ) {
 }
 */
 
+// if there is an outstanding disk read thread or merge thread then
+// Spider.cpp will handle the delete in the callback.
+void Collectiondb::deleteSpiderColl ( SpiderColl *sc ) {
+
+	sc->m_deleteMyself = true;
+
+	// if not currently being accessed nuke it now
+	if ( ! sc->m_msg5.m_waitingForList &&
+	     ! sc->m_msg5.m_waitingForMerge &&
+	     ! sc->m_msg5b.m_waitingForList &&
+	     ! sc->m_msg5b.m_waitingForMerge ) {
+		mdelete ( sc, sizeof(SpiderColl),"nukecr2");
+		delete ( sc );
+		return;
+	}
+}
+
 bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
 	// do not allow this if in repair mode
 	if ( g_repairMode > 0 ) {
@@ -723,10 +740,14 @@ bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
 	SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(collnum);
 	if ( sc ) {
 		// remove locks from lock table:
-		sc->clear();
+		sc->clearLocks();
 		//sc->m_collnum = newCollnum;
-		sc->reset();
-		mdelete ( sc, sizeof(SpiderColl),"nukecr2");
+		//sc->reset();
+		// this will put it on "death row" so it will be deleted
+		// once Msg5::m_waitingForList/Merge is NULL
+		deleteSpiderColl ( sc );
+		//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
+		//delete ( sc );
 		cr->m_spiderColl = NULL;
 	}
@@ -925,8 +946,19 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 	// reset spider info
 	SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(oldCollnum);
 	if ( sc ) {
-		sc->clear();
-		sc->m_collnum = newCollnum;
+		// remove locks from lock table:
+		sc->clearLocks();
+		// don't do this anymore, just nuke it in case
+		// m_populatingDoledb was true etc. there are too many
+		// flags to worry about
+		//sc->m_collnum = newCollnum;
+		//sc->reset();
+		// this will put it on "death row" so it will be deleted
+		// once Msg5::m_waitingForList/Merge is NULL
+		deleteSpiderColl ( sc );
+		//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
+		//delete ( sc );
+		cr->m_spiderColl = NULL;
 	}
 	// reset spider round
@@ -1903,6 +1935,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 		// just turn off spidering. if we were to set priority to
 		// filtered it would be removed from index!
 		m_spidersEnabled [i] = 0;
+		// temp hack so it processes in xmldoc.cpp::getUrlFilterNum()
+		// which has been obsoleted, but we are running old code now!
+		m_spiderDiffbotApiUrl[i].set ( api );
 		i++;
 	}
 	// if collectiverespiderfreq is 0 or less then do not RE-spider
@@ -1916,6 +1951,9 @@
 		// just turn off spidering. if we were to set priority to
 		// filtered it would be removed from index!
 		m_spidersEnabled [i] = 0;
+		// temp hack so it processes in xmldoc.cpp::getUrlFilterNum()
+		// which has been obsoleted, but we are running old code now!
+		m_spiderDiffbotApiUrl[i].set ( api );
 		i++;
 	}
diff --git a/Collectiondb.h b/Collectiondb.h
index 9ae29e2e..043959bf 100644
--- a/Collectiondb.h
+++ b/Collectiondb.h
@@ -126,6 +126,8 @@ class Collectiondb {
 	//bool updateRec ( CollectionRec *newrec );
 	bool deleteRecs ( class HttpRequest *r ) ;
 
+	void deleteSpiderColl ( class SpiderColl *sc );
+
 	// returns false if blocked, true otherwise.
 	//bool resetColl ( char *coll , WaitEntry *we , bool purgeSeeds );
 	bool resetColl2 ( collnum_t oldCollnum,
diff --git a/DiskPageCache.cpp b/DiskPageCache.cpp
index 5f1be93a..1633d316 100644
--- a/DiskPageCache.cpp
+++ b/DiskPageCache.cpp
@@ -349,7 +349,7 @@ void DiskPageCache::getPages ( long vfd ,
 		// dumping more than what was end the tree because stuff was
 		// added to the tree while dumping!
 		log("db: pagecache: Caught get breach. "
-		    "ep=%li max=%li", ep,m_maxPagesInFile[vfd] );
+		    "ep=%li max=%li vfd=%li", ep,m_maxPagesInFile[vfd] ,vfd);
 		return;
 		//char *xx = NULL; *xx = 0;
 	}
diff --git a/DiskPageCache.h b/DiskPageCache.h
index ac637e09..0801f318 100644
--- a/DiskPageCache.h
+++ b/DiskPageCache.h
@@ -40,7 +40,8 @@
 #define MAX_PAGE_SETS 128
 
 // how many BigFiles can be using the same DiskPageCache?
-#define MAX_NUM_VFDS2 1024
+#include "File.h"
+#define MAX_NUM_VFDS2 MAX_NUM_VFDS
 
 extern void freeAllSharedMem ( long max );
 
diff --git a/File.h b/File.h
index c44c8f07..6138f3ab 100644
--- a/File.h
+++ b/File.h
@@ -21,7 +21,8 @@
 // . max # of VIRTUAL file descriptors
 // . man, chris has 958 files, lets crank it up from 2k to 5k
-#define MAX_NUM_VFDS (5*1024)
+// . boost up to 50,000 since we are hitting this limit with crawlbot
+#define MAX_NUM_VFDS (50*1024)
 
 #include     // for open/lseek
 #include     // for open
diff --git a/Json.cpp b/Json.cpp
index d4bda183..09355e35 100644
--- a/Json.cpp
+++ b/Json.cpp
@@ -95,6 +95,14 @@ JsonItem *Json::parseJsonStringIntoJsonItems ( char *json , long niceness ) {
 	need += p - json;
 	// plus a \0 for the value and a \0 for the name of each jsonitem
 	need += 2;
+	// prevent cores for now
+	need += 10;
+	// . to prevent safebuf from reallocating do this
+	// . safeMemcpy() calls reserve(m_length+len) and reserves
+	//   tries to alloc m_length + (m_length+len) so since,
+	//   m_length+len should never be more than "need" we need to
+	//   double up here
+	need *= 2;
 	// this should be enough
 	if ( ! m_sb.reserve ( need ) ) return NULL;
 	// for testing if we realloc
diff --git a/Linkdb.cpp b/Linkdb.cpp
index ac18417c..45a9c00a 100644
--- a/Linkdb.cpp
+++ b/Linkdb.cpp
@@ -109,6 +109,8 @@ bool Linkdb::init ( ) {
 	long maxTreeNodes = maxTreeMem /(sizeof(key224_t)+16);
 	// disk page cache mem, 100MB on gk0 now
 	long pcmem = 0; // g_conf.m_linkdbMaxDiskPageCacheMem;
+	// give it a little
+	pcmem = 10000000; // 10MB
 	// keep this low if we are the tmp cluster
 	//if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
 	// TODO: would be nice to just do page caching on the satellite files;
diff --git a/Loop.cpp b/Loop.cpp
index 6a5e2456..6d271e5d 100644
--- a/Loop.cpp
+++ b/Loop.cpp
@@ -1791,6 +1791,11 @@ void Loop::quickPoll(long niceness, const char* caller, long lineno) {
 	if(m_inQuickPoll) {
 		log(LOG_WARN,
 		    "admin: tried to quickpoll from inside quickpoll");
+		// this happens when handleRequest3f is called from
+		// a quickpoll and it deletes a collection and BigFile::close
+		// calls ThreadQueue::removeThreads and Msg3::doneScanning()
+		// has niceness 2 and calls quickpoll again!
+		return;
 		//if(g_conf.m_quickpollCoreOnError) {
 			char*xx=NULL;*xx=0;
 		//	}
diff --git a/Msg3.cpp b/Msg3.cpp
index 266c3d3c..8fb499aa 100644
--- a/Msg3.cpp
+++ b/Msg3.cpp
@@ -930,9 +930,12 @@ bool Msg3::doneScanning ( ) {
 				  ff->getFilename() ,
 				  m_niceness ) ) {
 			log("net: Had error while constraining list read from "
-			    "%s: %s. This is likely caused by corrupted "
+			    "%s: %s%s. vfd=%li parts=%li. "
+			    "This is likely caused by corrupted "
 			    "data on disk.",
-			    mstrerror(g_errno), ff->getFilename());
+			    mstrerror(g_errno), ff->m_dir ,
+			    ff->getFilename(), ff->m_vfd ,
+			    (long)ff->m_numParts );
 		}
 	}
diff --git a/Msg5.cpp b/Msg5.cpp
index 831e573d..00c61b67 100644
--- a/Msg5.cpp
+++ b/Msg5.cpp
@@ -22,6 +22,7 @@ long g_numCorrupt = 0;
 
 Msg5::Msg5() {
 	m_waitingForList = false;
+	m_waitingForMerge = false;
 	m_numListPtrs = 0;
 	m_mergeLists = true;
 	reset();
@@ -33,7 +34,7 @@ Msg5::~Msg5() {
 
 // frees m_treeList
 void Msg5::reset() {
-	if ( m_waitingForList ) {
+	if ( m_waitingForList || m_waitingForMerge ) {
 		log("disk: Trying to reset a class waiting for a reply.");
 		// might being doing an urgent exit (mainShutdown(1)) or
 		// g_process.shutdown(), so do not core here
@@ -1365,6 +1366,8 @@ bool Msg5::gotList2 ( ) {
 	// skip it for now
 	//goto skipThread;
 
+	m_waitingForMerge = true;
+
 	// . if size is big, make a thread
 	// . let's always make niceness 0 since it wasn't being very
 	//   aggressive before
@@ -1374,6 +1377,9 @@
 			       threadDoneWrapper ,
 			       mergeListsWrapper_r ) )
 		return false;
+
+	m_waitingForMerge = false;
+
 	// thread creation failed
 	if ( ! g_threads.areThreadsDisabled() )
 		log(LOG_INFO,
@@ -1704,6 +1710,8 @@ void Msg5::mergeLists_r ( ) {
 // . we are left with an empty list
 bool Msg5::doneMerging ( ) {
 
+	m_waitingForMerge = false;
+
 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
 	RdbBase *base;
 	if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
@@ -1722,8 +1730,8 @@ bool Msg5::doneMerging ( ) {
 	// our first merge
 	if ( m_hadCorruption ) {
 		// log it here, cuz logging in thread doesn't work too well
-		log("net: Encountered a corrupt list in rdb=%s",
-		    base->m_dbname);
+		log("net: Encountered a corrupt list in rdb=%s coll=%s",
+		    base->m_dbname,m_coll);
 		// remove error condition, we removed the bad data in thread
 		m_hadCorruption = false;
diff --git a/Msg5.h b/Msg5.h
index 3427b553..34dfb699 100644
--- a/Msg5.h
+++ b/Msg5.h
@@ -292,6 +292,7 @@ class Msg5 {
 	bool m_mergeLists;
 
 	char m_waitingForList;
+	char m_waitingForMerge; // actually part of a different algo than m_waitingForList!
 
 	unsigned long long m_waitingKey;
diff --git a/PageAddUrl.cpp b/PageAddUrl.cpp
index b94d661b..34886256 100644
--- a/PageAddUrl.cpp
+++ b/PageAddUrl.cpp
@@ -254,6 +254,10 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 	long long probDocId = g_titledb.getProbableDocId ( st1->m_url );
 	// make one up, like we do in PageReindex.cpp
 	long firstIp = (probDocId & 0xffffffff);
+
+	// avoid ips of 0 or -1
+	if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
+
 	// . now fill it up
 	// . TODO: calculate the other values... lazy!!! (m_isRSSExt,
 	//   m_siteNumInlinks,...)
diff --git a/PageCrawlBot.cpp b/PageCrawlBot.cpp
index 09e47b90..b2dceeab 100644
--- a/PageCrawlBot.cpp
+++ b/PageCrawlBot.cpp
@@ -3841,6 +3841,9 @@ bool getSpiderRequestMetaList ( char *doc ,
 		SpiderRequest sreq;
 		sreq.reset();
 		sreq.m_firstIp = url.getHostHash32(); // fakeip!
+		// avoid ips of 0 or -1
+		if ( sreq.m_firstIp == 0 || sreq.m_firstIp == -1 )
+			sreq.m_firstIp = 1;
 		sreq.m_hostHash32 = url.getHostHash32();
 		sreq.m_domHash32 = url.getDomainHash32();
 		sreq.m_siteHash32 = url.getHostHash32();
diff --git a/PageStats.cpp b/PageStats.cpp
index a3deada1..feb1ccef 100644
--- a/PageStats.cpp
+++ b/PageStats.cpp
@@ -488,7 +488,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
 		  "Kernel Version%s\n"
 		  //"Gigablast Version%s %s\n"
 		  "Parsing Inconsistencies%li\n"
-		  "Indexdb Splits%li\n"
+		  "Indexdb Shards%li\n"
 		  //"Fully Split%li\n"
 		  //"Tfndb Extension Bits%li\n"
 		  "\n"
diff --git a/Parms.cpp b/Parms.cpp
index a0d91670..a5e22477 100644
--- a/Parms.cpp
+++ b/Parms.cpp
@@ -17531,7 +17531,12 @@ bool Parms::doParmSendingLoop ( ) {
 			       NULL, // retslot
 			       (void *)h->m_hostId , // state
 			       gotParmReplyWrapper ,
-			       4 ) ) { // timeout secs
+			       30 , // timeout secs
+			       -1 , // backoff
+			       -1 , // maxwait
+			       NULL , // replybuf
+			       0 , // replybufmaxsize
+			       0 ) ) { // niceness
 			log("parms: faild to send: %s",mstrerror(g_errno));
 			continue;
 		}
diff --git a/Posdb.cpp b/Posdb.cpp
index 510b88e0..135743cc 100644
--- a/Posdb.cpp
+++ b/Posdb.cpp
@@ -122,12 +122,12 @@ bool Posdb::init ( ) {
 	long nodeSize = (sizeof(key144_t)+12+4) + sizeof(collnum_t);
 	long maxTreeNodes = maxTreeMem / nodeSize ;
 
-	//long pageSize = GB_INDEXDB_PAGE_SIZE;
+	long pageSize = GB_INDEXDB_PAGE_SIZE;
 
 	// we now use a disk page cache as opposed to the
 	// old rec cache. i am trying to do away with the Rdb::m_cache rec
 	// cache in favor of cleverly used disk page caches, because
 	// the rec caches are not real-time and get stale.
-	long pcmem = 50000000; // 50MB
+	long pcmem = 30000000; // 30MB
 	// make sure at least 30MB
 	//if ( pcmem < 30000000 ) pcmem = 30000000;
 	// keep this low if we are the tmp cluster, 30MB
@@ -136,12 +136,12 @@ bool Posdb::init ( ) {
 	// prevent swapping
 	if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
 	// save more mem!!! allow os to cache it i guess...
-	pcmem = 0;
+	// let's go back to using it
+	//pcmem = 0;
 	// disable for now... for rebuild
 	//pcmem = 0;
 	// . init the page cache
 	// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
-	/*
 	if ( ! m_pc.init ( "posdb",
 			   RDB_POSDB,
 			   pcmem ,
@@ -149,7 +149,6 @@
 			   true , // use RAM disk?
 			   false )) // minimize disk seeks?
 		return log("db: Posdb init failed.");
-	*/
 
 	// . set our own internal rdb
 	// . max disk space for bin tree is same as maxTreeMem so that we
@@ -174,7 +173,7 @@
 		// newer systems have tons of ram to use
 		// for their disk page cache. it is slower than
 		// ours but the new engine has much slower things
-		NULL,//&m_pc ,
+		&m_pc ,
 		false , // istitledb?
 		false , // preloaddiskpagecache?
 		sizeof(key144_t)
diff --git a/Process.cpp b/Process.cpp
index 19c6ae55..22259f30 100644
--- a/Process.cpp
+++ b/Process.cpp
@@ -104,7 +104,7 @@ char *g_files[] = {
 	"antiword" , // msword
 	"pdftohtml", // pdf
 	"pstotext" , // postscript
-	"ppthtml" , // powerpoint
+	//"ppthtml" , // powerpoint
 
 	//"dict/unifiedDict",
 	//"dict/thesaurus.txt",
diff --git a/RdbBase.cpp b/RdbBase.cpp
index d88c7dc6..5f34202a 100644
--- a/RdbBase.cpp
+++ b/RdbBase.cpp
@@ -360,6 +360,7 @@ bool RdbBase::init ( char *dir ,
 	// now fill up the page cache
 	// preload:
 	if ( ! preloadDiskPageCache ) return true;
+	if ( ! m_pc ) return true;
 	char buf [ 512000 ];
 	long total = m_pc->getMemMax();
 	log(LOG_DEBUG,"db: %s: Preloading page cache. Total mem to use =%lu",
diff --git a/RdbDump.cpp b/RdbDump.cpp
index 686892e1..b76222a4 100644
--- a/RdbDump.cpp
+++ b/RdbDump.cpp
@@ -204,11 +204,14 @@ void RdbDump::doneDumping ( ) {
 	    m_totalPosDumped ,
 	    m_totalNegDumped ,
 	    m_totalPosDumped + m_totalNegDumped );
-	// map verify
-	log("db: map # pos=%lli neg=%lli",
-	    m_map->getNumPositiveRecs(),
-	    m_map->getNumNegativeRecs()
-	    );
+	// . map verify
+	// . if continueDumping called us with no collectionrec, it got
+	//   deleted so RdbBase::m_map is nuked too i guess
+	if ( saved != ENOCOLLREC )
+		log("db: map # pos=%lli neg=%lli",
+		    m_map->getNumPositiveRecs(),
+		    m_map->getNumNegativeRecs()
+		    );
 	// free the list's memory
 	if ( m_list ) m_list->freeList();
@@ -1015,11 +1018,16 @@ void RdbDump::continueDumping() {
 	// if someone reset/deleted the collection we were dumping...
 	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
-	if ( ! cr ) g_errno = ENOCOLLREC;
-
+	if ( ! cr ) {
+		g_errno = ENOCOLLREC;
+		// m_file is invalid if collrec got nuked because so did
+		// the Rdbbase which has the files
+		log("db: continue dumping lost collection");
+	}
 	// bitch about errors
-	if (g_errno)log("db: Dump to %s had error writing: %s.",
-			m_file->getFilename(),mstrerror(g_errno));
+	else if (g_errno)log("db: Dump to %s had error writing: %s.",
+			     m_file->getFilename(),mstrerror(g_errno));
+
 	// go back now if we were NOT dumping a tree
 	if ( ! (m_tree || m_buckets) ) {
 		m_isDumping = false;
diff --git a/RdbMap.cpp b/RdbMap.cpp
index e4cb34e5..c4e15053 100644
--- a/RdbMap.cpp
+++ b/RdbMap.cpp
@@ -6,6 +6,8 @@
 
 RdbMap::RdbMap() {
 	m_numSegments = 0;
+	m_numSegmentPtrs = 0;
+	m_numSegmentOffs = 0;
 	reset ( );
 }
 
@@ -61,6 +63,14 @@ void RdbMap::reset ( ) {
 		m_keys [i] = NULL;
 		m_offsets[i] = NULL;
 	}
+
+	// the ptrs themselves are now a dynamic array to save mem
+	// when we have thousands of collections
+	mfree(m_keys,m_numSegmentPtrs*sizeof(char *),"MapPtrs");
+	mfree(m_offsets,m_numSegmentOffs*sizeof(short *),"MapPtrs");
+	m_numSegmentPtrs = 0;
+	m_numSegmentOffs = 0;
+
 	m_needToWrite = false;
 	m_fileStartOffset = 0LL;
 	m_numSegments = 0;
@@ -1192,6 +1202,40 @@ long long RdbMap::getMemAlloced ( ) {
 	return (long long)m_numSegments * space;
 }
 
+bool RdbMap::addSegmentPtr ( long n ) {
+	// realloc
+	if ( n >= m_numSegmentPtrs ) {
+		char **k;
+		long nn = (long)((float)n * 1.20) + 1;
+		k = (char **) mrealloc (m_keys,
+					m_numSegmentPtrs * sizeof(char *) ,
+					nn * sizeof(char *) ,
+					"MapPtrs" );
+		// failed?
+		if ( ! k ) return false;
+		// succeeded
+		m_numSegmentPtrs = nn;
+		m_keys = k;
+	}
+
+	// try offsets
+	if ( n >= m_numSegmentOffs ) {
+		short **o;
+		long nn = (long)((float)n * 1.20) + 1;
+		o = (short **) mrealloc (m_offsets,
+					 m_numSegmentOffs * sizeof(short *) ,
+					 nn * sizeof(short *) ,
+					 "MapPtrs" );
+		// failed?
+		if ( ! o ) return false;
+		// succeeded
+		m_numSegmentOffs = nn;
+		m_offsets = o;
+	}
+	return true;
+}
+
+
 // . add "n" segments
 // . returns false and sets g_errno on error
 bool RdbMap::addSegment ( ) {
@@ -1202,8 +1246,17 @@ bool RdbMap::addSegment ( ) {
 	long n = m_numSegments;
 	long pps = PAGES_PER_SEGMENT;
 	// ensure doesn't exceed the max
-	if ( n >= MAX_SEGMENTS ) return log("db: Mapped file is "
-					    "too big. Critical error.");
+	//if ( n >= MAX_SEGMENTS ) return log("db: Mapped file is "
+	//				      "too big. Critical error.");
+
+	// the array of up to MAX_SEGMENT pool ptrs is now dynamic too!
+	// because diffbot uses thousands of collections, this will save
+	// over 1GB of ram!
+	if ( ! addSegmentPtr ( n ) )
+		return log("db: Failed to allocate memory for adding seg ptr "
+			   "for map file %s.", m_file.getFilename());
+
+
 	// alloc spaces for each key segment
 	// allocate new segments now
 	//m_keys[n] = (key_t *) mmalloc ( ks * pps , "RdbMap" );
diff --git a/RdbMap.h b/RdbMap.h
index beeffb52..0d703c7e 100644
--- a/RdbMap.h
+++ b/RdbMap.h
@@ -59,7 +59,7 @@
 #define PAGES_PER_SEGMENT (2*1024)
 #define PAGES_PER_SEG (PAGES_PER_SEGMENT)
 // MAX_SEGMENTS of 16*1024 allows for 32 million pages = 256gigs of disk data
-#define MAX_SEGMENTS (16*1024)
+//#define MAX_SEGMENTS (16*1024)
 
 class RdbMap {
 
@@ -284,6 +284,8 @@ class RdbMap {
 	// . used to grow the map, too
 	//bool setMapSize ( long maxNumPages );
 
+	bool addSegmentPtr ( long n ) ;
+
 	// called by setMapSize() to increase the # of segments
 	bool addSegment ( ) ;
 
@@ -328,10 +330,17 @@ class RdbMap {
 	// . IMPORTANT: if growing m_pageSize might need to change m_offsets
 	//   from short to long
 	//key_t *m_keys [ MAX_SEGMENTS ];
-	char *m_keys [ MAX_SEGMENTS ];
+	//char *m_keys [ MAX_SEGMENTS ];
+	char **m_keys;
+	long m_numSegmentPtrs;
 	//key96_t **m_keys96; // set to m_keys
 	//key128_t **m_keys128; // set to m_keys
-	short *m_offsets [ MAX_SEGMENTS ];
+
+	//short *m_offsets [ MAX_SEGMENTS ];
+	short **m_offsets;
+	long m_numSegmentOffs;
+
+	// number of valid pages in the map.
 	long m_numPages;
diff --git a/Spider.cpp b/Spider.cpp
index ceb452ec..b5ffc198 100644
--- a/Spider.cpp
+++ b/Spider.cpp
@@ -537,7 +537,6 @@ bool Spiderdb::init ( ) {
 	long pcmem = 20000000;//g_conf.m_spiderdbMaxDiskPageCacheMem;
 	// keep this low if we are the tmp cluster
 	if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
-
 	// key parser checks
 	//long ip = 0x1234;
 	char priority = 12;
@@ -571,7 +570,7 @@ bool Spiderdb::init ( ) {
 			   RDB_SPIDERDB ,
 			   pcmem ,
 			   pageSize ,
-			   true , // use shared mem?
+			   false , // use shared mem?
 			   false )) // minimizeDiskSeeks?
 		return log(LOG_INIT,"spiderdb: Init failed.");
 
@@ -1014,9 +1013,11 @@ SpiderColl *SpiderCache::getSpiderColl ( collnum_t collnum ) {
 /////////////////////////
 
 SpiderColl::SpiderColl () {
+	m_deleteMyself = false;
 	m_gettingList1 = false;
 	m_gettingList2 = false;
 	m_lastScanTime = 0;
+	m_isPopulating = false;
 	m_numAdded = 0;
 	m_numBytesScanned = 0;
 	m_lastPrintCount = 0;
@@ -1488,7 +1489,7 @@ SpiderColl::~SpiderColl () {
 }
 
 // we call this now instead of reset when Collectiondb::resetColl() is used
-void SpiderColl::clear ( ) {
+void SpiderColl::clearLocks ( ) {
 
 	// remove locks from locktable for all spiders out i guess
 	HashTableX *ht = &g_spiderLoop.m_lockTable;
@@ -1508,6 +1509,7 @@ void SpiderColl::clear ( ) {
 		goto top;
 	}
 
+	/*
 	// reset these for SpiderLoop;
 	m_nextDoledbKey.setMin();
 	m_didRound = false;
@@ -1541,6 +1543,7 @@ void SpiderColl::clear ( ) {
 	// assume the whole thing is not empty
 	m_allDoledbPrioritiesEmpty = 0;//false;
 	m_lastEmptyCheck = 0;
+	*/
 }
 
 void SpiderColl::reset ( ) {
@@ -1554,6 +1557,8 @@ void SpiderColl::reset ( ) {
 	m_twinDied = false;
 	m_lastUrlFiltersUpdate = 0;
 
+	m_isPopulating = false;
+
 	char *coll = "unknown";
 	if ( m_coll[0] ) coll = m_coll;
 	log(LOG_DEBUG,"spider: resetting spider cache coll=%s",coll);
@@ -2251,6 +2256,7 @@ bool SpiderColl::addToWaitingTree ( uint64_t spiderTimeMS , long firstIp ,
 	// what is this?
 	if ( firstIp == 0 || firstIp == -1 ) {
 		log("spider: got ip of %s. wtf?",iptoa(firstIp) );
+		return false;
 		char *xx=NULL; *xx=0;
 	}
@@ -2447,6 +2453,11 @@ void SpiderColl::populateWaitingTreeFromSpiderdb ( bool reentry ) {
 	if ( ! m_waitingTreeNeedsRebuild ) return;
 	// a double call? can happen if list read is slow...
 	if ( m_gettingList2 ) return;
+
+	// . borrow a msg5
+	// . if none available just return, we will be called again
+	//   by the sleep/timer function
+
 	// read in a replacement SpiderRequest to add to doledb from
 	// this ip
 	// . get the list of spiderdb records
@@ -2460,7 +2471,7 @@ void SpiderColl::populateWaitingTreeFromSpiderdb ( bool reentry ) {
 	// flag it
 	m_gettingList2 = true;
 	// make state
-	long state2 = (long)m_cr->m_collnum;
+	//long state2 = (long)m_cr->m_collnum;
 	// read the list from local disk
 	if ( ! m_msg5b.getList ( RDB_SPIDERDB ,
 				 m_cr->m_coll ,
@@ -2473,7 +2484,7 @@ void SpiderColl::populateWaitingTreeFromSpiderdb ( bool reentry ) {
 				 0 , // max cache age
 				 0 , // startFileNum
 				 -1 , // numFiles (all)
-				 (void *)state2,//this//state
+				 this,//(void *)state2,//this//state
 				 gotSpiderdbListWrapper2 ,
 				 MAX_NICENESS , // niceness
 				 true )) // do error correct?
@@ -2774,20 +2785,35 @@ void SpiderColl::populateDoledbFromWaitingTree ( bool reentry ) {
 	// calls this function again with re-entry set to true
 	if ( ! scanSpiderdb ( true ) ) return;
 	// oom error? i've seen this happen and we end up locking up!
-	if ( g_errno ) return;
+	if ( g_errno ) {
+		log("spider: scandspiderdb: %s",mstrerror(g_errno));
+		m_isPopulating = false;
+		return;
+	}
 	// try more
 	goto loop;
 }
 
 static void gotSpiderdbListWrapper ( void *state , RdbList *list , Msg5 *msg5){
 
-	collnum_t collnum = (collnum_t)(long)state;
+	//collnum_t collnum = (collnum_t)(long)state;
+	//SpiderColl *THIS = g_spiderCache.getSpiderColl(collnum);
+	//if ( ! THIS ) {
+	//	log("spider: lost1 collnum %li while scanning spiderdb",
+	//	    (long)collnum);
+	//	return;
+	//}
 
-	SpiderColl *THIS = g_spiderCache.getSpiderColl(collnum);
+	SpiderColl *THIS = (SpiderColl *)state;
 
-	if ( ! THIS ) {
-		log("spider: lost1 collnum %li while scanning spiderdb",
-		    (long)collnum);
+	// did our collection rec get deleted? since we were doing a read
+	// the SpiderColl will have been preserved in that case but its
+	// m_deleteMyself flag will have been set.
+	if ( THIS->m_deleteMyself &&
+	     ! THIS->m_msg5b.m_waitingForMerge &&
+	     ! THIS->m_msg5b.m_waitingForList ) {
+		mdelete ( THIS , sizeof(SpiderColl),"postdel1");
+		delete ( THIS );
 		return;
 	}
 
@@ -2800,6 +2826,10 @@ static void gotSpiderdbListWrapper ( void *state , RdbList *list , Msg5 *msg5){
 	// . finish processing the list we read now
 	// . if that blocks, it will call doledWrapper
 	if ( ! THIS->scanSpiderdb ( false ) ) return;
+
+	// no longer populating doledb. we also set to false in doledwrapper
+	//THIS->m_isPopulating = false;
+
 	// . otherwise, do more from tree
 	// . re-entry is true because we just got the msg5 reply
 	THIS->populateDoledbFromWaitingTree ( true );
@@ -2807,16 +2837,29 @@ static void gotSpiderdbListWrapper2( void *state , RdbList *list , Msg5 *msg5){
 
-	collnum_t collnum = (collnum_t)(long)state;
+	//collnum_t collnum = (collnum_t)(long)state;
+	//SpiderColl *THIS = g_spiderCache.getSpiderColl(collnum);
+	//if ( ! THIS ) {
+	//	log("spider: lost2 collnum %li while scanning spiderdb",
+	//	    (long)collnum);
+	//	return;
+	//}
 
-	SpiderColl *THIS = g_spiderCache.getSpiderColl(collnum);
-	if ( ! THIS ) {
-		log("spider: lost2 collnum %li while scanning spiderdb",
-		    (long)collnum);
+	SpiderColl *THIS = (SpiderColl *)state;
+
+	// did our collection rec get deleted? since we were doing a read
+	// the SpiderColl will have been preserved in that case but its
+	// m_deleteMyself flag will have been set.
+	if ( THIS->m_deleteMyself &&
+	     ! THIS->m_msg5.m_waitingForMerge &&
+	     ! THIS->m_msg5.m_waitingForList ) {
+		mdelete ( THIS , sizeof(SpiderColl),"postdel1");
+		delete ( THIS );
 		return;
 	}
 
+	//SpiderColl *THIS = (SpiderColl *)state;
 
 	// re-entry is true because we just got the msg5 reply
 	THIS->populateWaitingTreeFromSpiderdb ( true );
@@ -2829,6 +2872,10 @@ static void doledWrapper ( void *state ) {
 	// msg4 is available again
 	THIS->m_msg4Avail = true;
 
+	// no longer populating doledb. we also set to false in
+	// gotSpiderListWrapper
+	//THIS->m_isPopulating = false;
+
 	long long now = gettimeofdayInMilliseconds();
 	long long diff = now - THIS->m_msg4Start;
 	// we add recs to doledb using msg1 to keep things fast because
@@ -2969,7 +3016,7 @@ bool SpiderColl::scanSpiderdb ( bool needList ) {
 	// flag it
 	m_gettingList1 = true;
 	// make state
-	long state2 = (long)m_cr->m_collnum;
+	//long state2 = (long)m_cr->m_collnum;
 	// . read the list from local disk
 	// . if a niceness 0 intersect thread is taking a LONG time
 	//   then this will not complete in a long time and we
@@ -2987,7 +3034,7 @@ bool SpiderColl::scanSpiderdb ( bool needList ) {
 				0 , // max cache age
 				0 , // startFileNum
 				-1 , // numFiles (all)
-				(void *)state2,//this,//state
+				this,//(void *)state2,//this,//state
 				gotSpiderdbListWrapper ,
 				MAX_NICENESS , // niceness
 				true )) // do error correct?
@@ -9346,6 +9393,10 @@ long getUrlFilterNum2 ( SpiderRequest *sreq ,
 		     to_lower_a(ext[2]) == 'm' &&
 		     to_lower_a(ext[3]) == 'v' )
 			goto gotOne;
+		if ( to_lower_a(ext[1]) == 'w' &&
+		     to_lower_a(ext[2]) == 'a' &&
+		     to_lower_a(ext[3]) == 'v' )
+			goto gotOne;
 		if ( to_lower_a(ext[1]) == 'j' &&
 		     to_lower_a(ext[2]) == 'p' &&
 		     to_lower_a(ext[3]) == 'g' )
 			goto gotOne;
diff --git a/Spider.h b/Spider.h
index 00cb2744..ab95f7ed 100644
--- a/Spider.h
+++ b/Spider.h
@@ -981,7 +981,7 @@ class SpiderColl {
 	~SpiderColl ( );
 	SpiderColl ( ) ;
 
-	void clear();
+	void clearLocks();
 
 	// called by main.cpp on exit to free memory
 	void reset();
@@ -1125,6 +1125,8 @@ class SpiderColl {
 	long m_scanningIp;
 	bool m_gotNewRequestsForScanningIp;
 
+	char m_deleteMyself;
+
 	// start key for reading doledb
 	key_t m_msg5StartKey;
diff --git a/Threads.cpp b/Threads.cpp
index fb4628b8..1e0657a5 100644
--- a/Threads.cpp
+++ b/Threads.cpp
@@ -284,7 +284,7 @@ bool Threads::init ( ) {
 	//   with high niceness cuz it would hold up high priority ones!
 	// . TODO: is there a better way? cancel it when UdpServer calls
 	//   Threads::suspendLowPriorityThreads() ?
-	if ( ! g_threads.registerType ( MERGE_THREAD , 2/*maxThreads*/,100) )
+	if ( ! g_threads.registerType ( MERGE_THREAD , 2/*maxThreads*/,1000) )
 		return log("thread: Failed to register thread type." );
 	// will raising this from 1 to 2 make it faster too?
 	// i raised since global specs new servers have 2 (hyperthreaded?) cpus
@@ -1120,7 +1120,7 @@ void makeCallback ( ThreadEntry *t ) {
 	// then set it
 	if ( t->m_niceness >= 1 ) g_niceness = 1;
 	else g_niceness = 0;
-	
+
 	t->m_callback ( t->m_state , t );
 
 	// time it?
diff --git a/Titledb.cpp b/Titledb.cpp
index a20788d2..5a73cf7b 100644
--- a/Titledb.cpp
+++ b/Titledb.cpp
@@ -51,20 +51,18 @@ bool Titledb::init ( ) {
 	// . just hard-code 30MB for now
 	long pcmem = 30000000; // = g_conf.m_titledbMaxDiskPageCacheMem;
 	// fuck that we need all the mem!
-	pcmem = 0;
+	//pcmem = 0;
 	// do not use any page cache if doing tmp cluster in order to
 	// prevent swapping
 	if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
-	//long pageSize = GB_INDEXDB_PAGE_SIZE;
+	long pageSize = GB_INDEXDB_PAGE_SIZE;
 	// init the page cache
 	// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
-	/*
 	if ( ! m_pc.init ( "titledb",
 			   RDB_TITLEDB,
 			   pcmem ,
 			   pageSize ) )
 		return log("db: Titledb init failed.");
-	*/
 
 	// each entry in the cache is usually just a single record, no lists
 	//long maxCacheNodes = g_conf.m_titledbMaxCacheMem / (10*1024);
@@ -90,7 +88,7 @@ bool Titledb::init ( ) {
 		   0,//maxCacheNodes ,
 		   false ,// half keys?
 		   false ,// g_conf.m_titledbSav
-		   NULL,//&m_pc , // page cache ptr
+		   &m_pc , // page cache ptr
 		   true ) )// is titledb?
 		return false;
 	return true;
diff --git a/ppthtml b/ppthtml
deleted file mode 100755
index 57bd055a..00000000
Binary files a/ppthtml and /dev/null differ
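
Note on the Collectiondb/Spider changes above: SpiderColl teardown now goes through a deferred "death row". Collectiondb::deleteSpiderColl() frees the object immediately only when neither of its Msg5 objects has a read (m_waitingForList) or merge (m_waitingForMerge) outstanding; otherwise it just sets m_deleteMyself, and the gotSpiderdbListWrapper/gotSpiderdbListWrapper2 callbacks perform the mdelete/delete once the last in-flight operation completes. The sketch below is a minimal, standalone illustration of that pattern only; Scanner, requestDelete() and onIoDone() are hypothetical names for this example, not part of the codebase.

    // deferred-deletion sketch (illustrative names, C++11)
    #include <cstdio>

    struct Scanner {
            bool m_waitingForList  = false; // a disk read is in flight
            bool m_waitingForMerge = false; // a merge thread is in flight
            bool m_deleteMyself    = false; // set when the owner wants us gone
            bool busy() const { return m_waitingForList || m_waitingForMerge; }
    };

    // owner side: analogous to Collectiondb::deleteSpiderColl()
    void requestDelete ( Scanner *s ) {
            s->m_deleteMyself = true;
            // nothing outstanding? safe to free right now
            if ( ! s->busy() ) { delete s; return; }
            // otherwise the completion callback below owns the free
    }

    // completion side: analogous to the gotSpiderdbListWrapper callbacks
    void onIoDone ( Scanner *s ) {
            s->m_waitingForList  = false;
            s->m_waitingForMerge = false;
            // we were put on "death row" while the i/o was in flight
            if ( s->m_deleteMyself && ! s->busy() ) { delete s; return; }
            // ... otherwise keep scanning as usual ...
    }

    int main ( ) {
            Scanner *s = new Scanner();
            s->m_waitingForList = true; // pretend a read is outstanding
            requestDelete ( s );        // deferred: flag set, no free yet
            onIoDone ( s );             // read completes; object freed here
            printf("done\n");
            return 0;
    }

Both paths re-check the in-flight flags before freeing; that is why deleteSpiderColl() and the wrapper callbacks each test the Msg5 m_waitingForList/m_waitingForMerge flags before calling mdelete(), so neither side can free the SpiderColl while the other still holds a pointer to it.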