// open-source-search-engine/Process.cpp
#include "gb-include.h"
#include "Process.h"
#include "Rdb.h"
//#include "Checksumdb.h"
#include "Clusterdb.h"
#include "Hostdb.h"
#include "Tagdb.h"
#include "Catdb.h"
#include "Posdb.h"
#include "Cachedb.h"
#include "Monitordb.h"
#include "Datedb.h"
#include "Titledb.h"
//#include "Revdb.h"
#include "Sections.h"
#include "Spider.h"
#include "Statsdb.h"
//#include "Tfndb.h"
#include "Threads.h"
#include "PingServer.h"
#include "Dns.h"
#include "Repair.h"
#include "RdbCache.h"
#include "Spider.h"
//#include "Classifier.h"
//#include "PageTopDocs.h"
#include "HttpServer.h"
#include "Speller.h"
//#include "Thesaurus.h"
#include "Spider.h"
#include "Profiler.h"
//#include "PageNetTest.h"
#include "LangList.h"
#include "AutoBan.h"
//#include "SiteBonus.h"
#include "Msg4.h"
#include "Msg5.h"
//#include "PageTurk.h"
//#include "Syncdb.h"
//#include "Placedb.h"
#include "Wiki.h"
#include "Wiktionary.h"
#include "Users.h"
#include "Proxy.h"
#include "Rebalance.h"
#include "SpiderProxy.h"
#include "PageInject.h"
// the query log hashtable defined in XmlDoc.cpp
//extern HashTableX g_qt;
// normally in seo.cpp, but here so it compiles
SafeBuf g_qbuf;
int32_t g_qbufNeedSave = 0;
// for resetAll()
//#include "Msg6.h"
extern void resetPageAddUrl ( );
extern void resetHttpMime ( );
extern void reset_iana_charset ( );
//extern void resetAdultBit ( );
extern void resetDomains ( );
extern void resetEntities ( );
extern void resetQuery ( );
extern void resetStopWords ( );
extern void resetAbbrTable ( );
extern void resetUnicode ( );
// our global instance
Process g_process;
//static int32_t s_flag = 1;
static int32_t s_nextTime = 0;
char *g_files[] = {
//"gb.conf",
// might have localhosts.conf
//"hosts.conf",
"catcountry.dat",
"badcattable.dat",
"ucdata/cd_data.dat",
"ucdata/cdmap.dat",
"ucdata/combiningclass.dat",
"ucdata/kd_data.dat",
"ucdata/kdmap.dat",
"ucdata/lowermap.dat",
"ucdata/properties.dat",
"ucdata/scripts.dat",
"ucdata/uppermap.dat",
// called by gb via system() to convert non-html doc to html
//"gbfilter",
// need for checking hard drive temperature
//"/usr/sbin/hddtemp",
// used by tagdb i guess
//"top100000Alexa.txt",
//"7za" , // 7-zip compression
// 'gbfilter' calls these filters to convert various doc types
// into html before being fed to parser
"antiword" , // msword
"pdftohtml", // pdf
"pstotext" , // postscript
//"ppthtml" , // powerpoint
// required for SSL server support for both getting web pages
// on https:// sites and for serving https:// pages
"gb.pem",
// the main binary!
"gb",
//"dict/unifiedDict",
//"dict/thesaurus.txt",
// for spell checking
//"dict/en/en_phonet.dat",
//"dict/en/en.query.phonet",
"antiword-dir/8859-1.txt",
"antiword-dir/8859-10.txt",
"antiword-dir/8859-13.txt",
"antiword-dir/8859-14.txt",
"antiword-dir/8859-15.txt",
"antiword-dir/8859-16.txt",
"antiword-dir/8859-2.txt",
"antiword-dir/8859-3.txt",
"antiword-dir/8859-4.txt",
"antiword-dir/8859-5.txt",
"antiword-dir/8859-6.txt",
"antiword-dir/8859-7.txt",
"antiword-dir/8859-8.txt",
"antiword-dir/8859-9.txt",
"antiword-dir/Default",
"antiword-dir/Example",
"antiword-dir/MacRoman.txt",
"antiword-dir/UTF-8.txt",
"antiword-dir/Unicode",
"antiword-dir/cp1250.txt",
"antiword-dir/cp1251.txt",
"antiword-dir/cp1252.txt",
"antiword-dir/cp437.txt",
"antiword-dir/cp850.txt",
"antiword-dir/cp852.txt",
"antiword-dir/fontnames",
"antiword-dir/fontnames.russian",
"antiword-dir/koi8-r.txt",
"antiword-dir/koi8-u.txt",
"antiword-dir/roman.txt",
// . thumbnail generation
// . i used 'apt-get install netpbm' to install
"bmptopnm",
"giftopnm",
"jpegtopnm",
"libjpeg.so.62",
"libnetpbm.so.10",
"libpng12.so.0",
"libtiff.so.4",
//"libz.so.1",
"LICENSE",
"pngtopnm",
"pnmscale",
"ppmtojpeg",
"tifftopnm",
"mysynonyms.txt",
//"smartctl",
"wikititles.txt.part1",
"wikititles.txt.part2",
"wiktionary-buf.txt",
"wiktionary-lang.txt",
"wiktionary-syns.dat",
// gives us siteranks for the most popular sites:
"sitelinks.txt",
"unifiedDict.txt",
//"unifiedDict-buf.txt",
//"unifiedDict-map.dat",
//
// this junk can be generated
//
//"wikiwords.dat",//enwikitionary.xml",
//"zips.dat",
//"timezones.dat",
//"aliases.dat",
//"cities.dat",
NULL
};
///////
//
// used to build the list of files for the install package,
// so do not include hosts.conf or gb.conf
//
///////
bool Process::getFilesToCopy ( char *srcDir , SafeBuf *buf ) {
// sanity check: srcDir must end in a '/'
int32_t slen = gbstrlen(srcDir);
if ( srcDir[slen-1] != '/' ) { char *xx=NULL;*xx=0; }
for ( int32_t i = 0 ; i < (int32_t)(sizeof(g_files)/sizeof(char *)) ; i++ ) {
// terminate?
if ( ! g_files[i] ) break;
// skip subdir entries, those won't work here
if ( strstr(g_files[i],"/") ) continue;
// if not first
if ( i > 0 ) buf->pushChar(' ');
// append it
buf->safePrintf("%s%s"
, srcDir
, g_files[i] );
}
// and the required runtime subdirs
buf->safePrintf(" %santiword-dir",srcDir);
buf->safePrintf(" %sucdata",srcDir);
buf->safePrintf(" %shtml",srcDir);
return true;
}
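// a hedged usage sketch (hypothetical caller, not code from this file):
// the space separated list built above is meant to go straight onto a
// shell command line when building the install package, e.g.
//
//   SafeBuf files;
//   g_process.getFilesToCopy ( "/home/gb/src/" , &files );
//   SafeBuf cmd;
//   cmd.safePrintf ( "tar -czf gb-files.tar.gz %s" , files.getBufStart() );
//   system ( cmd.getBufStart() );
//
// (the tar invocation and paths are illustrative only; the real
// packaging logic lives outside this file)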
bool Process::checkFiles ( char *dir ) {
/*
// check these by hand since you need one or the other
File f1;
File f2;
File f3;
File f4;
f1.set ( dir , "allCountries.txt" );
f2.set ( dir , "postalCodes.txt" );
//f3.set ( dir , "places.dat" );
f4.set ( dir , "zips.dat" );
if ( //( ! f3.doesExist() || ! f4.doesExist() ) &&
( ! f4.doesExist() ) &&
( ! f1.doesExist() || ! f2.doesExist() ) ) {
log("db: need either (%s and %s) or (%s and %s)",
f3.getFilename() ,
f4.getFilename() ,
f1.getFilename() ,
f2.getFilename() );
//return false;
}
*/
// check for email subdir
//f1.set ( dir , "/html/email/");
//if ( ! f1.doesExist() ) {
// log("db: email subdir missing. add html/email");
// return false;
//}
// make sure we got all the files
//if ( ! g_conf.m_isLive ) return true;
bool needsFiles = false;
for ( int32_t i = 0 ; i < (int32_t)(sizeof(g_files)/sizeof(char *)) ; i++ ) {
// terminate?
if ( ! g_files[i] ) break;
File f;
char *dd = dir;
if ( g_files[i][0] != '/' )
f.set ( dir , g_files[i] );
else {
f.set ( g_files[i] );
dd = "";
}
if ( ! f.doesExist() ) {
log("db: %s%s file missing."
,dd,g_files[i]);
//log("db: %s%s missing. Copy over from "
// "titan:/gb/conf/%s",dd,g_files[i],g_files[i]);
// i like to debug locally without having to load this!
//if ( ! g_conf.m_isLive &&
// ! strcmp(g_files[i],"dict/unifiedDict") )
// continue;
// get subdir in working dir
//char subdir[512];
//char *p = g_files[i];
//char *last = NULL;
//for ( ; *p ; p++ )
// if ( *p == '/' ) last = p;
// try copying
//char cmd[1024];
//sprintf(cmd,"cp -p /home/mwells/gigablast/%s "
// "%s%s",g_files[i],g_hostdb.m_dir,g_files[i]);
//log("db: trying to copy: \"%s\"",cmd);
//system(cmd);
needsFiles = true;
}
}
if ( needsFiles ) {
log("db: Missing files. See above. Exiting.");
return false;
}
//if ( needsFiles ) {
// log("db: use 'apt-get install -y netpbm' to install "
// "pnmfiles");
// return false;
//}
// . check for tagdb files tagdb0.xml to tagdb50.xml
// . MDW - i have phased these annoying files out 100%
//for ( int32_t i = 0 ; i <= 50 ; i++ ) {
// char tmp[100];
// sprintf ( tmp , "tagdb%"INT32".xml" , i );
// File f;
// f.set ( dir , tmp );
// if ( ! f.doesExist() )
// return log("db: %s%s missing. Copy over from "
// "titan:/gb/conf/%s",dir,tmp,tmp);
//}
if ( ! g_conf.m_isLive ) return true;
m_swapEnabled = 0;
// first check to make sure swap is off
SafeBuf psb;
if ( psb.fillFromFile("/proc/swaps") < 0 ) {
log("gb: failed to read /proc/swaps");
//if ( ! g_errno ) g_errno = EBADENGINEER;
//return true;
// if we don't know if swap is enabled or not, use -1
m_swapEnabled = -1;
}
/*
File f;
f.set ("/proc/swaps");
int32_t size = f.getFileSize() ;
char *buf = (char *)mmalloc ( size+1, "S99" );
if ( ! buf ) return false;
if ( ! f.open ( O_RDONLY ) )
return log("gb: failed to open %s",f.getFilename());
if ( size != f.read ( buf , size , 0 ) )
return log("gb: failed to read %s: %s",f.getFilename() ,
mstrerror(g_errno));
buf[size] = '\0';
*/
// we should redbox this! or at least be on the optimizations page
if ( m_swapEnabled == 0 ) {
char *buf = psb.getBufStart();
if ( strstr ( buf,"dev" ) )
//return log("gb: can not start live gb with swap "
//"enabled.");
m_swapEnabled = 1;
}
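// for reference, /proc/swaps with swap enabled typically looks like
// (illustrative values):
//
//   Filename        Type            Size    Used    Priority
//   /dev/sda2       partition       8388604 0       -1
//
// the header line contains no "dev", so the strstr() above only fires
// when an actual swap device line is present, i.e. swap is enabled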
// . make sure elvtune is being set right
// . must be in /etc/rcS.d/S99local
/*
f.set ("/etc/rcS.d/S99local" );
size = f.getFileSize() ;
buf = (char *)mmalloc ( size+1, "S99" );
if ( ! buf ) return false;
if ( ! f.open ( O_RDONLY ) )
return log("gb: failed to open %s",f.getFilename());
if ( size != f.read ( buf , size , 0 ) )
return log("gb: failed to read %s",f.getFilename() );
buf[size]='\0';
if ( ! strstr (buf,"\n/usr/sbin/elvtune -w 32 /dev/sda") ||
! strstr (buf,"\n/usr/sbin/elvtune -w 32 /dev/sdb") ||
! strstr (buf,"\n/usr/sbin/elvtune -w 32 /dev/sdc") ||
! strstr (buf,"\n/usr/sbin/elvtune -w 32 /dev/sdd") )
// just note it now and do not exit since 2.6's elevator
// tuning is totally different. NO! we are not using 2.6
// cuz it sux...
return log("gb: %s does not contain "
"/usr/sbin/elvtune -w 32 /dev/sd[a-d]" ,
f.getFilename());
mfree ( buf , size+1, "S99" );
*/
// now that we are open source skip the checks below
return true;
// check kernel version
FILE *fd;
fd = fopen ( "/proc/version" , "r" );
if ( ! fd ) {
log("gb: could not open /proc/version to check kernel version:%s",
strerror(errno));
return false;
}
// read in version
char vbuf[4000];
fgets ( vbuf , 3900 , fd );
fclose ( fd );
// compare it
if ( strcmp ( vbuf , "Linux version 2.4.31-bigcore "
"(jolivares@voyager) (gcc version 2.95.4 20011002 "
"(Debian prerelease)) #2 SMP Fri Apr 14 12:48:46 "
"MST 2006\n") == 0 )
return true;
if ( strcmp ( vbuf , "Linux version 2.4.31-bigcore "
"(msabino@voyager) (gcc version 2.95.4 20011002 "
"(Debian prerelease)) #7 SMP Mon Aug 21 18:09:30 "
"MDT 2006\n") == 0 )
return true;
// this one is for the dual and quad core machines i think
if ( strcmp ( vbuf , "Linux version 2.4.34-e755 (jolivares@titan) "
"(gcc version 2.95.4 20011002 (Debian prerelease)) "
"#22 SMP Tue May 15 02:22:43 MDT 2007\n")==0 )
return true;
// temp hack test
//if ( strcmp ( vbuf , "Linux version 2.6.30 (mwells@titan) (gcc "
// "version 4.1.2 20061115 (prerelease) (Debian 4.1.1-"
// "21)) #4 SMP Thu Jun 18 12:56:50 MST 2009\n")==0 )
// return true;
// this is used for router0 and router1
if ( g_hostdb.m_myHost->m_isProxy &&
strcmp ( vbuf , "Linux version 2.6.25.10 (mwells@titan) "
"(gcc version 4.1.2 20061115 (prerelease) "
"(Debian 4.1.1-21)) #9 SMP Sun Oct 12 15:23:40 "
"MST 2008\n")== 0)
return true;
log("gb: kernel version is not an approved version.");
//return false;
return true;
}
static void powerMonitorWrapper ( int fd , void *state ) ;
static void fanSwitchCheckWrapper ( int fd , void *state ) ;
static void gotPowerWrapper ( void *state , TcpSocket *s ) ;
static void doneCmdWrapper ( void *state ) ;
static void hdtempWrapper ( int fd , void *state ) ;
static void hdtempDoneWrapper ( void *state , ThreadEntry *t ) ;
static void *hdtempStartWrapper_r ( void *state , ThreadEntry *t ) ;
static void heartbeatWrapper ( int fd , void *state ) ;
//static void diskHeartbeatWrapper ( int fd , void *state ) ;
static void processSleepWrapper ( int fd , void *state ) ;
Process::Process ( ) {
m_mode = NO_MODE;
m_exiting = false;
m_powerIsOn = true;
m_totalDocsIndexed = -1LL;
}
bool Process::init ( ) {
// -1 means unknown
m_diskUsage = -1.0;
m_diskAvail = -1LL;
// we do not know if the fans are turned off or on
m_currentFanState = -1;
m_threadOut = false;
m_powerReqOut = false;
m_powerIsOn = true;
m_numRdbs = 0;
m_suspendAutoSave = false;
// . init the array of rdbs
// . primary rdbs
// . let's try to save tfndb first, that is the most important,
// followed by titledb perhaps...
//m_rdbs[m_numRdbs++] = g_tfndb.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
m_rdbs[m_numRdbs++] = g_clusterdb.getRdb ();
m_rdbs[m_numRdbs++] = g_tagdb.getRdb ();
m_rdbs[m_numRdbs++] = g_catdb.getRdb ();
m_rdbs[m_numRdbs++] = g_statsdb.getRdb ();
m_rdbs[m_numRdbs++] = g_linkdb.getRdb ();
m_rdbs[m_numRdbs++] = g_cachedb.getRdb ();
m_rdbs[m_numRdbs++] = g_serpdb.getRdb ();
m_rdbs[m_numRdbs++] = g_monitordb.getRdb ();
//m_rdbs[m_numRdbs++] = g_placedb.getRdb ();
// save what urls we have been doled
m_rdbs[m_numRdbs++] = g_doledb.getRdb ();
//m_rdbs[m_numRdbs++] = g_syncdb.getRdb ();
// secondary rdbs (excludes catdb)
//m_rdbs[m_numRdbs++] = g_tfndb2.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb2.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb2.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_checksumdb2.getRdb ();
m_rdbs[m_numRdbs++] = g_clusterdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_tagdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_statsdb2.getRdb ();
m_rdbs[m_numRdbs++] = g_linkdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_placedb2.getRdb ();
m_rdbs[m_numRdbs++] = g_tagdb2.getRdb ();
/////////////////
// CAUTION!!!
/////////////////
// Add any new rdbs to the END of the list above so
// it doesn't screw up Rebalance.cpp which uses this list too!!!!
/////////////////
//call these back right before we shutdown the
//httpserver.
m_callbackState = NULL;
m_callback = NULL;
// do not do an autosave right away
m_lastSaveTime = 0;//gettimeofdayInMillisecondsLocal();
// reset this
m_sentShutdownNote = false;
// this is used for shutting down as well
m_blockersNeedSave = true;
m_repairNeedsSave = true;
// count tries
m_try = 0;
// reset this timestamp
m_firstShutdownTime = 0;
// set the start time, local time
m_processStartTime = gettimeofdayInMillisecondsLocal();
// reset this
m_lastHeartbeatApprox = 0;
m_calledSave = false;
// heartbeat check
if ( ! g_loop.registerSleepCallback(100,NULL,heartbeatWrapper,0))
return false;
// we use SSDs now so comment this out
//if ( !g_loop.registerSleepCallback(500,NULL,diskHeartbeatWrapper,0))
// return false;
// get first snapshot of load average...
//update_load_average(gettimeofdayInMillisecondsLocal());
// . continually call this once per second
// . once every half second now so that autosaves are closer together
// in time between all hosts
if ( ! g_loop.registerSleepCallback(500,NULL,processSleepWrapper))
return false;
// . hard drive temperature
// . now that we use intel ssds that do not support smart, ignore this
// . well use it for disk usage i guess
if ( ! g_loop.registerSleepCallback(10000,NULL,hdtempWrapper,0))
return false;
// power monitor, every 30 seconds
if ( ! g_loop.registerSleepCallback(30000,NULL,powerMonitorWrapper,0))
return false;
// check temps to possibly turn fans on/off every 60 seconds
if ( !g_loop.registerSleepCallback(60000,NULL,fanSwitchCheckWrapper,0))
return false;
// -99 means unknown
m_dataCtrTemp = -99;
m_roofTemp = -99;
// success
return true;
}
bool Process::isAnyTreeSaving ( ) {
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
if ( rdb->m_isCollectionLess ) continue;
if ( rdb->isSavingTree() ) return true;
// we also just disable writing below in Process.cpp
// while saving other files. so hafta check that as well
// since we use isAnyTreeSaving() to determine if we can
// write to the tree or not.
if ( ! rdb->isWritable() ) return true;
}
return false;
}
void powerMonitorWrapper ( int fd , void *state ) {
if ( g_isYippy ) return;
// only if in matt wells datacenter
if ( ! g_conf.m_isMattWells )
return;
// are we in group #0
bool checkPower = false;
// get our host
Host *me = g_hostdb.m_myHost;
// if we are in shard #0 and host #0 is dead, we check it
if ( me->m_shardNum == 0 && g_hostdb.isDead((int32_t)0) )
checkPower = true;
// if we are host #0 we always check it
if ( me->m_hostId == 0 ) checkPower = true;
// proxy never checks power
if ( me->m_isProxy ) checkPower = false;
// if not checking, all done
if ( ! checkPower ) return;
// only if live
//if ( ! g_conf.m_isLive ) return;
// skip if request out already
if ( g_process.m_powerReqOut ) return;
// the url
char *url = "http://10.5.0.9/getData.htm";
// download it
//log(LOG_INFO,"powermo: getting %s",url);
// for httpserver
//Url u; u.set ( url , gbstrlen(url) );
// mark the request as outstanding so we do not overlap it
g_process.m_powerReqOut = true;
// get it
bool status = g_httpServer.
getDoc ( url , // url to download
0 , // ip
0 , // offset
-1 , // size
0 , // ifModifiedSince
NULL , // state
gotPowerWrapper , // callback
30*1000 , // timeout
0 , // proxy ip
0 , // proxy port
1*1024*1024 , // maxLen
1*1024*1024 , // maxOtherLen
"Mozilla/4.0 "
"(compatible; MSIE 6.0; Windows 98; "
"Win 9x 4.90)" ,
//false , // respect download limit?
"HTTP/1.1" );// fake 1.1 otherwise we get error!
// wait for it
if ( ! status ) return;
// i guess it is back!
g_process.m_powerReqOut = false;
// call this to wrap things up
g_process.gotPower ( NULL );
}
void gotPowerWrapper ( void *state , TcpSocket *s ) {
g_process.gotPower ( s );
}
// . returns false if blocked, true otherwise
// . returns true and sets g_errno on error
bool Process::gotPower ( TcpSocket *s ) {
// i guess it is back!
g_process.m_powerReqOut = false;
if ( ! s ) {
log("powermo: got NULL socket");
return true;
}
// point into buffer
char *buf ;
int32_t bufSize ;
// assume power is on
int32_t val = 0;
HttpMime mime;
char *content;
int32_t contentLen;
char *p;
char *dataCtrTempStr;
char *roofTempStr;
char *tag1,*tag2;
float newTemp;
if ( g_errno ) {
log("powermo: had error getting power state: %s. assuming "
"power on.",
mstrerror(g_errno));
//return true;
// assume power went off
//val = 1;
goto skip;
}
// point into buffer
buf = s->m_readBuf;
bufSize = s->m_readOffset;
// note it
//log(LOG_INFO,"powermo: got power reply");
if ( ! buf ) {
log(LOG_INFO,"powermo: got empty reply. assuming power on.");
// return true;
// assume power went off
//val = 1;
goto skip;
}
mime.set ( buf , bufSize , NULL );
content = buf + mime.getMimeLen();
contentLen = bufSize - mime.getMimeLen();
content[contentLen]='\0';
// get the state of the power!
p = strstr ( content ,"\"power\",status:" );
// panic?
if ( ! p ) {
log("powermo: could not parse out power from room alert. "
"assuming power on. "
"content = %s",content);
//return true;
// assume power went off
//val = 1;
goto skip;
}
// . get the value
// . val is 0 if the power is ON!!!!
// . val is non-zero if the power is OFF!!!
val = atoi ( p + 15 );
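// for illustration, the room alert page is expected to contain a
// javascript-ish blob along these lines (exact format is device
// specific, values illustrative):
//
//   ... "power",status:0, ...   -> atoi() sees "0," -> val = 0 (power ON)
//   ... "power",status:1, ...   -> atoi() sees "1," -> val = 1 (power OFF)
//
// the +15 above skips the 15 chars of "\"power\",status:" so atoi()
// starts right at the digit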
// random values for testing!!
//val = rand()%2;
//
// . now get the temperature in the data ctr and the roof
// . log it every hour i guess for shits and giggles
// . if the roof temp is less than the data ctr temp then
// we want to keep the fans on, otherwise we need to send an
// http request to the power strip control to turn the fans off
//
tag1 = "\"Exit Temp\",tempf:\"" ;
dataCtrTempStr = strstr( content, tag1 );
if ( ! dataCtrTempStr ) {
log("powermo: could not parse our data ctr temp from "
"room alert.");
goto skip;
}
newTemp = atof ( dataCtrTempStr+ gbstrlen(tag1) );
if ( newTemp != m_dataCtrTemp )
log("powermo: data ctr temp changed from %0.1f to %.01f",
m_dataCtrTemp,newTemp);
m_dataCtrTemp = newTemp;
tag2 = "\"Roof Temp\",tempf:\"";
roofTempStr = strstr( content, tag2 );
if ( ! roofTempStr ) {
log("powermo: could not parse out roof temp from "
"room alert.");
goto skip;
}
newTemp = atof ( roofTempStr+gbstrlen(tag2));
if ( newTemp != m_roofTemp )
log("powermo: roof temp changed from %0.1f to %.01f",
m_roofTemp,newTemp);
m_roofTemp = newTemp;
skip:
// 0 means the alert is not triggered and power is on
if ( val == 0 && m_powerIsOn == true ) {
//log("powermo: power is still ON.");
return true;
}
// if it is off and was off before, don't do anything
if ( val && m_powerIsOn == false ) {
log("powermo: power is still OFF.");
return true;
}
char *up = NULL;
// if it was off before, tell everyone it is back on
if ( val == 0 && m_powerIsOn == false ) {
log("powermo: power is back ON!");
up = "/master?haspower=1&username=msg28&cast=0";
// update ourselves to prevent sending these multiple times
//m_powerIsOn = true;
}
else if ( val && m_powerIsOn == true ) {
log("powermo: power is OFF!");
up = "/master?haspower=0&username=msg28&cast=0";
// . update ourselves to prevent sending these multiple times
// . no, we need to make sure to save in Parms.cpp::
// CmdPower
//m_powerIsOn = false;
}
// how did this happen?
if ( m_powerReqOut ) return true;
// the request url
//Url ru; ru.set ( up , gbstrlen(up) );
// set the http request
if ( ! m_r.set ( up ) ) {
log("powermo: got httprequest set error: %s",
mstrerror(g_errno));
return true;
}
// we are out again...
g_process.m_powerReqOut = true;
log("powermo: sending notice to all hosts.");
SafeBuf parmList;
// add the parm rec as a parm cmd
if ( ! g_parms.addNewParmToList1 ( &parmList,
(collnum_t)-1,
NULL, // parmval (argument)
-1, // collnum (-1 -> globalconf)
"poweron") ) // CommandPowerOn()!
return true;
// . use the broadcast call here so things keep their order!
// . we do not need a callback when they have been completely
// broadcasted to all hosts so use NULL for that
g_parms.broadcastParmList ( &parmList , NULL , NULL );
// . turn off spiders
// . also show that power is off now!
//if ( ! m_msg28.massConfig ( m_r.getRequest() ,
// NULL , // state
// doneCmdWrapper ) )
// // return false if this blocked
// return false;
// . hmmm.. it did not block
// . this does not block either
doneCmdWrapper ( NULL );
return true;
}
void doneCmdWrapper ( void *state ) {
// we are back
g_process.m_powerReqOut = false;
// note it
log("powermo: DONE sending notice to all hosts.");
}
void hdtempWrapper ( int fd , void *state ) {
// also download test urls from spider proxies to ensure they
// are up and running properly
downloadTestUrlFromProxies();
// reset this... why?
g_errno = 0;
// do not get if already getting
if ( g_process.m_threadOut ) return;
// skip if exiting
if ( g_process.m_mode == EXIT_MODE ) return;
// current local time
int32_t now = getTime();
// or if we haven't waited long enough
if ( now < s_nextTime ) return;
// set it
g_process.m_threadOut = true;
// . call thread to call popen
// . callThread returns true on success, in which case we block
if ( g_threads.call ( FILTER_THREAD ,
MAX_NICENESS ,
NULL , // this
hdtempDoneWrapper ,
hdtempStartWrapper_r ) ) return;
// back
g_process.m_threadOut = false;
// . call it directly
// . only mention once to avoid log spam
static bool s_first = true;
if ( s_first ) {
s_first = false;
log("build: Could not spawn thread for call to get hd temps. "
"Ignoring hd temps. Only logging once.");
}
// MDW: comment these two guys out to avoid calling it for now
// get the data
//hdtempStartWrapper_r ( false , NULL ); // am thread?
// and finish it off
//hdtempDoneWrapper ( NULL , NULL );
}
// come back here
void hdtempDoneWrapper ( void *state , ThreadEntry *t ) {
// we are back
g_process.m_threadOut = false;
// current local time
int32_t now = getTime();
// if we had an error, do not schedule again for an hour
//if ( s_flag ) s_nextTime = now + 3600;
// reset it
//s_flag = 0;
// send email alert if too hot
Host *h = g_hostdb.m_myHost;
// get max temp
int32_t max = 0;
for ( int32_t i = 0 ; i < 4 ; i++ ) {
int16_t t = h->m_pingInfo.m_hdtemps[i];
if ( t > max ) max = t;
}
// . leave if ok
// . the seagates tend to have a max CASE TEMP of 69 C
// . it says the operating temps are 0 to 60 though, so
// i am assuming that is ambient?
// . but this temp is probably the case temp that we are measuring
if ( max <= g_conf.m_maxHardDriveTemp ) return;
// leave if we already sent an alert within 5 mins
static int32_t s_lasttime = 0;
if ( now - s_lasttime < 5*60 ) return;
// prepare msg to send
char msgbuf[1024];
Host *h0 = g_hostdb.getHost ( 0 );
snprintf(msgbuf, 1024,
"hostid %"INT32" has overheated HD at %"INT32" C "
"cluster=%s (%s). Disabling spiders.",
h->m_hostId,
(int32_t)max,
g_conf.m_clusterName,
iptoa(h0->m_ip));
// send it, force it, so even if email alerts off, it sends it
g_pingServer.sendEmail ( NULL , // Host *h
msgbuf , // char *errmsg = NULL ,
true , // bool sendToAdmin = true ,
false , // bool oom = false ,
false , // bool kernelErrors = false ,
false , // bool parmChanged = false ,
true );// bool forceIt = false );
s_lasttime = now;
}
// set Process::m_diskUsage
float getDiskUsage ( int64_t *diskAvail ) {
// first get disk usage now
char cmd[10048];
char out[1024];
sprintf(out,"%sdiskusage",g_hostdb.m_dir);
snprintf(cmd,10000,"df -ka %s | tail -1 | "
"awk '{print $4\" \"$5}' > %s",
g_hostdb.m_dir,
out);
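// for example (illustrative numbers), if 'df -ka <dir> | tail -1' prints
//
//   /dev/sda1  1000000000  480000000  520000000  48% /
//
// then the awk above writes "520000000 48%" into the diskusage file and
// the sscanf() below parses avail=520000000 (in KB) and usage=48.0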
errno = 0;
int err = system ( cmd );
if ( err == 127 ) {
log("build: /bin/sh does not exist. can not get disk usage.");
return -1.0; // unknown
}
// this will happen if you don't upgrade glibc to 2.2.4-32 or above
if ( err != 0 ) {
log("build: Call to system(\"%s\") had error: %s",
cmd,mstrerror(errno));
return -1.0; // unknown
}
// read in the df output from the file
int fd = open ( out , O_RDONLY );
if ( fd < 0 ) {
//m_errno = errno;
log("build: Could not open %s for reading: %s.",
out,mstrerror(errno));
return -1.0; // unknown
}
char buf[2000];
int32_t r = read ( fd , buf , 1999 );
// did we get an error
if ( r <= 0 ) {
//m_errno = errno;
log("build: Error reading %s: %s.",out,mstrerror(errno));
close ( fd );
return -1.0; // unknown
}
// clean up shop
close ( fd );
// null terminate what we read so sscanf() below stops at the end
buf[r] = '\0';
float usage;
int64_t avail;
sscanf(buf,"%"INT64" %f",&avail,&usage);
// it is in KB so make it into bytes
if ( diskAvail ) *diskAvail = avail * 1000LL;
return usage;
}
// . sets m_errno on error
// . taken from Msg16.cpp
void *hdtempStartWrapper_r ( void *state , ThreadEntry *t ) {
// run the df -ka cmd
g_process.m_diskUsage = getDiskUsage( &g_process.m_diskAvail );
// ignore temps now. ssds don't have it
return NULL;
static char *s_parm = "ata";
// make a system call to /usr/sbin/hddtemp /dev/sda,b,c,d
//char *cmd =
// "/usr/sbin/hddtemp /dev/sda > /tmp/hdtemp ;"
// "/usr/sbin/hddtemp /dev/sdb >> /tmp/hdtemp ;"
// "/usr/sbin/hddtemp /dev/sdc >> /tmp/hdtemp ;"
// "/usr/sbin/hddtemp /dev/sdd >> /tmp/hdtemp ";
retry:
// linux 2.4 does not seem to like hddtemp
char *path = g_hostdb.m_dir;
//char *path = "/usr/sbin/";
char cmd[10048];
sprintf ( cmd ,
"%ssmartctl -Ad %s /dev/sda | grep Temp | awk '{print $10}' > /tmp/hdtemp2;"
"%ssmartctl -Ad %s /dev/sdb | grep Temp | awk '{print $10}' >> /tmp/hdtemp2;"
"%ssmartctl -Ad %s /dev/sdc | grep Temp | awk '{print $10}' >> /tmp/hdtemp2;"
"%ssmartctl -Ad %s /dev/sdd | grep Temp | awk '{print $10}' >> /tmp/hdtemp2" ,
path,s_parm ,
path,s_parm ,
path,s_parm ,
path,s_parm );
// the output
char *out = "/tmp/hdtemp2";
// timeout of 5 seconds
//int err = my_system_r ( cmd , 5 );
int err = system ( cmd );
//logf(LOG_DEBUG,"proc: system \"%s\"",cmd);
if ( err == 127 ) {
//m_errno = EBADENGINEER;
log("build: /bin/sh does not exist.");
return NULL;
}
// this will happen if you don't upgrade glibc to 2.2.4-32 or above
if ( err != 0 ) {
//m_errno = EBADENGINEER;
log("build: Call to system(\"%s\") had error.",cmd);
//s_flag = 1;
// wait 5 minutes
s_nextTime = getTime() + 300; // 3600;
return NULL;
}
// read in temperatures from file
int fd = open ( "/tmp/hdtemp2" , O_RDONLY );
if ( fd < 0 ) {
//m_errno = errno;
log("build: Could not open %s for reading: %s.",
out,mstrerror(errno));
return NULL;
}
char buf[2000];
int32_t r = read ( fd , buf , 1999 );
// maybe try the marvell option?
if ( r == 0 && s_parm[0]!='m' ) {
log("gb: smartctl did not work. Trying marvell option.");
s_parm = "marvell";
goto retry;
}
else if ( r == 0 ) {
log("gb: Please run apt-get install smartmontools to install "
"smartctl and then chown root:root %ssmartctl ; "
"chmod +s %ssmartctl. cmd=%s",path,path,cmd);
// wait 5 mins
s_nextTime = getTime() + 300;
}
// did we get an error
if ( r < 0 ) {
//m_errno = errno;
log("build: Error reading %s: %s.",out,mstrerror(errno));
close ( fd );
return NULL;
}
// clean up shop
close ( fd );
// null terminate before parsing since gbstrlen()/atoi() below need it
buf[r] = '\0';
// . typical file from hddtemp:
// /dev/sda: ST3400620AS: 39 C
// /dev/sdb: ST3400620AS: 39 C
// /dev/sdc: ST3400620AS: 39 C
// /dev/sdd: ST3400620AS: 39 C
// . typical file from smartctl
// 39\n37\n37\n37\n
char *p = buf;
// end
char *pend = buf + gbstrlen(buf);
// store the temps here
int16_t *temp = g_hostdb.m_myHost->m_pingInfo.m_hdtemps;
// there are 4
int16_t *tempEnd = temp + 4;
//
// parse output from smartctl
//
while ( temp < tempEnd ) {
// get temp
*temp++ = atoi(p);
// skip til after \n
while ( p < pend && *p != '\n' ) p++;
// skip \n
p++;
// done? strange.
if ( p >= pend ) return NULL;
}
// done
return NULL;
//
// parse output from hddtemp
//
// get all 4
while ( temp < tempEnd ) {
// skip till after 2nd colon
while ( p < pend && *p!=':' ) p++;
// skip over colon
p++;
// skip until we hit 2nd colon
while ( p < pend && *p!=':' ) p++;
// skip colon and space
p += 2;
// get temp
*temp++ = atoi(p);
}
return NULL;
}
void Process::callHeartbeat () {
heartbeatWrapper ( 0 , NULL );
}
void heartbeatWrapper ( int fd , void *state ) {
static int64_t s_last = 0LL;
static int64_t s_lastNumAlarms = 0LL;
int64_t now = gettimeofdayInMilliseconds();
if ( s_last == 0LL ) {
s_last = now;
s_lastNumAlarms = g_numAlarms;
return;
}
// . log when we've gone 100+ ms over our scheduled beat
// . this is a sign things are jammed up
int64_t elapsed = now - s_last;
if ( elapsed > 200 )
// now we print the # of elapsed alarms. that way we will
// know if the alarms were going off or not...
// this happens if the rt sig queue is overflowed.
// check the "cat /proc/<pid>/status | grep SigQ" output
// to see if its overflowed. hopefully i will fix this by
// queue the signals myself in Loop.cpp.
log("db: missed heartbeat by %"INT64" ms. Num elapsed alarms = "
"%"INT32"", elapsed-100,(int32_t)(g_numAlarms - s_lastNumAlarms));
s_last = now;
s_lastNumAlarms = g_numAlarms;
// save this time so the sig alarm handler can see how long
// it has been since we've been called, so after 10000 ms it
// can dump core and we can see what is holding things up
g_process.m_lastHeartbeatApprox = g_nowApprox;
}
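// a minimal sketch (assumed shape, the real handler lives in Loop.cpp) of
// how the alarm handler can use m_lastHeartbeatApprox: if the main loop
// has not serviced the 100ms heartbeat for ~10 seconds it is probably
// wedged, so dump core for a post-mortem.
//
//   // inside the periodic SIGALRM handler (illustrative):
//   // if ( g_process.m_lastHeartbeatApprox &&
//   //      g_nowApprox - g_process.m_lastHeartbeatApprox > 10000 ) {
//   //         log("loop: no heartbeat for 10+ secs. aborting to dump core.");
//   //         abort();
//   // }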
/*
void diskHeartbeatWrapper ( int fd , void *state ) {
// skip this now that we use SSDs
return;
bool stuck = false;
// do we have reads waiting?
bool isWaiting =
( g_threads.m_threadQueues[DISK_THREAD].m_hiReturned <
g_threads.m_threadQueues[DISK_THREAD].m_hiLaunched ) ;
// . must have been more than 1.5 secs since last read finished
// . if the disk read queue is empty when we add a new read thread
// request in BigFile.cpp, we set g_diskRequestAdded to g_now
if ( isWaiting &&
g_now - g_lastDiskReadCompleted >= 1500 &&
g_now - g_lastDiskReadStarted >= 1500 )
stuck = true;
// return if not stuck
if ( ! stuck ) {
// if we just got unstuck, log that
if ( g_diskIsStuck )
log("gb: disk is now unstuck.");
g_diskIsStuck = false;
return;
}
// if first time, log that
if ( ! g_diskIsStuck )
log("gb: disk appears to be stuck.");
// flag it so BigFile.cpp and File.cpp just return EDISKSTUCK and so
// we do not kill all disk read threads again
g_diskIsStuck = true;
// now call the callback of all disk read threads that have niceness
// 0 but set g_errno to EDISKSTUCK. when the actual read finally does
// complete it should just basically stop...
//
// take this out now that we have solid states!!!!!!!!!!!!!
//
//g_threads.bailOnReads();
}
*/
// called by PingServer.cpp only as of now
int64_t Process::getTotalDocsIndexed() {
if ( m_totalDocsIndexed == -1LL ) {
Rdb *rdb = g_clusterdb.getRdb();
// useCache = true
m_totalDocsIndexed = rdb->getNumTotalRecs(true);
}
return m_totalDocsIndexed;
}
void processSleepWrapper ( int fd , void *state ) {
if ( g_process.m_mode == EXIT_MODE ) {g_process.shutdown2(); return; }
if ( g_process.m_mode == SAVE_MODE ) {g_process.save2 (); return; }
if ( g_process.m_mode == LOCK_MODE ) {g_process.save2 (); return; }
if ( g_process.m_mode != NO_MODE ) return;
// update global rec count
static int32_t s_rcount = 0;
// every 2 seconds
if ( ++s_rcount >= 4 ) {
s_rcount = 0;
// PingServer.cpp uses this
Rdb *rdb = g_clusterdb.getRdb();
g_process.m_totalDocsIndexed = rdb->getNumTotalRecs();
}
// do not do autosave if no power
if ( ! g_process.m_powerIsOn ) return;
// . i guess try to autoscale the cluster in case hosts.conf changed
// . if all pings came in and all hosts have the same hosts.conf
// and if we detected any shard imbalance at startup we have to
// scan all rdbs for records that don't belong to us and send them
// where they should go
// . returns right away in most cases
g_rebalance.rebalanceLoop();
// in PageInject.cpp, start up any imports that might have been
// going on before we shut down last time.
resumeImports();
// if doing the final part of a repair.cpp loop where we convert
// titledb2 files to titledb etc. then do not save!
if ( g_repairMode == 7 ) return;
// autosave? override this if power is off, we need to save the data!
//if (g_conf.m_autoSaveFrequency <= 0 && g_process.m_powerIsOn) return;
if ( g_conf.m_autoSaveFrequency <= 0 ) return;
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return;
// skip autosave while sync in progress!
if ( g_process.m_suspendAutoSave ) return;
// need to have a clock unified with host #0. i guess proxy
// does not sync with host #0 though
//if ( ! isClockInSync() && ! g_hostdb.m_myHost->m_isProxy ) return;
// get time the day started
int32_t now;
if ( g_hostdb.m_myHost->m_isProxy ) now = getTimeLocal();
else {
// need to be in sync with host #0's clock
if ( ! isClockInSync() ) return;
// that way autosaves all happen at about the same time
now = getTimeGlobal();
}
// set this for the first time
if ( g_process.m_lastSaveTime == 0 )
g_process.m_lastSaveTime = now;
//
// we now try to align our autosaves with start of the day so that
// all hosts autosave at the exact same time!! this should keep
// performance somewhat consistent.
//
// get frequency in minutes
int32_t freq = (int32_t)g_conf.m_autoSaveFrequency ;
// convert into seconds
freq *= 60;
// how many seconds into the day has it been?
int32_t offset = now % (24*3600);
int32_t dayStart = now - offset;
// how many times should we have autosaved so far for this day?
int32_t autosaveCount = offset / freq;
// convert to when it should have been last autosaved
int32_t nextLastSaveTime = (autosaveCount * freq) + dayStart;
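// worked example (illustrative numbers): with m_autoSaveFrequency = 30
// minutes, freq = 1800 secs. if 'now' is 5000 secs into the day then
// offset = 5000, dayStart = now - 5000, autosaveCount = 5000/1800 = 2
// and nextLastSaveTime = dayStart + 3600. every host whose clock is in
// sync with host #0 computes the same nextLastSaveTime, so they all
// autosave on the same schedule.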
// if we already saved it for that time, bail
if ( g_process.m_lastSaveTime >= nextLastSaveTime ) return;
//int64_t now = gettimeofdayInMillisecondsLocal();
// . get a snapshot of the load average...
// . MDW: disable for now. not really used...
//update_load_average(now);
// convert from minutes in milliseconds
//int64_t delta = (int64_t)g_conf.m_autoSaveFrequency * 60000LL;
// if power is off make this every 30 seconds temporarily!
//if ( ! g_process.m_powerIsOn ) delta = 30000;
// return if we have not waited long enough
//if ( now - g_process.m_lastSaveTime < delta ) return;
// update
g_process.m_lastSaveTime = nextLastSaveTime;//now;
// save everything
logf(LOG_INFO,"db: Autosaving.");
g_process.save();
}
bool Process::save ( ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
// bail if doing something already
if ( m_mode != 0 ) return true;
// log it
logf(LOG_INFO,"db: Entering lock mode for saving.");
m_mode = LOCK_MODE; // SAVE_MODE;
m_urgent = false;
m_calledSave = false;
return save2();
}
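// note: save2() can return false while the tree/map saves are still in
// flight. m_mode stays in SAVE_MODE/LOCK_MODE, so processSleepWrapper()
// above keeps re-calling save2() every 500ms until it finally returns
// true and sets m_mode back to NO_MODE.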
bool Process::shutdown ( bool urgent ,
void *state,
void (*callback) (void *state )) {
// bail if doing something already
if ( m_mode != 0 ) {
// if already in exit mode, just return
if ( m_mode == EXIT_MODE )
return true;
// otherwise, log it!
log("process: shutdown called, but mode is %"INT32"",
(int32_t)m_mode);
return true;
}
m_mode = EXIT_MODE;
m_urgent = urgent;
m_calledSave = false;
// check memory buffers for overruns/underrunds to see if that
// caused this core
if ( urgent ) g_mem.printBreeches(false);
if(!shutdown2()) {
m_callbackState = state;
m_callback = callback;
return false;
}
return true;
}
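// a hedged usage sketch (illustrative; the real call sites are elsewhere,
// e.g. signal handlers and the admin save-and-exit path): an urgent
// shutdown saves what it can and then dumps core, while a clean exit
// passes urgent=false and exits normally once everything is saved:
//
//   // from a fatal signal handler (illustrative):
//   // g_process.shutdown ( true /*urgent*/ , NULL , NULL );
//
//   // from an orderly save-and-exit request (illustrative):
//   // g_process.shutdown ( false , NULL , NULL );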
// return false if blocked/waiting
bool Process::save2 ( ) {
// MDW: why was this here? i commented it out. we need to do
// quickpolls when autosaving for sure.
//g_loop.disableTimer();
// only the main process can call this
if ( g_threads.amThread() ) return true;
// . wait for any dump to complete
// . when merging titledb, it sets Rdb::m_dump.m_isDumping to true
// because it is dumping the results of the merge to a file.
// occasionally it will initiate a dump of tfndb which will not be
// possible because Rdb/RdbDump checks g_process.m_mode == SAVE_MODE
// and will not allow dumps to begin while that is true! so we would
// end up in deadlock: the save could never complete.
if ( isRdbDumping() ) return false;
// ok, now nobody is dumping, etc. make it so no dumps can start.
// Rdb.cpp/RdbDump.cpp check for this and will not dump if it is
// set to SAVE_MODE
m_mode = SAVE_MODE;
logf(LOG_INFO,"gb: Saving data to disk. Disabling writes.");
// . disable adds/deletes on all rdb trees
// . Msg1 requests will get ETRYAGAIN error replies
// . this is instantaneous because all tree mods happen in this
// main process, not in a thread
disableTreeWrites( false );
bool useThreads = true;
// . tell all rdbs to save trees
// . will return true if no rdb tree needs a save
if ( ! saveRdbTrees ( useThreads , false ) ) return false;
// . save all rdb maps if they need it
// . will return true if no rdb map needs a save
// . save these last since maps can be auto-regenerated at startup
if ( ! saveRdbMaps ( useThreads ) ) return false;
// . save the conf files and caches. these block the cpu.
// . save these first since more important than the stuff below
// . no, to avoid saving multiple times, put this last since the
// stuff above may block and we have to re-call this function
if ( ! saveBlockingFiles1() ) return false;
// save addsInProgress.dat etc. if power goes off. this should be the
// one time we are called from power going off... since we do not
// do autosave when the power is off. this just blocks and never
// returns false, so call it without checking the return value.
if ( ! g_process.m_powerIsOn ) saveBlockingFiles2() ;
// for Test.cpp parser test we want to save the waitingtree.dat
else if ( g_threads.m_disabled ) saveBlockingFiles2() ;
// until all caches have saved, disable them
g_cacheWritesEnabled = false;
// . save caches
// . returns true if NO cache needs to be saved
//if ( ! saveRdbCaches ( useThreads ) ) return false;
// bring them back
g_cacheWritesEnabled = true;
// reenable tree writes since saves were completed
enableTreeWrites( false );
log(LOG_INFO,"gb: Saved data to disk. Re-enabling Writes.");
// update
//m_lastSaveTime = gettimeofdayInMillisecondsLocal();
// unlock
m_mode = NO_MODE;
return true;
}
// . return false if blocked/waiting
// . this is the SAVE BEFORE EXITING
bool Process::shutdown2 ( ) {
g_loop.disableTimer();
// only the main process can call this
if ( g_threads.amThread() ) return true;
if ( m_urgent )
log(LOG_INFO,"gb: Shutting down urgently. "
"Timed try #%"INT32".",
m_try++);
else
log(LOG_INFO,"gb: Shutting down. Timed try #%"INT32".",
m_try++);
// switch to urgent if having problems
if ( m_try >= 10 )
m_urgent = true;
// turn off statsdb so it does not try to add records for these writes
g_statsdb.m_disabled = true;
if ( g_threads.areThreadsEnabled () ) {
log("gb: disabling threads");
// now disable threads so we don't exit while threads are
// outstanding
g_threads.disableThreads();
}
// . suspend all merges
g_merge.suspendMerge () ;
g_merge2.suspendMerge() ;
// assume we will use threads
// no, not now that we disabled them
bool useThreads = false;//true;
// if urgent do not allow any further threads to be spawned unless
// they were already queued
if ( m_urgent ) {
// do not use thread spawning
useThreads = false;
// turn off all threads just in case
if ( ! useThreads ) g_threads.disableThreads();
}
static bool s_printed = false;
// wait for all threads to return
int32_t n = g_threads.getNumThreadsOutOrQueued() ;
if ( n != 0 && ! m_urgent ) {
log(LOG_INFO,"gb: Has %"INT32" threads out. Waiting for "
"them to finish.",n);
return false;
}
else if ( ! s_printed && ! m_urgent ) {
s_printed = true;
log(LOG_INFO,"gb: No threads out.");
}
// disable all spidering
// we can exit while spiders are in the queue because
// if they are in the middle of being added they will be
// saved by spider restore
// wait for all spiders to clear
// don't shut the crawler down on a core
//g_conf.m_spideringEnabled = false;
//g_conf.m_injectionEnabled = false;
// make sure they are in a saveable state. we need to make sure
// they have dumped out the latest merged list and updated the
// appropriate RdbMap so we can save it below
bool wait = false;
if ( g_merge.m_isMerging && ! g_merge.m_isReadyToSave ) wait = true;
if ( g_merge2.m_isMerging && ! g_merge2.m_isReadyToSave ) wait = true;
// wait for any dump to complete
if ( isRdbDumping() ) wait = true;
// . wait for the merge or dump to complete
// . but NOT if urgent...
if ( wait && ! m_urgent ) return false;
// . disable adds/deletes on all rdb trees
// . Msg1 requests will get ECLOSING error msgs
// . this is instantaneous because all tree mods happen in this
// main process, not in a thread
disableTreeWrites( true );
// . tell all rdbs to save trees
// . will return true if no rdb tree needs a save
if ( ! saveRdbTrees ( useThreads , true ) )
if ( ! m_urgent ) return false;
// save this right after the trees in case we core
// in saveRdbMaps() again due to the core we are
// handling now corrupting memory
if ( m_repairNeedsSave ) {
m_repairNeedsSave = false;
g_repair.save();
}
// . save all rdb maps if they need it
// . will return true if no rdb map needs a save
if ( ! saveRdbMaps ( useThreads ) )
if ( ! m_urgent ) return false;
int64_t now = gettimeofdayInMillisecondsLocal();
if ( m_firstShutdownTime == 0 ) m_firstShutdownTime = now;
// these udp servers will not read in new requests or allow
// new requests to be sent. they will timeout any outstanding
// UdpSlots, and when empty they will return true here. they will
// close their m_sock and set it to -1 which should force their
// thread to exit.
// if not urgent, they will wait for a while for the
// sockets/slots to clear up.
// however, if 5 seconds or more have elapsed then force it
bool udpUrgent = m_urgent;
if ( now - m_firstShutdownTime >= 3000 ) udpUrgent = true;
if ( ! g_dns.m_udpServer.shutdown ( udpUrgent ) )
if ( ! udpUrgent ) return false;
// . send notes to all the hosts in the network telling them we're
// shutting down
// . this returns false if it blocks
// . we don't care if it blocks or not
// . don't bother asking the hosts to send an email alert for us
// since we're going down gracefully by letting everyone know
// . don't send this unless we are very sure we can shutdown NOW
// . i.e. no blocking after this call!
if ( ! m_sentShutdownNote && ! m_urgent ) {
log(LOG_INFO,"gb: Broadcasting shutdown notice.");
m_sentShutdownNote = true;
g_pingServer.broadcastShutdownNotes ( false , //sendEmailAlert?
NULL ,
NULL );
}
//broadcastShutdownNotes uses g_udpServer so we do this last.
if ( ! g_udpServer.shutdown ( udpUrgent ) )
if ( ! udpUrgent ) return false;
g_profiler.stopRealTimeProfiler();
g_profiler.cleanup();
// save the conf files and caches. these block the cpu.
if ( m_blockersNeedSave ) {
m_blockersNeedSave = false;
if (!g_conf.m_readOnlyMode)
logf(LOG_INFO,"gb: Saving miscellaneous data files.");
saveBlockingFiles1() ;
saveBlockingFiles2() ;
}
// . save all rdb caches if they need it
// . do this AFTER udp server is shut down so cache should not
// be accessed any more
// . will return true if no rdb cache needs a save
//if ( ! saveRdbCaches ( useThreads ) ) return false;
// always disable threads at this point so g_threads.call() will
// always return false and we do not queue any new threads for
// spawning
g_threads.disableThreads();
// urgent means we need to dump core, SEGV or something
if ( m_urgent ) {
// log it
log("gb: Dumping core after saving.");
// at least destroy the page caches that have shared memory
// because they seem to not clean it up
resetPageCaches();
// let's ensure our core file can dump
struct rlimit lim;
lim.rlim_cur = lim.rlim_max = RLIM_INFINITY;
if ( setrlimit(RLIMIT_CORE,&lim) )
log("gb: setrlimit: %s.", mstrerror(errno) );
// . force an abnormal termination which will cause a core dump
// . do not dump core on SIGHUP signals any more though
abort();
// keep compiler happy
return true;
}
// cleanup threads, this also launches them too
g_threads.timedCleanUp(0x7fffffff,MAX_NICENESS);
// wait for all threads to complete...
//int32_t n = g_threads.getNumThreadsOutOrQueued() ;
//if ( n > 0 )
// return log(LOG_INFO,
// "gb: Waiting for %"INT32" threads to complete.",n);
//log(LOG_INFO,"gb: Has %"INT32" threads out.",n);
//ok, resetAll will close httpServer's socket so now is the time to
//call the callback.
if(m_callbackState) (*m_callback)(m_callbackState);
// tell Multicast::reset() not to destroy all the slots! that cores!
m_exiting = true;
// let everyone free their mem
resetAll();
// show what mem was not freed
g_mem.printMem();
// kill any outstanding hd temp thread?
if ( g_process.m_threadOut )
log(LOG_INFO,"gb: still has hdtemp thread");
// exit abruptly
exit(0);
// keep compiler happy
return true;
}
void Process::disableTreeWrites ( bool shuttingDown ) {
// loop over all Rdbs
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
// if we save doledb while spidering it screws us up
// because Spider.cpp can not directly write into the
// rdb tree and it expects that to always be available!
if ( ! shuttingDown && rdb->m_rdbId == RDB_DOLEDB )
continue;
rdb->disableWrites();
}
// only disable spider related trees if shutting down
if ( ! shuttingDown ) return;
// disable all spider trees and tables
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
if ( ! sc ) continue;
sc->m_waitingTree .disableWrites();
sc->m_waitingTable.disableWrites();
sc->m_doleIpTable .disableWrites();
}
}
void Process::enableTreeWrites ( bool shuttingDown ) {
// loop over all Rdbs
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
rdb->enableWrites();
}
// spider trees are only disabled when shutting down, so skip otherwise
if ( ! shuttingDown ) return;
// enable all waiting trees
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
if ( ! sc ) continue;
sc->m_waitingTree .enableWrites();
sc->m_waitingTable.enableWrites();
sc->m_doleIpTable .enableWrites();
}
}
// . returns true if any rdb is currently dumping its tree to disk
bool Process::isRdbDumping ( ) {
// loop over all Rdbs and check if any is dumping
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
if ( rdb->m_dump.m_isDumping ) return true;
}
return false;
}
bool Process::isRdbMerging ( ) {
// loop over all Rdbs and check if any is merging
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
if ( rdb->isMerging() ) return true;
}
return false;
}
// . returns false if blocked, true otherwise
// . calls callback when done saving
bool Process::saveRdbTrees ( bool useThread , bool shuttingDown ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
// no thread if shutting down
if ( shuttingDown ) useThread = false;
// debug note
log("gb: shuttingdown=%i",(int)shuttingDown);
// turn off statsdb until everyone is done
//g_statsdb.m_disabled = true;
// loop over all Rdbs and save them
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
if ( m_calledSave ) {
log("gb: already saved trees, skipping.");
break;
}
Rdb *rdb = m_rdbs[i];
// if we save doledb while spidering it screws us up
// because Spider.cpp can not directly write into the
// rdb tree and it expects that to always be available!
if ( ! shuttingDown && rdb->m_rdbId == RDB_DOLEDB )
continue;
// note it
if ( ! rdb->m_dbname || ! rdb->m_dbname[0] )
log("gb: calling save tree for rdbid %i",
(int)rdb->m_rdbId);
else
log("gb: calling save tree for %s",
rdb->m_dbname);
rdb->saveTree ( useThread );
}
// . save waitingtrees for each collection, blocks.
// . can we make this non-blocking?
// . true = "usethreads"
// . all writes have been disabled, so should be cleanly saved
// . if this did not block that means it does not need any saving
// . this just launched all the write threads for the trees/tables
// that need to be saved. it sets m_isSaving once they are all
// launched.
// . and sets m_isSaving=false on SpiderCache::doneSaving when they
// are all done.
if ( shuttingDown ) g_spiderCache.save ( useThread );
// do not re-save the stuff we just did this round
m_calledSave = true;
// quickly re-enable if statsdb tree does not need save any more
//if ( ! g_statsdb.m_rdb.needsSave() ) g_statsdb.m_disabled = false;
// check if any need to finish saving
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
// do not return until all saved if we are shutting down
if ( shuttingDown ) break;
//if ( rdb->needsSave ( ) ) return false;
// we disable the tree while saving so we can't really add recs
// to one rdb tree while saving, but for crawlbot
// we might have added or deleted collections.
if ( rdb->isSavingTree ( ) ) return false;
}
// only save spiderdb based trees if shutting down so we can
// still write to them without writes being disabled
if ( ! shuttingDown ) return true;
// . check spider cache files (doleiptable waitingtree etc.)
// . this should return true if it still has some files that haven't
// saved to disk yet... so if it returns true we return false
// indicating that we are still waiting!
if ( ! shuttingDown && g_spiderCache.needsSave () ) return false;
// reset for next call
m_calledSave = false;
// everyone is done saving
return true;
}
// . returns false if blocked, true otherwise
// . calls callback when done saving
bool Process::saveRdbMaps ( bool useThread ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
useThread = false;
// loop over all Rdbs and save them
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
rdb->saveMaps ( useThread );
}
// everyone is done saving
return true;
}
// . returns false if blocked, true otherwise
// . calls callback when done saving
/*
bool Process::saveRdbCaches ( bool useThread ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
//useThread = false;
// loop over all Rdbs and save them
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
// . returns true if cache does not need save
// . returns false if blocked and is saving
// . returns true if useThreads is false
// . we return false if it blocks
if ( ! rdb->saveCache ( useThread ) ) return false;
}
// everyone is done saving
return true;
}
*/
bool Process::saveBlockingFiles1 ( ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
// save user accounting files. 3 of them.
if ( g_hostdb.m_myHost && g_hostdb.m_myHost->m_isProxy )
g_proxy.saveUserBufs();
// save the Conf file now
g_conf.save();
// save the conf files
g_collectiondb.save();
// . save repair state
// . this is repeated above too
// . keep it here for auto-save
g_repair.save();
// save our place during a rebalance
g_rebalance.saveRebalanceFile();
// save the login table
g_users.save();
// save stats on spider proxies if any
saveSpiderProxyStats();
// save the query log buffer if it was modified by the
// runSeoQueryLoop() in seo.cpp which updates its
// QueryLogEntry::m_minTop50Score member and corresponding timestamp
if ( g_qbufNeedSave ) {
char fname[1024];
sprintf(fname,"querylog.host%"INT32".dat",g_hostdb.m_hostId);
g_qbuf.saveToFile(g_hostdb.m_dir,fname);
log("process: saving changes to %s",fname);
g_qbufNeedSave = false;
}
// . save the add state from Msg4.cpp
// . these are records in the middle of being added to rdbs across
// the cluster
// . saves to "addsinprogress.saving" and moves to .saved
// . eventually this may replace "spiderrestore.dat"
if ( g_repair.isRepairActive() ) saveAddsInProgress ( "repair-" );
else saveAddsInProgress ( NULL );
// . save the syncdb quicktree and insync.dat file, very important!!
// . must do this LAST so we truly know if in sync or not!!
//g_syncdb.save();
// in fctypes.cpp. save the clock offset from host #0's clock so
// our startup is fast again
saveTimeAdjustment();
return true;
}
#include "PageTurk.h"
bool Process::saveBlockingFiles2 ( ) {
// never if in read only mode
if ( g_conf.m_readOnlyMode ) return true;
// the spider dup request cache
//g_spiderCache.m_dupCache.save( false ); // use threads?
// save waitingtrees for each collection, blocks.
//if ( ! g_spiderCache.save() ) return false;
// save what templates each turk has turked
//g_templateTable.save( g_hostdb.m_dir , "turkedtemplates.dat" );
// the robots.txt cache
Msg13::getHttpCacheRobots()->save( false ); // use threads?
// save our caches
for ( int32_t i = 0; i < MAX_GENERIC_CACHES; i++ ) {
if ( g_genericCache[i].useDisk() )
// do not use threads
g_genericCache[i].save( false );
}
// save dead wait cache
//if ( g_deadWaitCache.useDisk () )
// g_deadWaitCache .save ();
//if ( g_forcedCache.useDisk () )
// g_forcedCache .save ( false ); // use threads?
//if ( g_alreadyAddedCache.useDisk () )
// g_alreadyAddedCache.save ( false ); // use threads?
// save dns caches
RdbCache *c ;
c = g_dns.getCache();
if ( c->useDisk() ) c->save( false ); // use threads?
// save quota cache
//c = &g_qtable;
//if ( c->useDisk() ) c->save( false ); // use threads?
// save current spidering process, "spiderrestore.dat"
//g_spiderLoop.saveCurrentSpidering();
// save autoban stuff
g_autoBan.save();
// if doing titlerec imports in PageInject.cpp, save cursors,
// i.e. file offsets
saveImportStates();
// this one too
// g_classifier.save();
//g_siteBonus.save();
// save state for top docs
//g_pageTopDocs.saveStateToDisk();
// save the turk url cache, urls and user states
//g_pageTurk.saveCache();
return true;
}
void Process::resetAll ( ) {
g_log .reset();
g_hostdb .reset();
g_hostdb2 .reset();
g_spiderLoop .reset();
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
Rdb *rdb = m_rdbs[i];
rdb->reset();
}
g_catdb .reset();
g_collectiondb .reset();
g_categories1 .reset();
g_categories2 .reset();
//g_robotdb .reset();
g_dns .reset();
g_udpServer .reset();
//g_dnsServer .reset();
//g_udpServer2 .reset();
g_httpServer .reset();
g_loop .reset();
g_speller .reset();
//g_thesaurus .reset();
g_spiderCache .reset();
g_threads .reset();
g_ucUpperMap .reset();
g_ucLowerMap .reset();
g_ucProps .reset();
g_ucCombiningClass.reset();
g_ucScripts .reset();
g_profiler .reset();
g_langList .reset();
g_autoBan .reset();
//g_qtable .reset();
//g_pageTopDocs .destruct();
//g_pageNetTest .destructor();
for ( int32_t i = 0; i < MAX_GENERIC_CACHES; i++ )
g_genericCache[i].reset();
// reset disk page caches
resetPageCaches();
// termfreq cache in Posdb.cpp
g_termFreqCache.reset();
// in Msg0.cpp
//g_termListCache.reset();
// in msg5.cpp
//g_waitingTable.reset();
g_wiktionary.reset();
g_countryCode.reset();
s_clusterdbQuickCache.reset();
s_hammerCache.reset();
s_table32.reset();
resetDecompTables();
//resetCompositionTable();
//resetMsg6();
resetPageAddUrl();
resetHttpMime();
reset_iana_charset();
//resetAdultBit();
resetDomains();
resetEntities();
resetQuery();
resetStopWords();
resetAbbrTable();
resetUnicode();
//resetMsg20Cache();
//resetMsg12();
//resetLoadAvg();
// reset other caches
//g_robotdb.m_rdbCache.reset();
g_dns.reset();
//g_alreadyAddedCache.reset();
//g_forcedCache.reset();
// Msg20.cpp's parser cache
//resetMsg20Cache();
g_spiderCache.reset();
g_spiderLoop.reset();
g_wiki.reset();
// query log table
//g_qt.reset();
// query log buffer
g_qbuf.reset();
g_profiler.reset();
g_testResultsTree.reset();
g_users.m_ht.reset();
g_users.m_loginTable.reset();
resetAddressTables();
resetMsg13Caches();
resetStopWordTables();
//resetSynonymTables();
resetDateTables();
resetTestIpTable();
}
void Process::resetPageCaches ( ) {
log("gb: Resetting page caches.");
g_posdb .getDiskPageCache()->reset();
//g_datedb .getDiskPageCache()->reset();
g_linkdb .getDiskPageCache()->reset();
g_titledb .getDiskPageCache()->reset();
g_sectiondb .getDiskPageCache()->reset();
g_tagdb .getDiskPageCache()->reset();
g_spiderdb .getDiskPageCache()->reset();
//g_tfndb .getDiskPageCache()->reset();
//g_checksumdb .getDiskPageCache()->reset();
g_clusterdb .getDiskPageCache()->reset();
g_catdb .getDiskPageCache()->reset();
//g_placedb .getDiskPageCache()->reset();
g_doledb .getDiskPageCache()->reset();
//g_statsdb .getDiskPageCache()->reset();
}
// ============================================================================
// load average shedding via /proc/loadavg and an async BigFile
typedef struct {
char buf[20]; // read buffer
double load_average; // last parsed load avg.
int64_t time_req; // time of last parse
int64_t time_parse; // time (ms) the last read was parsed
bool waiting; // waiting on async result?
bool closing; // shutting down...
BigFile bigfile;
FileState filestate;
} loadavg_state;
static loadavg_state s_st_lavg;
/*
static void loadavg_callback(loadavg_state* state) {
if (state == NULL)
return;
if (s_st_lavg.closing)
return;
// MDW: stop doing it for now, it is not accurate
state->load_average = 0.00;
return;
if (s_st_lavg.filestate.m_errno != 0) {
// do not thrash!
// leave time_req alone so next open will occur in 5 seconds...
// do not deadlock!
// set load_average=0 until file can be successfully re-read.
s_st_lavg.load_average = 0.0;
s_st_lavg.bigfile.close();
s_st_lavg.bigfile.setNonBlocking();
s_st_lavg.bigfile.open(O_RDONLY);
log(LOG_INFO, "build: errno %"INT32" reading /proc/loadavg",
s_st_lavg.filestate.m_errno);
s_st_lavg.filestate.m_errno = 0;
return;
}
state->time_parse = gettimeofdayInMilliseconds();
state->waiting = false;
state->load_average = atof(state->buf);
log(LOG_DEBUG, "build: loadavg currently: %.2f latency %lld ms",
state->load_average, state->time_parse - state->time_req);
}
*/
static loadavg_state* s_state_ptr = NULL;
/*
static void update_load_average(int64_t now) {
// initialize loadavg collection...
if (s_state_ptr == NULL) {
s_st_lavg.load_average = 0.0;
s_st_lavg.time_req = 0;
s_st_lavg.time_parse = 0;
s_st_lavg.waiting = false;
s_st_lavg.closing = false;
s_st_lavg.bigfile.set("/proc", "loadavg");
s_st_lavg.bigfile.setNonBlocking();
s_st_lavg.bigfile.open(O_RDONLY);
s_state_ptr = &s_st_lavg;
}
if (s_st_lavg.closing)
return;
if (s_st_lavg.waiting)
return;
// the 2.4 kernel updates /proc/loadavg on a 5-second interval
if (s_st_lavg.waiting == false && now - s_st_lavg.time_req < (5 * 1000))
return;
s_st_lavg.time_req = now;
s_st_lavg.waiting = true;
s_st_lavg.filestate.m_errno = 0;
if (!s_st_lavg.bigfile.read( s_st_lavg.buf,
sizeof(s_st_lavg.buf),
0,
&s_st_lavg.filestate))
return;
// if we did not block (as is normal for _this_ file), then
// call callback directly and update state struct.
loadavg_callback(s_state_ptr);
return;
}
*/
// returns the last parsed load average; with update_load_average() disabled
// above, this is always the initial 0.0
double Process::getLoadAvg() {
return s_st_lavg.load_average;
}
void Process::resetLoadAvg() {
if (s_state_ptr == NULL)
return;
s_st_lavg.closing = true;
s_state_ptr = NULL;
s_st_lavg.bigfile.close();
}
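// A minimal synchronous sketch (not wired in, kept commented out like the
// async reader above): with update_load_average() disabled, getLoadAvg()
// always reports 0.0. If a blocking read of /proc/loadavg were acceptable,
// the 1-minute average could be fetched directly. The helper name
// readLoadAvgBlocking() is hypothetical and not part of this codebase.
/*
static double readLoadAvgBlocking ( ) {
// /proc/loadavg begins with the 1-, 5- and 15-minute averages
FILE *f = fopen ( "/proc/loadavg" , "r" );
if ( ! f ) return 0.0;
char buf[64];
char *s = fgets ( buf , sizeof(buf) , f );
fclose ( f );
// atof() stops at the first space, so this yields the 1-minute average
return s ? atof ( s ) : 0.0;
}
*/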
//
// ============================================================================
/*
/////////////////////////////////////
//
// event nightly stats process
//
//////////////////////////////////////
//
// copied from main.cpp dumpEvents() function
//
static int32_t s_lastRunTime = 0;
void eventStatSleepWrapper ( void *state , int fd ) {
// why even register it if not host #0?
if ( g_hostdb.m_myHostId != 0 ) { char *xx=NULL;*xx=0; }
// local time. we are on host #0
int32_t now = getTimeLocal();
// wait at least one hour
if ( now - s_lastRunTime < 3600 ) return;
// wait until midnight us time
int32_t tod = now % 86400;
// or shortly after
if ( tod > 1500 ) return;
// ok, execute it
s_lastRunTime = now;
// send to every host
for ( int32_t i = 0 ;i < g_hostdb.m_numHosts ; i++ ) {
Host *h = g_hostdb.getHost(i);
// reset his stats
h->m_eventStats.clear();
// skip if dead
if ( h->isDead() ) continue;
g_udpServer.sendRequest ( 0xdd ,
NULL ,
gotStatReply ,
&h->m_eventStats );//store reply here
s_numRequests++;
}
// wait for replies!
s_numReplies = 0;
}
void gotStatReply ( UdpSlot *slot ) {
s_numReplies++;
// wait for all replies to come in
if ( s_numReplies < s_numRequests ) return;
// ok, tally up
EventStats total;
total.clear();
for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
Host *h = g_hostdb.getHost(i);
EventStats *es = &h->m_eventStats;
total.m_active += es->m_active;
}
SafeBuf sb;
// email cruft
sb.safePrintf("EHLO gigablast.com\r\n"
//"MAIL from:<eventguru@eventguru.com>\r\n"
"MAIL From:<mwells2@gigablast.com>\r\n"
"RCPT To:<%s>\r\n"
"DATA\r\n"
"From: mwells <mwells2@gigablast.com>\r\n"
"MIME-Version: 1.0\r\n"
"To: %s\r\n"
"Subject: Event Stats\r\n"
"Content-Type: text/html; charset=UTF-8; format=flowed\r\n"
"Content-Transfer-Encoding: 8bit\r\n"
// mime header must be separated from body by
// an extra \r\n
"\r\n"
"\r\n"
);
sb.safePrintf("total expired events %"INT32"\n\n", total.m_expired );
sb.safePrintf("total active events %"INT32"\n\n", total.m_active );
// print the stats now
fprintf(stdout,"expired %"INT32"\n",expiredCount);
fprintf(stdout,"active %"INT32"\n",activeCount);
fprintf(stdout,"expired+active %"INT32"\n",expiredCount+activeCount);
fprintf(stdout,"activeresultset1 %"INT32"\n",activeResultSet1Count);
fprintf(stdout,"activeexperimental %"INT32"\n",activeExperimentalCount);
fprintf(stdout,"activeresultset1+activeexperimental %"INT32"\n",
activeResultSet1Count+activeExperimentalCount);
fprintf(stdout,"activefacebook %"INT32"\n",facebookCount);
fprintf(stdout,"activebadgeocoder %"INT32"\n",badGeocoderCount);
// by country
fprintf(stdout,"active by country\n");
for ( int32_t i = 0 ;i < 256 ; i++ ) {
if ( ! cctable[i] ) continue;
char *cs = getCountryCode ( (uint8_t)i );
if ( ! cs ) continue;
fprintf(stdout,"%s %"INT32"\n",cs,cctable[i]);
}
sb.safePrintf("%"INT32" of %"INT32" hosts reporting.\n\n",
s_numReplies, g_hostdb.m_numHosts );
// email that to mwells2@gigablast.com
int32_t ip = atoip ( "10.5.54.47" ); // gk37, our mail server
if ( ! ts->sendMsg ( ip,
25, // smtp (simple mail transfer protocol) port
sb.getBufStart(),
sb.length(),
sb.length(),
sb.length(),
NULL, // es,
NULL, // gotEmailReplyWrapper,
60*1000,
1000*1024,
1000*1024 ) )
log("estats: sent event stats email to mwells2@gigablast.com");
// we did not block, so update facebook rec with timestamps
//gotEmailReply( es , NULL );
// we did not block
log("estats: tcp sendMsg did not block!");
}
// defined in XmlDoc.cpp:
bool isExpired ( EventDisplay *ed , int32_t nowUTC , int32_t niceness );
// defined in Address.cpp
uint8_t getCountryIdFromAddrStr ( char *addr );
// . host #0 call this around midnight on every host...
// . dd is the stat dump
// . returns the stats
void handleRequestdd ( UdpSlot *slot , int32_t netnice ) {
// set stats
EventStats es;
loop:
// use msg5 to get the list, should ALWAYS block since no threads
if ( ! msg5.getList ( RDB_TITLEDB ,
coll ,
&list ,
startKey ,
endKey ,
minRecSizes ,
true,//includeTree ,
false , // add to cache?
0 , // max cache age
0,//startFileNum ,
-1,//numFiles ,
NULL , // state
NULL , // callback
0 , // niceness
false , // err correction?
NULL , // cache key ptr
0 , // retry num
-1 , // maxRetries
true , // compensate for merge
-1LL , // sync point
&msg5b )){
log(LOG_LOGIC,"db: getList did not block.");
return;
}
// all done if empty
if ( list.isEmpty() ) goto done;
// loop over entries in list
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
char *rec = list.getCurrentRec();
int32_t recSize = list.getCurrentRecSize();
int64_t docId = g_titledb.getDocIdFromKey ( k );
if ( k <= lastKey )
log("key out of order. "
"lastKey.n1=%"XINT32" n0=%"XINT64" "
"currKey.n1=%"XINT32" n0=%"XINT64" ",
lastKey.n1,lastKey.n0,
k.n1,k.n0);
lastKey = k;
// print deletes
if ( (k.n0 & 0x01) == 0) {
fprintf(stdout,"n1=%08"XINT32" n0=%016"XINT64" docId=%012"INT64" "
"(del)\n",
k.n1 , k.n0 , docId );
continue;
}
// . make this
// . (there's a mem leak so just new each time!)
XmlDoc *xd;
try { xd = new (XmlDoc); }
catch ( ... ) {
fprintf(stdout,"could not alloc for xmldoc\n");
exit(-1);
}
// uncompress the title rec
if ( ! xd->set2 ( rec , recSize , coll ,NULL , 0 ) )
continue;
// now log each event we got that we hashed
char *p = xd->ptr_eventData;
char *pend = xd->ptr_eventData + xd->size_eventData;
// scan them
for ( ; p < pend ; ) {
// cast it
EventDisplay *ed = (EventDisplay *)p;
// skip this event display blob
p += ed->m_totalSize;
// ok, transform the offsets into ptrs
ed->m_desc = (EventDesc *)((int32_t)ed->m_desc +
xd->ptr_eventData);
ed->m_addr = (char *)((int32_t)ed->m_addr +
xd->ptr_eventData);
ed->m_int = (int32_t *)((int32_t)ed->m_int +
xd->ptr_eventData);
ed->m_normDate=(char *)((int32_t)ed->m_normDate +
xd->ptr_eventData);
// do not repeat!
ed->m_eventFlags |= EV_DESERIALIZED;
// compile into EventStats class
addInEventStats ( ed , &es , nowUTC );
}
mdelete ( xd , sizeof(XmlDoc) , "mainxd" );
delete xd;
}
startKey = *(key_t *)list.getLastKey();
startKey += (uint32_t) 1;
// watch out for wrap around
if ( startKey >= *(key_t *)list.getLastKey() ) goto loop;
}
void addInEventStats ( EventDisplay *ed , EventStats *es , int32_t nowUTC ) {
// count expired
if ( isExpired(ed,nowUTC,MAX_NICENESS)) {
es->m_expired++;
return;
}
es->m_active++;
// count bad geocoder (lat=999.000|888.000)
if ( ed->m_geocoderLat > 180.0 ||
ed->m_geocoderLon < -180.0 )
es->m_badGeocoder++;
// count resultset1 unexpired
bool hasTitle = false;
if ( ed->m_eventFlags & EV_HASTITLEWORDS) hasTitle = true;
if ( ed->m_eventFlags & EV_HASTITLEBYVOTES) hasTitle = true;
bool hasDate = false;
if ( ed->m_eventFlags & EV_HASTIGHTDATE ) hasDate = true;
if ( hasTitle && hasDate ) es->m_resultSet1++;
else es->m_otherResultSet++;
// facebook
if ( ed->m_eventFlags & EV_FACEBOOK ) es->m_facebook++;
// counts by country. if 'us' will be empty
uint8_t crid = getCountryIdFromAddrStr(ed->m_addr);
es->m_cctable[crid]++;
}
*/
static void gotFanReplyWrapper ( void *state , TcpSocket *s ) {
g_process.gotFanReply ( s );
}
//
// FAN SWITCH CHECKER
//
void fanSwitchCheckWrapper ( int fd , void *state ) {
g_process.checkFanSwitch ();
}
void Process::checkFanSwitch ( ) {
// skip if you are not me, because this controls my custom fan
if ( ! g_conf.m_isMattWells )
return;
// are we in shard #0?
bool check = false;
// get our host
Host *me = g_hostdb.m_myHost;
// if we are in shard #0 and host #0 is dead, we check it
if ( me->m_shardNum == 0 && g_hostdb.isDead((int32_t)0) )
check = true;
// if we are host #0 we always check it
if ( me->m_hostId == 0 ) check = true;
// proxy never checks power
if ( me->m_isProxy ) check = false;
// if not checking, all done
if ( ! check ) return;
// only if live
//if ( ! g_conf.m_isLive ) return;
// skip if request out already
if ( m_fanReqOut ) return;
// both must be legit
if ( m_roofTemp <= -99.0 ) return;
if ( m_dataCtrTemp <= -99.0 ) return;
// for shits and giggles log it every 10 minutes
int32_t now = getTimeLocal();
static int32_t s_lastLogTime = 0;
if ( now - s_lastLogTime > 60*10 ) {
s_lastLogTime = now;
log("powermo: dataCtrTemp=%.1f roofTemp=%.1f",
m_dataCtrTemp,
m_roofTemp );
}
// what is the desired state? assume fans on.
m_desiredFanState = 1;
// if the roof is hotter, turn the fans off! we don't want hotter air.
if ( //m_roofTemp > m_dataCtrTemp &&
// even if roof temp is slightly cooler, turn off fans. it
// needs to be more than 5 degrees cooler.
m_roofTemp + 5.0 > m_dataCtrTemp )
// 0 means we want fans to be off
m_desiredFanState = 0;
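// worked example with illustrative numbers (not from the source): if
// m_dataCtrTemp is 74.0 and m_roofTemp is 70.0, then 70.0 + 5.0 > 74.0, so
// m_desiredFanState becomes 0 (fans off); only when the roof reads 69.0 or
// cooler does the test fail and the fans stay requested on.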
// if matches, leave alone
if ( m_currentFanState == m_desiredFanState ) return;
// ok change! the url
// . the IP9258 power controller
// . default ip=192.168.1.100
// . default user=admin
// . default pwd=12345678
// . default mac=00:92:00:00:00:3D
// . the instruction sheet says to run IPEDIT on the cd with your
// computer directly connected to the IP9258 via the eth port in
// order to get the default ip address of it.
// . i changed the ip to 10.5.0.8 since the roomalert is at 10.5.0.9
// . turn all 4 ports on or off so we can plug the fans into two
// separate ports
/*
char *url ;
if ( m_desiredFanState )
url = "http://10.5.0.8/tgi/iocontrol.tgi?"
"pw1Name=&"
"P60=On&" // THIS IS WHAT CHANGES!
"P60_TS=0&" // timer seconds?
"P60_TC=Off&" // timer control?
"pw2Name=&"
"P61=On&"
"P61_TS=0&"
"P61_TC=Off&"
"pw3Name=&"
"P62=On&"
"P62_TS=0&"
"P62_TC=Off&"
"pw4Name=&"
"P63=On&"
"P63_TS=0&"
"P63_TC=Off&"
"Apply=Apply"
;
else
url = "http://10.5.0.8/tgi/iocontrol.tgi?"
"pw1Name=&"
"P60=Off&" // THIS IS WHAT CHANGES!
"P60_TS=0&" // timer seconds?
"P60_TC=Off&" // timer control?
"pw2Name=&"
"P61=Off&"
"P61_TS=0&"
"P61_TC=Off&"
"pw3Name=&"
"P62=Off&"
"P62_TS=0&"
"P62_TC=Off&"
"pw4Name=&"
"P63=Off&"
"P63_TS=0&"
"P63_TC=Off&"
"Apply=Apply"
;
// . make a cookie with the login info
// . on chrome open the console and click "Network" tab
// to view the http network requests and replies
char *cookie = "admin=12345678; Taifatech=yes";
*/
//
// the new power switch is hopefully less flaky!
//
SafeBuf urlBuf;
if ( m_desiredFanState ) {
// this turns it on
if ( !urlBuf.safePrintf("http://10.5.0.10/outlet.cgi?outlet=1&"
"command=1&time=%"UINT32"",
(uint32_t)getTimeGlobal()) )
return;
}
else {
// this turns it off
if ( !urlBuf.safePrintf("http://10.5.0.10/outlet.cgi?outlet=1&"
"command=0&time=%"UINT32"",
(uint32_t)getTimeGlobal()) )
return;
}
// . make a cookie with the login info
// . on chrome open the console and click "Network" tab
// to view the http network requests and replies
//char *cookie = "admin=12345678; Taifatech=yes";
char *cookie = NULL;
// mark the request as outstanding so we do not overlap it
m_fanReqOut = true;
log("process: trying to set fan state to %"INT32"",m_desiredFanState);
// get it
bool status = g_httpServer.
getDoc ( urlBuf.getBufStart() , // url to download
0 , // ip
0 , // offset
-1 , // size
0 , // ifModifiedSince
NULL , // state
gotFanReplyWrapper , // callback
30*1000 , // timeout
0 , // proxy ip
0 , // proxy port
1*1024*1024 , // maxLen
1*1024*1024 , // maxOtherLen
"Mozilla/4.0 "
"(compatible; MSIE 6.0; Windows 98; "
"Win 9x 4.90)" ,
//false , // respect download limit?
"HTTP/1.1" ,// fake 1.1 otherwise we get error!
true , // doPost? converts cgi str to post
cookie ,
// additional mime headers
"Authorization: Basic YWRtaW46^C");
// wait for it
if ( ! status ) return;
// i guess it is back!
m_fanReqOut = false;
// call this to wrap things up
gotFanReply ( NULL );
}
// . returns false if blocked, true otherwise
// . returns true and sets g_errno on error
bool Process::gotFanReply ( TcpSocket *s ) {
// i guess it is back!
m_fanReqOut = false;
if ( ! s ) {
log("powermo: got NULL socket in fan reply");
return true;
}
if ( g_errno ) {
log("powermo: had error getting fan state: %s.",
mstrerror(g_errno));
return true;
}
// point into buffer
char *buf = s->m_readBuf;
int32_t bufSize = s->m_readOffset;
if ( ! buf ) {
log(LOG_INFO,"powermo: got empty fan state reply.");
return true;
}
HttpMime mime;
mime.set ( buf , bufSize , NULL );
char *content = buf + mime.getMimeLen();
int32_t contentLen = bufSize - mime.getMimeLen();
content[contentLen]='\0';
// get the state of the power! (from old power switch)
//char *p = strstr ( content ,"\"power\",status:" );
// get the state of the power! (from new power switch)
char *tag = "<outlet1_status>";
int32_t tagLen = gbstrlen(tag);
char *p = strstr ( content, tag );
// panic?
if ( ! p ) {
log("powermo: could not parse out fan power state "
"from power strip. "
"content = %s",content);
return true;
}
// . get the value
// . val is 0 if the fan power is off, 1 if on?
int32_t val = atoi ( p + tagLen );
m_currentFanState = val;
if ( m_currentFanState == m_desiredFanState )
log("powermo: desired fan state, %"INT32", achieved",
m_currentFanState);
else
log("powermo: fan state is %"INT32", but needs to be %"INT32"",
m_currentFanState,
m_desiredFanState);
return true;
}
// make sure ntpd is running, we can't afford to get our clock
// out of sync for credit card transactions
bool Process::checkNTPD ( ) {
if ( ! g_conf.m_isLive ) return true;
FILE *pd = popen("ps auxww | grep ntpd | grep -v grep","r");
if ( ! pd ) {
log("gb: failed to ps auxww ntpd");
if ( ! g_errno ) g_errno = EBADENGINEER;
return false;
}
char tmp[1024];
char *ss = fgets ( tmp , 1000 , pd );
// always close the pipe from popen() so we do not leak the stream
pclose ( pd );
if ( ! ss ) {
log("gb: failed to ps auxww ntpd 2");
if ( ! g_errno ) g_errno = EBADENGINEER;
return false;
}
// must be there
if ( ! strstr ( tmp,"ntpd") ) {
log("gb: all proxies must have ntpd running! this "
"one does not!");
if ( ! g_errno ) g_errno = EBADENGINEER;
return false;
}
return true;
}
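// A lighter-weight alternative sketch (not wired in): the exit status of
// pidof(8) can stand in for parsing ps output. The helper name
// isNtpdRunning() is hypothetical; it assumes pidof is installed and that
// <cstdlib> and <sys/wait.h> come in via gb-include.h.
/*
static bool isNtpdRunning ( ) {
// pidof exits with status 0 when at least one ntpd process exists
int rc = system ( "pidof ntpd > /dev/null 2>&1" );
return rc != -1 && WIFEXITED(rc) && WEXITSTATUS(rc) == 0;
}
*/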