mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
74ec812959
just return an error now. hopefully merge will try again later. also core if you try to write recs to an rdbmap that has already had its memory footprint reduced so we can find that overrun bug.
3381 lines
105 KiB
C++
3381 lines
105 KiB
C++
#include "gb-include.h"
|
|
|
|
// cygwin and apple mac os x does not support klogctl
|
|
#if defined(CYGWIN) || defined(__APPLE__)
|
|
// use a stub
|
|
int32_t klogctl( int, char *,int ) { return 0; }
|
|
#else
|
|
// otherwise, use the real one
|
|
#include <sys/klog.h> // for klogctl
|
|
#endif
|
|
|
|
#include "PingServer.h"
|
|
#include "UdpServer.h"
|
|
//#include "Sync.h"
|
|
#include "Conf.h"
|
|
#include "HttpServer.h"
|
|
#include "HttpMime.h"
|
|
#include "Proxy.h"
|
|
#include "Repair.h"
|
|
#include "Process.h"
|
|
#include "DailyMerge.h"
|
|
#include "Spider.h"
|
|
#include "Test.h"
|
|
#include "Rebalance.h"
|
|
#include "Version.h"
|
|
|
|
#define PAGER_BUF_SIZE (10*1024)
|
|
|
|
// from main.cpp. when keepalive script restarts us this is true
|
|
extern bool g_recoveryMode;
|
|
extern int32_t g_recoveryLevel;
|
|
|
|
// a global class extern'd in .h file
|
|
PingServer g_pingServer;
|
|
|
|
char s_kernelRingBuf[4097];
|
|
int32_t s_kernelRingBufLen = 0;
|
|
|
|
static void sleepWrapper ( int fd , void *state ) ;
|
|
//static void sleepWrapper10 ( int fd , void *state );
|
|
static void checkKernelErrors( int fd, void *state );
|
|
static void gotReplyWrapperP ( void *state , UdpSlot *slot ) ;
|
|
static void handleRequest11 ( UdpSlot *slot , int32_t niceness ) ;
|
|
static void gotReplyWrapperP2 ( void *state , UdpSlot *slot );
|
|
static void gotReplyWrapperP3 ( void *state , UdpSlot *slot );
|
|
static void updatePingTime ( Host *h , int32_t *pingPtr , int32_t tripTime ) ;
|
|
// JAB: warning abatement
|
|
//static bool pageTMobile ( Host *h , char *errmsg ) ;
|
|
//static bool pageAlltel ( Host *h , char *errmsg , char *num ) ;
|
|
//static bool pageVerizon ( Host *h , char *errmsg ) ;
|
|
//static void verizonWrapper ( void *state , TcpSocket *ts ) ;
|
|
//static bool pageVerizon2 ( void *state , TcpSocket *s ) ;
|
|
// JAB: warning abatement
|
|
//static bool pageSprintPCS ( Host *h , char *errmsg , char *num ) ;
|
|
//static void sprintPCSWrapper ( void *state , TcpSocket *ts ) ;
|
|
//static bool pageSprintPCS2 ( void *state , TcpSocket *ts ) ;
|
|
static bool sendAdminEmail ( Host *h, char *fromAddress,
|
|
char *toAddress, char *body ,
|
|
char *emailServIp );//= "mail.gigablast.com" );
|
|
static void emailSleepWrapper ( int fd, void *state );
|
|
|
|
bool PingServer::registerHandler ( ) {
|
|
// . we'll handle msgTypes of 0x11 for pings
|
|
// . register ourselves with the udp server
|
|
// . it calls our callback when it receives a msg of type 0x0A
|
|
// . it is now hot..., no, not hot anymore
|
|
//if ( ! g_udpServer2.registerHandler ( 0x11, handleRequest11 ))
|
|
// return false;
|
|
// register on low priority server to make transition easier
|
|
if ( ! g_udpServer.registerHandler ( 0x11, handleRequest11 ))
|
|
return false;
|
|
// limit this to 1000ms
|
|
if ( g_conf.m_pingSpacer > 1000 ) g_conf.m_pingSpacer = 1000;
|
|
// save this
|
|
m_pingSpacer = g_conf.m_pingSpacer;
|
|
// this starts off at zero
|
|
m_callnum = 0;
|
|
// . this was 500ms but now when a host shuts downs it sends all other
|
|
// hosts a msg saying so... PingServer::broadcastShutdownNotes() ...
|
|
// . so i put it up to 2000ms to save bandwidth some
|
|
// . but these packets are small and cheap so now let's send out 10
|
|
// every second to keep on top of things
|
|
// . so in a network of 128 hosts we'll cycle through in 12.8 seconds
|
|
// and set a host to dead on avg will take about 13 seconds
|
|
if ( ! g_loop.registerSleepCallback ( g_conf.m_pingSpacer , // ~100ms
|
|
(void *)(PTRTYPE)m_callnum,//NULL
|
|
sleepWrapper , 0 ) )
|
|
return false;
|
|
// if not host #0, we're done
|
|
if ( g_hostdb.m_hostId != 0 ) return true;
|
|
|
|
// we have disabled syncing for now
|
|
return true;
|
|
|
|
// . this is called ever 10 minutes
|
|
// . it tells all hosts to store a sync point at about the same time
|
|
// . the sync point is a fixed value in time used as a reference
|
|
// for performing incremental synchronization by Sync.cpp should
|
|
// a host go down without saving. one of its twins must of course
|
|
// remain up in order to sync him.
|
|
//if ( ! g_loop.registerSleepCallback ( SYNC_TIME*1000 , // ms
|
|
// NULL ,
|
|
// sleepWrapper10 ) )
|
|
// return false;
|
|
return true;
|
|
}
|
|
|
|
static int32_t s_outstandingPings = 0;
|
|
|
|
// . gets filename that contains the hosts from the Conf file
|
|
// . return false on errro
|
|
// . g_errno may NOT be set
|
|
bool PingServer::init ( ) {
|
|
// reset how many pings we've launched
|
|
//m_launched = 0;
|
|
//m_totalLaunched = 0;
|
|
m_i = 0;
|
|
m_numRequests2 = 0;
|
|
m_numReplies2 = 0;
|
|
m_useShotgun = 0;
|
|
m_pingProxy = 0;
|
|
m_minRepairMode = -1;
|
|
m_maxRepairMode = -1;
|
|
m_minRepairModeBesides0 = -1;
|
|
m_minRepairModeHost = NULL;
|
|
m_maxRepairModeHost = NULL;
|
|
m_minRepairModeBesides0Host = NULL;
|
|
|
|
initKernelErrorCheck();
|
|
|
|
// invalid info init
|
|
m_currentPing = -1;
|
|
m_bestPing = -1;
|
|
m_bestPingDate = 0;
|
|
|
|
m_numHostsWithForeignRecs = 0;
|
|
m_numHostsDead = 0;
|
|
m_hostsConfInDisagreement = false;
|
|
m_hostsConfInAgreement = false;
|
|
|
|
// . send a ping request to everybody on the network right now!
|
|
// . no, sleepWrapper needs to do it when g_loop is working
|
|
//sendPingsToAll();
|
|
// we're done
|
|
return true;
|
|
}
|
|
|
|
void PingServer::initKernelErrorCheck(){
|
|
// Most of the important logfiles are gathered in a single
|
|
// directory - /var/log
|
|
|
|
// * /var/log/dmesg - boot time hardware detection and driver setup
|
|
// * /var/log/messages - general system messages, includes most of
|
|
// what is in dmesg if it hasn't "rolled over".
|
|
// * /var/log/syslog - like the "messages" log, but sometimes includes
|
|
// other details
|
|
// * /var/log/mythtv/mythbackend.log - status logs from the MythTV
|
|
// backend server
|
|
// * /var/log/mysql/mysql.err - errors from the database server
|
|
// (usually empty!)
|
|
// * /var/log/daemon.log - messages from service tasks like lircd
|
|
// * /var/log/kern.log - if something has gone wrong with a kernel
|
|
// module, you may find something here.
|
|
// * /var/log/Xorg.0.log - start up log from the X server (GUI
|
|
// environment), including hardware detection and modes (resolution)
|
|
// selected
|
|
|
|
// BUT FOUND A BETTER WAY THAN TO READ LOG FILES!
|
|
// directly read the kernel ring buffer every x sec using klogctl.
|
|
// look for errors directly in the kernel ring buffer.
|
|
|
|
// fill the kernel ring buf with info that was available before startup
|
|
s_kernelRingBufLen = klogctl( 3, s_kernelRingBuf, 4096 );
|
|
if ( s_kernelRingBufLen < 0 ){
|
|
log ("db: klogctl returned error");
|
|
}
|
|
|
|
|
|
// clear the kernel Errors
|
|
for ( int32_t i = 0; i < g_hostdb.m_numHosts; i++ ){
|
|
g_hostdb.m_hosts[i].m_pingInfo.m_kernelErrors = 0;
|
|
g_hostdb.m_hosts[i].m_kernelErrorReported = false;
|
|
}
|
|
|
|
if ( !g_loop.registerSleepCallback( 30000, NULL, checkKernelErrors,0))
|
|
log ("registering kern.log failed");
|
|
return;
|
|
}
|
|
|
|
|
|
void PingServer::sendPingsToAll ( ) {
|
|
|
|
//g_conf.m_logDebugUdp = 1;
|
|
// stop for now
|
|
//return;
|
|
|
|
if ( g_isYippy ) return;
|
|
|
|
// if we are the query/spider compr. proxy then do not send out pings
|
|
if ( g_hostdb.m_myHost->m_type & HT_QCPROXY ) return;
|
|
if ( g_hostdb.m_myHost->m_type & HT_SCPROXY ) return;
|
|
|
|
|
|
// do not ping ourselves if we are host #0, because wrapper
|
|
// will not be called...
|
|
//if ( g_hostdb.m_hostId == 0 ) g_host0Replied = true;
|
|
|
|
// get host #0
|
|
Host *hz = g_hostdb.getHost ( 0 );
|
|
// sanity check
|
|
if ( hz->m_hostId != 0 ) { char *xx=NULL;*xx=0; }
|
|
|
|
|
|
// do a quick send to host 0 out of band if we have never
|
|
// got a reply from him. we need him to sync our clock!
|
|
static int32_t s_lastTime = 0;
|
|
if ( ! hz->m_inProgress1 && hz->m_numPingReplies == 0 &&
|
|
time(NULL) - s_lastTime > 2 ) {
|
|
// update clock to avoid oversending
|
|
s_lastTime = time(NULL);
|
|
// ping him
|
|
pingHost ( hz , hz->m_ip , hz->m_port );
|
|
}
|
|
|
|
// once we do a full round, drop out. use firsti to determine this
|
|
int32_t firsti = -1;
|
|
|
|
for ( ; m_i != firsti && s_outstandingPings < 5 ; ) {
|
|
// store it
|
|
if ( firsti == -1 ) firsti = m_i;
|
|
// get the next host in line
|
|
Host *h = g_listHosts [ m_i ];
|
|
uint32_t ip = g_listIps [ m_i ];
|
|
uint16_t port = g_listPorts [ m_i ];
|
|
// point to next ip/port, and wrap if we should
|
|
if ( ++m_i >= g_listNumTotal ) m_i = 0;
|
|
// skip if not udp port. might be http, dns or https.
|
|
if ( port != h->m_port ) continue;
|
|
// if he is in progress, skip as well. check for shotgun ip.
|
|
if ( ip == h->m_ip && h->m_inProgress1 ) continue;
|
|
if ( ip != h->m_ip && h->m_inProgress2 ) continue;
|
|
// do not ping query compression proxies or spider comp proxies
|
|
if ( h->m_type & HT_QCPROXY ) continue;
|
|
if ( h->m_type & HT_SCPROXY ) continue;
|
|
// try to launch the request
|
|
pingHost ( h , ip , port ) ;
|
|
}
|
|
|
|
|
|
|
|
// . check if pingSpacer was updated via master controls and fix our
|
|
// sleep wrapper callback interval if so
|
|
// . limit this to 1000ms
|
|
if ( g_conf.m_pingSpacer > 1000 ) g_conf.m_pingSpacer = 1000;
|
|
// update pingSpacer callback tick if it changed since last time
|
|
if ( m_pingSpacer == g_conf.m_pingSpacer ) return;
|
|
// register new one
|
|
if ( ! g_loop.registerSleepCallback ( m_pingSpacer , // ~100ms
|
|
(void *)(PTRTYPE)(m_callnum+1) ,
|
|
sleepWrapper , 0 ) ) {
|
|
static char logged = 0;
|
|
if ( ! logged )
|
|
log("net: Could not update ping spacer.");
|
|
logged = 1;
|
|
return;
|
|
}
|
|
// now it is safe to unregister last callback then
|
|
g_loop.unregisterSleepCallback((void *)(PTRTYPE)m_callnum,
|
|
sleepWrapper);
|
|
// point to next one
|
|
m_callnum++;
|
|
}
|
|
|
|
|
|
// class HostStatus {
|
|
// public:
|
|
// int64_t m_lastPing;
|
|
// char m_repairMode;
|
|
// char m_kernelError;
|
|
// char m_loadAvg;
|
|
// char m_percentMemUsed;
|
|
|
|
// };
|
|
|
|
// from Loop.cpp
|
|
extern float g_cpuUsage;
|
|
|
|
// ping host #i
|
|
void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
|
// don't ping on interface machines
|
|
//if ( g_conf.m_interfaceMachine ) return;
|
|
|
|
// do not ping ourselves
|
|
//if ( h == g_hostdb.m_myHost ) return;
|
|
|
|
// return if NULL
|
|
if ( ! h ) return;
|
|
|
|
int32_t hostId = h->m_hostId;
|
|
|
|
// every time this is hostid 0, do a sanity check to make sure
|
|
// g_hostdb.m_numHostsAlive is accurate
|
|
if ( hostId == 0 ) {
|
|
int32_t numHosts = g_hostdb.getNumHosts();
|
|
if( h->m_isProxy )
|
|
numHosts = g_hostdb.getNumProxy();
|
|
// do not do more than once every 10 seconds
|
|
static int32_t lastTime = 0;
|
|
int32_t now = getTime();
|
|
if ( now - lastTime > 10 ) {
|
|
lastTime = now;
|
|
int32_t count = 0;
|
|
for ( int32_t i = 0 ; i < numHosts; i++ ) {
|
|
// count if not dead
|
|
Host *host;
|
|
if ( h->m_isProxy )
|
|
host = g_hostdb.getProxy(i);
|
|
else
|
|
host = g_hostdb.getHost(i);
|
|
if ( !g_hostdb.isDead(host))
|
|
count++;
|
|
}
|
|
// make sure count matches
|
|
if ( (!h->m_isProxy &&
|
|
count != g_hostdb.getNumHostsAlive()) ) {
|
|
//( h->m_isProxy &&
|
|
//count != g_hostdb.getNumProxyAlive()) ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
}
|
|
}
|
|
|
|
// watch for out of bounds
|
|
//if ( hostId < 0 || hostId >= g_hostdb.getNumHosts() ) return;
|
|
// don't ping ourselves, we know we're alive
|
|
// only if we aren't the proxy
|
|
// MDW: no, let's ping ourselves so all the Host vars can be
|
|
// set properly in here and we don't have to repeat that
|
|
// code.
|
|
/*
|
|
if ( hostId == g_hostdb.m_hostId && h == g_hostdb.m_myHost ){
|
|
//&& !g_proxy.isProxyRunning() ) {
|
|
// set loadavg and return
|
|
h->m_loadAvg = g_process.getLoadAvg();
|
|
// set our percent mem used
|
|
h->m_percentMemUsed = ((float)g_mem.getUsedMem()*100.0)/
|
|
(float)g_mem.getMaxMem();
|
|
return;
|
|
}
|
|
*/
|
|
// don't ping again if already in progress
|
|
if ( ip == h->m_ip && h->m_inProgress1 ) return;
|
|
if ( ip != h->m_ip && h->m_inProgress2 ) return;
|
|
// time now
|
|
int64_t nowmsLocal = gettimeofdayInMillisecondsLocal();
|
|
//int64_t now2 = gettimeofdayInMillisecondsLocal();
|
|
// only ping a host once every 5 seconds tops
|
|
//if ( now - h->m_lastPing < 5000 ) return;
|
|
// stamp it
|
|
//h->m_pingInfo.m_lastPing = nowmsLocal;
|
|
h->m_lastPing = nowmsLocal;
|
|
// count it
|
|
s_outstandingPings++;
|
|
// consider it in progress
|
|
if ( ip == h->m_ip ) h->m_inProgress1 = true;
|
|
else h->m_inProgress2 = true;
|
|
// . if he is not dead, set this first send time
|
|
// . this can be the first time we sent an unanswered ping
|
|
// . this is only used to determine when to send emails since we
|
|
// can now send the email a configurable amount of seconds after the
|
|
// host as already been registered as dead. i hope this will fix the
|
|
// problem of the hitachis causing bus resets all the time. because
|
|
// we make the dead threshold only a few seconds to protect
|
|
// performance, but make the email time much higher so i can sleep
|
|
// at night.
|
|
// . in the beginning all hosts are considered dead, but the email code
|
|
// should not be 0, it should be -2
|
|
// . BOTH eth ports must be dead for host to be dead
|
|
bool isAlive = false;
|
|
if ( h->m_ping < g_conf.m_deadHostTimeout ) isAlive = true;
|
|
if ( g_conf.m_useShotgun &&
|
|
h->m_pingShotgun < g_conf.m_deadHostTimeout ) isAlive = true;
|
|
if ( isAlive && h->m_emailCode == 0 ) h->m_startTime =nowmsLocal;//now2
|
|
// log it only the first time he is registered as dead
|
|
if ( isAlive ) h->m_wasAlive = true;
|
|
else h->m_wasAlive = false;
|
|
// note it. wait 30 seconds cuz they are all high at the start when
|
|
// everyone is just coming up.
|
|
//if ( h->m_ping > 1000 &&
|
|
// gettimeofdayInMillisecondsLocal()-g_process.m_processStartTime>
|
|
// 30000 )
|
|
// log("gb: Got high ping of %"INT32" ms to hostid #%"INT32"",
|
|
// (int32_t)h->m_ping,(int32_t)h->m_hostId);
|
|
// . use the shotgun port to send the request? or the original port?
|
|
// . the reply will be sent back to the same ip/port as it was sent
|
|
// from because in PingServer::handleRequest() we set the
|
|
// "useSameSwitch" flag to true when passing to
|
|
// UdpServer::sendReply_ass()
|
|
//uint32_t ip;
|
|
//if ( useShotgunIp ) ip = h->m_ipShotgun;
|
|
//else ip = h->m_ip;
|
|
Host *me = g_hostdb.m_myHost;
|
|
// use the tmp buf
|
|
//char request[14+4+4+1];
|
|
//char *p = m_request;
|
|
|
|
// we can have multiple outstanding pings, so keep request bufs
|
|
// independent...
|
|
//char *request = h->m_requestBuf;
|
|
//char *p = h->m_requestBuf;
|
|
|
|
// we send our stats to host "h"
|
|
PingInfo *pi = &me->m_pingInfo;//RequestBuf;
|
|
|
|
pi->m_numCorruptDiskReads = g_numCorrupt;
|
|
pi->m_numOutOfMems = g_mem.m_outOfMems;
|
|
pi->m_socketsClosedFromHittingLimit = g_stats.m_closedSockets;
|
|
pi->m_currentSpiders = g_spiderLoop.m_numSpidersOut;
|
|
|
|
// store the last ping we got from it first
|
|
//*(int64_t *)p = h->m_lastPing; p += sizeof(int64_t);
|
|
|
|
// i don't think this should be in pinginfo
|
|
//pi->m_lastPing = h->m_pingInfo.m_lastPing;
|
|
|
|
// let the receiver know our repair mode
|
|
//*p = g_repairMode; p++;
|
|
pi->m_repairMode = g_repairMode;
|
|
|
|
// problem is that we know when the error occurs, but don't know when
|
|
// the error has been fixed. So just consider this host as dead unless
|
|
// gb is restarted and the problem is fixed
|
|
//*p = me->m_kernelErrors; p++;
|
|
//pi->m_kernelErrors = me->m_pingInfo.m_kernelErrors;
|
|
|
|
//if ( me->m_kernelErrors ){
|
|
//char *xx = NULL; *xx = 0;
|
|
//}
|
|
int32_t l_loadavg = (int32_t) (g_process.getLoadAvg() * 100.0);
|
|
//gbmemcpy(p, &l_loadavg, sizeof(int32_t)); p += sizeof(int32_t);
|
|
pi->m_loadAvg = l_loadavg ;
|
|
|
|
// then our percent mem used
|
|
float mem = ((float)g_mem.getUsedMem()*100.0)/(float)g_mem.getMaxMem();
|
|
//*(float *)p = mem ; p += sizeof(float); // 4 bytes
|
|
pi->m_percentMemUsed = mem;
|
|
|
|
// our cpu usage
|
|
//*(float *)p = me->m_cpuUsage ; p += sizeof(float); // 4 bytes
|
|
//pi->m_cpuUsage = me->m_pingInfo.m_cpuUsage;
|
|
|
|
// our num recs, docsIndexed
|
|
//*(int32_t*)p = (int32_t)g_clusterdb.getRdb()->getNumTotalRecs();
|
|
// *(int32_t*)p = (int32_t)g_process.getTotalDocsIndexed();
|
|
// p += sizeof(int32_t);
|
|
pi->m_totalDocsIndexed = (int32_t)g_process.getTotalDocsIndexed();
|
|
|
|
// urls indexed since startup
|
|
//*(int32_t*)p = (int32_t)g_test.m_urlsIndexed;
|
|
//p += sizeof(int32_t);
|
|
// our num recs, eventsIndexed
|
|
//*(int32_t*)p = g_timedb.getNumTotalEvents();//g_coldb.m_numEventsAllColl
|
|
//*(int32_t *)p = 0;
|
|
//p += sizeof(int32_t);
|
|
// slow disk reads
|
|
// *(int32_t*)p = g_stats.m_slowDiskReads;
|
|
// p += sizeof(int32_t);
|
|
pi->m_slowDiskReads = g_stats.m_slowDiskReads;
|
|
|
|
|
|
// and hosts.conf crc
|
|
//*(int32_t *)p = g_hostdb.getCRC(); p += 4;
|
|
pi->m_hostsConfCRC = g_hostdb.getCRC();
|
|
|
|
// ensure crc is legit
|
|
if ( g_hostdb.getCRC() == 0 ) { char *xx=NULL;*xx=0; }
|
|
|
|
// disk usage (df -ka)
|
|
//*(float *)p = g_process.m_diskUsage; p += 4;
|
|
pi->m_diskUsage = g_process.m_diskUsage;
|
|
|
|
// flags indicating our state
|
|
int32_t flags = 0;
|
|
// let others know we are doing our daily merge and have turned off
|
|
// our spiders. when host #0 indicates this state it will wait
|
|
// for all other hosts to enter the mergeMode. when other hosts
|
|
// receive this state from host #0, they will start their daily merge.
|
|
if ( g_spiderLoop.m_numSpidersOut > 0 ) flags |= PFLAG_HASSPIDERS;
|
|
if ( g_process.isRdbMerging() ) flags |= PFLAG_MERGING;
|
|
if ( g_process.isRdbDumping() ) flags |= PFLAG_DUMPING;
|
|
if ( g_rebalance.m_isScanning ) flags |= PFLAG_REBALANCING;
|
|
if ( g_recoveryMode ) flags |= PFLAG_RECOVERYMODE;
|
|
if ( g_rebalance.m_numForeignRecs ) flags |= PFLAG_FOREIGNRECS;
|
|
if ( g_dailyMerge.m_mergeMode == 0 ) flags |= PFLAG_MERGEMODE0;
|
|
if ( g_dailyMerge.m_mergeMode ==0 || g_dailyMerge.m_mergeMode == 6 )
|
|
flags |= PFLAG_MERGEMODE0OR6;
|
|
if ( ! isClockInSync() ) flags |= PFLAG_OUTOFSYNC;
|
|
|
|
uint8_t rv8 = (uint8_t)g_recoveryLevel;
|
|
if ( g_recoveryLevel > 255 ) rv8 = 255;
|
|
pi->m_recoveryLevel = rv8;
|
|
|
|
//*(int32_t *)p = flags; p += 4; // 4 bytes
|
|
pi->m_flags = flags;
|
|
|
|
// the collection number we are daily merging (currently 2 bytes)
|
|
collnum_t cn = -1;
|
|
if ( g_dailyMerge.m_cr ) cn = g_dailyMerge.m_cr->m_collnum;
|
|
//*(collnum_t *)p = cn ; p += sizeof(collnum_t);
|
|
pi->m_dailyMergeCollnum = cn;
|
|
|
|
pi->m_hostId = me->m_hostId;
|
|
|
|
pi->m_localHostTimeMS = gettimeofdayInMillisecondsLocal();
|
|
|
|
pi->m_udpSlotsInUseIncoming = g_udpServer.getNumUsedSlotsIncoming();
|
|
|
|
pi->m_tcpSocketsInUse = g_httpServer.m_tcp.m_numUsed;
|
|
|
|
// from Loop.cpp
|
|
pi->m_cpuUsage = g_cpuUsage;
|
|
|
|
// store hd temps
|
|
// gbmemcpy ( p , me->m_hdtemps , 4 * 2 );
|
|
// p += 4 * 2;
|
|
//gbmemcpy ( &pi->m_hdtemps , me->m_pingInfo.m_hdtemps , 4 * 2 );
|
|
|
|
// store the gbVersionStrBuf now, just a date with a \0 included
|
|
char *v = getVersion();
|
|
int32_t vsize = getVersionSize(); // 21 bytes
|
|
// gbmemcpy ( p , v , vsize );
|
|
// p += vsize;
|
|
if ( vsize != 21 ) { char *xx=NULL;*xx=0; }
|
|
gbmemcpy ( pi->m_gbVersionStr , v , vsize );
|
|
|
|
// int32_t requestSize = sizeof(PingRequest);//p - request;
|
|
|
|
// // sanity check
|
|
// if ( requestSize != sizeof(PingRequest) ) {
|
|
// // (44+4+4+21) ) { // MAX_PING_SIZE ) {
|
|
// log("ping: "
|
|
// "YOU ARE MIXING MULTIPLE GB VERSIONS IN YOUR CLUSTER. "
|
|
// "MAKE SURE THEY ARE ALL THE SAME GB BINARY");
|
|
// char *xx = NULL; *xx = 0; }
|
|
|
|
|
|
//char *request = (char *)pi;
|
|
|
|
// debug msg
|
|
//logf(LOG_DEBUG,"net: Sending ping request to hid=%"INT32" ip=%s.",
|
|
// h->m_hostId,iptoa(ip));
|
|
// . launch one
|
|
// . returns false and sets errno on error
|
|
// . returns true if request sent and it blocked
|
|
// . use MsgType of 0x11 for pinging
|
|
// . we now use the high-prioirty server, g_udpServer2
|
|
// . now we send over our current time so remote host will sync up
|
|
// with us
|
|
// . only sync up with hostId #0
|
|
// . if he goes down its ok because time is mostly important for
|
|
// spidering and spidering is suspended if a host is down
|
|
//if ( g_udpServer.sendRequest ( (char *)(&h->m_lastPing) ,
|
|
// 8 ,
|
|
|
|
// the proxy may be interfacing with the temporary cluster while
|
|
// we update the main cluster...
|
|
//int32_t port = h->m_port;
|
|
if ( g_proxy.isProxyRunning() && g_conf.m_useTmpCluster )
|
|
port++;
|
|
|
|
if ( h->m_isProxy ) hostId = -1;
|
|
if ( g_udpServer.sendRequest ( (char *)pi , //request ,
|
|
sizeof(PingInfo),//requestSize ,
|
|
0x11 ,
|
|
ip ,//h->m_ip ,
|
|
port ,//h->m_port ,
|
|
hostId ,
|
|
NULL ,
|
|
(void *)h , // callback state
|
|
gotReplyWrapperP ,
|
|
// timeout
|
|
(g_conf.m_deadHostTimeout/1000)+1 ,
|
|
1000 , // backoff
|
|
2000 , // max wait
|
|
NULL , // reply buf
|
|
0 , // reply buf size
|
|
0 )) // niceness
|
|
return;
|
|
// it had an error, so dec the count
|
|
s_outstandingPings--;
|
|
// consider it out of progress
|
|
if ( ip == h->m_ip ) h->m_inProgress1 = false;
|
|
else h->m_inProgress2 = false;
|
|
// had an error
|
|
log("net: Pinging host #%"INT32" had error: %s.",
|
|
h->m_hostId,mstrerror(g_errno) );
|
|
// reset it cuz it's not a showstopper
|
|
g_errno = 0;
|
|
}
|
|
|
|
// . this is the last hostId we sent an alert about
|
|
// . it is -1 if he has come back up since
|
|
// . it is -1 if we've sent no alerts
|
|
// . it is so we only send out one email even though a bunch of consecutive
|
|
// hosts right before us (in hostid ranking) may have gone down. The admin
|
|
// only needs one notification.
|
|
static int32_t s_lastSentHostId = -1;
|
|
|
|
void gotReplyWrapperP ( void *state , UdpSlot *slot ) {
|
|
// state is the host
|
|
Host *h = (Host *)state;
|
|
int32_t hid = h->m_hostId;
|
|
// if host 0 special case.
|
|
//if ( hid == 0 && g_sendingToHost0 ) {
|
|
// // no longer sending to him
|
|
// g_sendingToHost0 = false;
|
|
// // if he sent a reply, don't bother him so much any more
|
|
// if ( ! g_errno ) g_host0Replied = true;
|
|
//}
|
|
// un-count it
|
|
s_outstandingPings--;
|
|
// don't let udp server free our send buf, we own it
|
|
slot->m_sendBufAlloc = NULL;
|
|
// update ping time
|
|
int64_t nowms = gettimeofdayInMillisecondsLocal();
|
|
//int64_t now2 = gettimeofdayInMillisecondsLocal();
|
|
int64_t tripTime = nowms - slot->m_firstSendTime ;
|
|
// what port were we sending to?
|
|
//uint16_t port = slot->m_port;
|
|
// ensure not negative, clock might have been adjusted!
|
|
if ( tripTime < 0 ) tripTime = 0;
|
|
// get the Host ptr
|
|
//Host *h = g_hostdb.getHost ( hid );
|
|
// bail if none
|
|
if ( ! h ) { log(LOG_LOGIC,"net: pingserver: bad hostId."); return; }
|
|
// point to the right ping time, for the original port or for the
|
|
// shotgun port
|
|
int32_t *pingPtr = NULL;
|
|
if ( slot->m_ip == h->m_ipShotgun ) pingPtr = &h->m_pingShotgun;
|
|
// original overrides shotgun, in case ips match
|
|
if ( slot->m_ip == h->m_ip ) pingPtr = &h->m_ping;
|
|
// otherwise... wierd!!
|
|
if ( ! pingPtr ) pingPtr = &h->m_ping;
|
|
// debug msg
|
|
//logf(LOG_DEBUG,"net: Got ping reply for hid=%"INT32" ip=%s "
|
|
//"tripTime=%"INT64".", hid,iptoa(slot->m_ip),tripTime);
|
|
// update ping times for this host
|
|
//*pingPtr = tripTime;
|
|
if ( g_errno == EUDPTIMEDOUT ) tripTime = g_conf.m_deadHostTimeout;
|
|
updatePingTime ( h , pingPtr , tripTime );
|
|
// sanity checks
|
|
if ( slot->m_ip==h->m_ip && !h->m_inProgress1) {char *xx=NULL;*xx=0;}
|
|
if ( slot->m_ip!=h->m_ip && !h->m_inProgress2) {char *xx=NULL;*xx=0;}
|
|
// consider it out of progress
|
|
if ( slot->m_ip == h->m_ip ) h->m_inProgress1 = false;
|
|
else h->m_inProgress2 = false;
|
|
// count all replies
|
|
if ( ! g_errno ) h->m_numPingReplies++;
|
|
|
|
// but if g_errno == EUDPTIMEDOUT, mark the host as dead
|
|
if ( g_errno == EUDPTIMEDOUT ) {
|
|
// if not first time...
|
|
if ( ! h->m_wasAlive &&
|
|
// we must have been alive at some time
|
|
h->m_wasEverAlive &&
|
|
// and this is lefit
|
|
h->m_timeOfDeath != 0 &&
|
|
// and in our group
|
|
//h->m_groupId == g_hostdb.m_myHost->m_groupId ) {
|
|
h->m_shardNum == getMyShardNum() ) {
|
|
// how long dead for?
|
|
int64_t delta = nowms - h->m_timeOfDeath;
|
|
// we did it once, do not repeat
|
|
h->m_timeOfDeath = 0;
|
|
// num collections
|
|
int32_t nc = g_collectiondb.m_numRecs;
|
|
// if 5 minutes, issue reload if in our group
|
|
for ( int32_t i = 0 ; i < nc && delta > 2*60*1000 ; i++ ){
|
|
// get coll
|
|
SpiderColl *sc=g_spiderCache.getSpiderColl(i);
|
|
// skip if empty
|
|
if ( ! sc ) continue;
|
|
// flag it
|
|
sc->m_twinDied = true;
|
|
// flag it for a reload since our twin died
|
|
//sc->m_needsReload = true;
|
|
// reload m_sniTable/m_cdTable/etc.
|
|
//sc->m_doInitialScan = true;
|
|
}
|
|
}
|
|
//*pingPtr = g_conf.m_deadHostTimeout;
|
|
if ( h->m_wasAlive ) {
|
|
char *buf = "Host";
|
|
if(h->m_isProxy)
|
|
buf = "Proxy";
|
|
log("net: %s #%"INT32" ip=%s is dead. Has not responded to "
|
|
"ping in %"INT32" ms.", buf, h->m_hostId,
|
|
iptoa(slot->m_ip),
|
|
(int32_t)g_conf.m_deadHostTimeout);
|
|
// set dead time
|
|
h->m_timeOfDeath = nowms;
|
|
// send a kill -SEGV to the gb process on that
|
|
// host to see why he froze up!
|
|
// ALSO get a snapshot of the ps -aux on that machine
|
|
/*
|
|
char sbuf[1024];
|
|
sprintf (sbuf,"ssh %s '"
|
|
"ps auxww > /tmp/psout & "
|
|
"killall -SEGV gb "
|
|
"'",iptoa(h->m_ip));
|
|
log("net: system(\"%s\")",sbuf);
|
|
static bool s_count = 0
|
|
// only call system() once and only if we are host #0
|
|
if (g_hostdb.m_myHost->m_hostId == 0 && s_count == 0)
|
|
system ( sbuf );
|
|
// count how many times we do it
|
|
s_count++;
|
|
*/
|
|
}
|
|
|
|
}
|
|
|
|
// clear g_errno so we don't think any functions below set it
|
|
g_errno = 0;
|
|
|
|
// is the whole host alive? if either port is up it is still alive
|
|
bool isAlive = false;
|
|
if ( g_conf.m_useShotgun &&
|
|
h->m_pingShotgun < g_conf.m_deadHostTimeout ) isAlive = true;
|
|
if ( h->m_ping < g_conf.m_deadHostTimeout ) isAlive = true;
|
|
|
|
// if host is not dead clear his sentEmail bit so if he goes
|
|
// down again we may be obliged to send another email, provided
|
|
// we are the hostid right after him
|
|
if ( isAlive ) {
|
|
// allow him to receive emails again
|
|
h->m_emailCode = 0;
|
|
// if he was the subject of the last alert we emailed and now
|
|
// he is back up then we are free to send another alert about
|
|
// any other host that goes down
|
|
if ( h->m_hostId == s_lastSentHostId ) s_lastSentHostId = -1;
|
|
// mark this
|
|
h->m_wasEverAlive = true;
|
|
}
|
|
|
|
if ( isAlive && h->m_pingInfo.m_percentMemUsed >= 99.0 &&
|
|
h->m_firstOOMTime == 0 )
|
|
h->m_firstOOMTime = nowms;
|
|
if ( isAlive && h->m_pingInfo.m_percentMemUsed < 99.0 )
|
|
h->m_firstOOMTime = 0LL;
|
|
// if this host is alive and has been at 99% or more mem usage
|
|
// for the last X minutes, and we have got at least 10 ping replies
|
|
// from him, then send an email alert.
|
|
if ( isAlive &&
|
|
h->m_pingInfo.m_percentMemUsed >= 99.0 &&
|
|
nowms - h->m_firstOOMTime >= g_conf.m_sendEmailTimeout )
|
|
g_pingServer.sendEmail ( h , NULL , true , true );
|
|
|
|
// . if his ping was dead, try to send an email alert to the admin
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on erro
|
|
// . send it iff both ports (if using shotgun) are dead
|
|
if ( ! isAlive && nowms - h->m_startTime >= g_conf.m_sendEmailTimeout)
|
|
g_pingServer.sendEmail ( h ) ;
|
|
|
|
|
|
PingInfo *pi = &h->m_pingInfo;
|
|
|
|
// if this host is alive but has some kernel error, then send an
|
|
// email alert.
|
|
if ( pi->m_kernelErrors && !h->m_kernelErrorReported ){
|
|
log("net: Host #%"INT32" is reporting kernel errors.",
|
|
h->m_hostId);
|
|
// MDW: disable for now, so does not wake us up at 3am
|
|
//g_pingServer.sendEmail ( h, NULL, true, false, true );
|
|
h->m_kernelErrorReported = true;
|
|
}
|
|
|
|
// reset if the machine has come back up
|
|
if ( !pi->m_kernelErrors && h->m_kernelErrorReported )
|
|
h->m_kernelErrorReported = false;
|
|
|
|
// reset in case sendEmail() set it
|
|
g_errno = 0;
|
|
|
|
Host *myHost = g_hostdb.m_myHost;
|
|
// . the one byte response has the sync status
|
|
// . i think we cored on this when it wasn't checking to see if
|
|
// slot->m_readBuf was NULL. how can that happen? it seemed to have,
|
|
// when it tried to allocate one byte of mem and failed...
|
|
if ( slot->m_readBufSize == 1 && slot->m_readBuf )
|
|
h->m_syncStatus = *(slot->m_readBuf);
|
|
/*
|
|
else if( myHost && myHost->m_isProxy && hid == 0 &&
|
|
slot->m_readBufSize == 9 && slot->m_readBuf ) {
|
|
h->m_syncStatus = *(slot->m_readBuf);
|
|
if ( *pingPtr < 200 ) {
|
|
int64_t time = *(int64_t*)(slot->m_readBuf+1);
|
|
settimeofdayInMillisecondsGlobal(time);
|
|
//time_t tmt = time/1000;
|
|
//log("net: Proxy got time of %s", ctime(&tmt));
|
|
}
|
|
}
|
|
*/
|
|
|
|
// debug msg
|
|
//log("slot=%"INT32", g_errno=%"INT32" (%s) "
|
|
// "hostId %"INT32" has ping of %"INT32"",
|
|
// (int32_t)slot,g_errno,mstrerror(g_errno),hid,tripTime);
|
|
// if we're host #0 and he responded really quickly,
|
|
// tell him to sync to our time
|
|
//int32_t h0MachineNum = g_hostdb.getMachineNum(0);
|
|
if (myHost && myHost->m_isProxy) return;
|
|
if ( g_hostdb.m_hostId != 0 ) return;
|
|
if ( *pingPtr > 200 ) return;
|
|
// debug
|
|
//log("PingServer:: uncomment me");
|
|
// don't send sync request to same machine either, we share the clock
|
|
//if((!h->m_isProxy && g_hostdb.getMachineNum(hid) == h0MachineNum) ||
|
|
// ( h->m_isProxy && h->m_ip == myHost->m_ip) )
|
|
// return;
|
|
// count this one too
|
|
s_outstandingPings++;
|
|
// record this time
|
|
g_pingServer.m_currentPing = *pingPtr;
|
|
|
|
// . ok, his ping was under half a second so he should sync with us
|
|
// . he should recognize empty requests as a request to sync
|
|
// . send reply back to the same ip/port that sent to us
|
|
if ( h->m_isProxy ) hid = -1;
|
|
|
|
// send back what his ping was so he knows
|
|
//*(int32_t *)h->m_requestBuf = *pingPtr;
|
|
//h->m_pingInfo.m_lastPing = *pingPtr;
|
|
*(int32_t *)h->m_tmpBuf = *pingPtr;
|
|
|
|
|
|
if ( g_udpServer.sendRequest (h->m_tmpBuf,//RequestBuf,
|
|
//h->m_requestBuf ,
|
|
4 , // 4 byte request
|
|
0x11 ,
|
|
slot->m_ip , // h->m_ip ,
|
|
slot->m_port , // h->m_port2 ,
|
|
hid ,
|
|
NULL ,
|
|
(void *)(PTRTYPE)h->m_hostId, //cb state
|
|
gotReplyWrapperP3 ,
|
|
// timeout
|
|
(g_conf.m_deadHostTimeout/1000)+1 ,
|
|
1000 , // backoff
|
|
2000 , // max wait
|
|
NULL , // reply buf
|
|
0 , // reply buf size
|
|
0 )) // niceness
|
|
return;
|
|
// he came back right away
|
|
s_outstandingPings--;
|
|
// had an error
|
|
log("net: Got error sending time sync request: %s.",
|
|
mstrerror(g_errno) );
|
|
// reset it cuz it's not a showstopper
|
|
g_errno = 0;
|
|
|
|
// . come down here to launch another ping if we need to
|
|
// . this is really only used when we first come up
|
|
// launchMore:
|
|
//g_pingServer.sendPingsToAll();
|
|
}
|
|
|
|
void gotReplyWrapperP3 ( void *state , UdpSlot *slot ) {
|
|
// do not free this!
|
|
slot->m_sendBufAlloc = NULL;
|
|
// un-count it
|
|
s_outstandingPings--;
|
|
// continue to launch more if we need to
|
|
//g_pingServer.sendPingsToAll();
|
|
}
|
|
|
|
// record time in the ping request iff from hostId #0
|
|
static int64_t s_deltaTime = 0;
|
|
|
|
// this may be called from a signal handler now...
|
|
void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
|
|
// get request
|
|
//char *request = slot->m_readBuf;
|
|
int32_t requestSize = slot->m_readBufSize;
|
|
char *request = slot->m_readBuf;
|
|
// get the ip/port of requester
|
|
uint32_t ip = slot->m_ip;
|
|
uint16_t port = slot->m_port;
|
|
// get the host entry
|
|
Host *h = g_hostdb.getHost ( ip , port );
|
|
// we may be the temporary cluster (grep for useTmpCluster) and
|
|
// the proxy is sending pings from its original port plus 1
|
|
if ( ! h ) h = g_hostdb.getHost ( ip , port + 1 );
|
|
// debug
|
|
//fprintf(stderr,"GOT PING insighandler=%"INT32" rsize=%"INT32" h=%"INT32"\n",
|
|
// (int32_t)g_inSigHandler,requestSize,(int32_t)h);
|
|
if ( ! h ) {
|
|
// size of 3 means it is a debug ping from
|
|
// proxy server is a connectIp, so don't display the message
|
|
// ./gb ./hosts.conf <hid> --udp
|
|
if ( requestSize != 3 && ! g_conf.isConnectIp(ip) )
|
|
log(LOG_LOGIC,"net: pingserver: No host for "
|
|
"dstip=%s port=%hu tid=%"INT32" fromhostid=%"INT32"",
|
|
iptoa(ip),port,slot->m_transId,slot->m_hostId);
|
|
//static int32_t s_count = 0;
|
|
//fprintf(stderr,"GOT %"INT32"\n",s_count++);
|
|
//g_udpServer2.sendErrorReply ( slot , 1 );
|
|
// set "useSameSwitch" to true so even if shotgunning is on
|
|
// the udp server will send the reply back to the same ip/port
|
|
// from which we got the request
|
|
g_udpServer.sendReply_ass ( NULL , // msg
|
|
0 , // msgSize
|
|
NULL , // alloc
|
|
0 , // alloc Size
|
|
slot ,
|
|
60 , // timeout in seconds
|
|
NULL , // state
|
|
NULL , // callback
|
|
500 , // backoff in ms
|
|
1000 , // max wait for backoff
|
|
true );// use same switch?
|
|
return;
|
|
}
|
|
|
|
|
|
// point to the correct ping time. this request may have come from
|
|
// the shotgun network, or the primary network.
|
|
int32_t *pingPtr = NULL;
|
|
if ( slot->m_ip == h->m_ipShotgun ) pingPtr = &h->m_pingShotgun;
|
|
// original overrides shotgun, in case ips match
|
|
if ( slot->m_ip == h->m_ip ) pingPtr = &h->m_ping;
|
|
// otherwise... wierd!!
|
|
if ( ! pingPtr ) pingPtr = &h->m_ping;
|
|
//logf(LOG_DEBUG,"net: Got ping request from hid=%"INT32" ip=%s",
|
|
// h->m_hostId,iptoa(slot->m_ip));
|
|
// . he's definitely not dead since he sent us a request
|
|
// . NO! he might not be able to see our packets but we can see his!
|
|
// so i am commenting this out... this seemed to happen on the
|
|
// cluster and caused craziness! "TX Unit Hang" errors in the
|
|
// kernel ring buffer (dmesg)
|
|
//if ( *pingPtr >= g_conf.m_deadHostTimeout ) {
|
|
// //*pingPtr = g_conf.m_deadHostTimeout - 1;
|
|
// updatePingTime ( h , pingPtr, g_conf.m_deadHostTimeout - 1 );
|
|
// //if ( *pingPtr < 0 ) *pingPtr = 0;
|
|
//}
|
|
// reply msg
|
|
char *reply = NULL;
|
|
int32_t replySize = 0;
|
|
|
|
// . a request size of 10 means to set g_repairMode to 1
|
|
// . it can only be advanced to 2 when we receive ping replies from
|
|
// everyone that they are not spidering or merging titledb...
|
|
if ( requestSize == sizeof(PingInfo)){
|
|
//MAX_PING_SIZE
|
|
//14+4+4+4+4+sizeof(collnum_t)+1 ) {
|
|
|
|
// sanity
|
|
PingInfo *pi2 = (PingInfo *)request;
|
|
if ( pi2->m_hostId != h->m_hostId ) {
|
|
char *xx=NULL;*xx=0; }
|
|
|
|
// now we just copy the class
|
|
gbmemcpy ( &h->m_pingInfo , request , requestSize );
|
|
|
|
/*
|
|
char* p = request + 10;
|
|
// fetch load avg...
|
|
h->m_loadAvg = ((double)(*((int32_t*)(p)))) / 100.0;
|
|
p += sizeof(int32_t);
|
|
// and mem used
|
|
h->m_percentMemUsed = *(float *)(p);
|
|
p += sizeof(float);
|
|
// and cpu usage
|
|
h->m_cpuUsage = *(float *)(p); // 4 bytes
|
|
p += sizeof(float);
|
|
|
|
// the host's global doc count.
|
|
h->m_docsIndexed = *(int32_t*)(p);
|
|
p += sizeof(int32_t);
|
|
*/
|
|
|
|
// how many we indexed since startup
|
|
//h->m_urlsIndexed = *(int32_t*)(p);
|
|
//p += sizeof(int32_t);
|
|
|
|
// the host's event count.
|
|
//h->m_eventsIndexed = *(int32_t*)(p);
|
|
//p += sizeof(int32_t);
|
|
|
|
/*
|
|
// slow disk reads is important
|
|
h->m_slowDiskReads = *(int32_t*)(p);
|
|
p += sizeof(int32_t);
|
|
|
|
h->m_hostsConfCRC = *(int32_t*)(p);
|
|
p += sizeof(int32_t);
|
|
// sanity
|
|
if ( h->m_hostsConfCRC == 0 ) { char *xx=NULL;*xx=0; }
|
|
|
|
// disk usage
|
|
h->m_diskUsage = *(float *)p;
|
|
p += sizeof(float);
|
|
|
|
// put the state flags
|
|
h->m_flags = *(int32_t *)(p);
|
|
p += sizeof(int32_t);
|
|
|
|
// the collnum it is daily merging
|
|
h->m_dailyMergeCollnum = *(collnum_t *)(p);
|
|
p += sizeof(collnum_t);
|
|
|
|
// the 4 hd temps
|
|
gbmemcpy ( h->m_hdtemps , p , 4 * 2 );
|
|
p += 4 * 2;
|
|
|
|
// at the end the gbverstionstrbuf
|
|
int32_t vsize = getVersionSize(); // 21
|
|
gbmemcpy ( h->m_gbVersionStrBuf , p , vsize );
|
|
p += vsize;
|
|
*/
|
|
|
|
// if any one of them is overheating, then turn off
|
|
// spiders on ourselves (and thus the full cluster)
|
|
for ( int32_t k = 0 ; k < 4 ; k++ )
|
|
if ( h->m_pingInfo.m_hdtemps[k] >
|
|
g_conf.m_maxHardDriveTemp )
|
|
g_conf.m_spideringEnabled = 0;
|
|
|
|
|
|
// we finally got a ping reply from him
|
|
h->m_gotPingReply = true;
|
|
|
|
// should we enter daily merge mode? only if hostid #0 is in it
|
|
//if ( (h->m_flags & PFLAG_MERGEMODENOT0) &&
|
|
// h->m_hostId == 0 &&
|
|
// g_hostdb.m_hostId != 0 &&
|
|
// g_dailyMerge.m_mergeMode == 0 )
|
|
// // advance into the daily merge mode
|
|
// g_dailyMerge.m_mergeMode = 1;
|
|
|
|
// . what repair mode is the requester in?
|
|
// . 0 means not in repair mode
|
|
// . 1 means in repair mode waiting for a local spider or
|
|
// titledb merge to stop
|
|
// . 2 means in repair mode and local spiders and local titledb
|
|
// merging have stopped and are will not start up again
|
|
// . 3 means all hosts have entered mode 2 so we can start
|
|
// repairing
|
|
// . 4 means done repairing and ready to exit repair mode,
|
|
// but waiting for other hosts to be mode 4 or 0 before
|
|
// it can exit, too, and go back to mode 0
|
|
/*
|
|
char mode = request[8];
|
|
h->m_kernelErrors = request[9];
|
|
*/
|
|
char mode = h->m_pingInfo.m_repairMode;
|
|
|
|
//if ( h->m_kernelErrors ){
|
|
//char *xx = NULL; *xx = 0;
|
|
//}
|
|
|
|
// set his repair mode in the hosts table
|
|
//if ( h ) {
|
|
// if his mode is 2 he is ready to start repairing
|
|
// because he has stopped all spiders and titledb
|
|
// merges from happening. if he just entered mode
|
|
// 2 now, check to see if all hosts are in mode 2 now.
|
|
char oldMode = h->m_repairMode;
|
|
// update his repair mode
|
|
h->m_repairMode = mode;
|
|
// . get the MIN repair mode of all hosts
|
|
// . expensive, so only do if a host changes modes
|
|
if ( oldMode != mode ||
|
|
g_pingServer.m_minRepairMode == -1 )
|
|
g_pingServer.setMinRepairMode ( h );
|
|
//}
|
|
// make it a normal ping now
|
|
requestSize = 8;
|
|
}
|
|
|
|
PingServer *ps = &g_pingServer;
|
|
ps->m_numHostsWithForeignRecs = 0;
|
|
ps->m_numHostsDead = 0;
|
|
ps->m_hostsConfInDisagreement = false;
|
|
ps->m_hostsConfInAgreement = false;
|
|
|
|
|
|
//
|
|
// do all hosts have the same hosts.conf????
|
|
//
|
|
// if some hosts are dead then we will not set either flag.
|
|
//
|
|
// scan all grunts for agreement. do this line once per sec?
|
|
//
|
|
int32_t agree = 0;
|
|
int32_t i; for ( i = 0 ; i < g_hostdb.getNumGrunts() ; i++ ) {
|
|
Host *h = &g_hostdb.m_hosts[i];
|
|
|
|
if ( h->m_pingInfo.m_flags & PFLAG_FOREIGNRECS )
|
|
ps->m_numHostsWithForeignRecs++;
|
|
|
|
if ( g_hostdb.isDead ( h ) )
|
|
ps->m_numHostsDead++;
|
|
|
|
// skip if not received yet
|
|
if ( ! h->m_pingInfo.m_hostsConfCRC ) continue;
|
|
// badness?
|
|
if ( h->m_pingInfo.m_hostsConfCRC != g_hostdb.m_crc ) {
|
|
ps->m_hostsConfInDisagreement = true;
|
|
break;
|
|
}
|
|
// count towards agreement
|
|
agree++;
|
|
}
|
|
|
|
// iff all in agreement, set this flag
|
|
if ( agree == g_hostdb.getNumGrunts() )
|
|
ps->m_hostsConfInAgreement = true;
|
|
|
|
|
|
|
|
|
|
|
|
// if request is 5 bytes, must be a host telling us he's shutting down
|
|
if ( requestSize == 5 ) {
|
|
// his byte #4 is sendEmailAlert
|
|
bool sendEmailAlert = request[4];
|
|
// and make him dead
|
|
if ( h ) {
|
|
//h->m_ping = g_conf.m_deadHostTimeout + 1;
|
|
//h->m_pingShotgun = g_conf.m_deadHostTimeout + 1;
|
|
int32_t deadTime = g_conf.m_deadHostTimeout + 1;
|
|
updatePingTime ( h , &h->m_ping , deadTime );
|
|
updatePingTime ( h , &h->m_pingShotgun , deadTime );
|
|
// don't email admin if sendEmailAlert is false
|
|
if ( ! sendEmailAlert ) h->m_emailCode = -2;
|
|
// otherwise, launch him one now
|
|
else {
|
|
//h->m_emailCode = 0;
|
|
// . this returns false if blocked, true otherw
|
|
// . sets g_errno on erro
|
|
g_pingServer.sendEmail ( h );
|
|
// reset in case sendEmail() set it
|
|
g_errno = 0;
|
|
}
|
|
// if we are a proxy and he had an outstanding 0xfd msg
|
|
// then we need to re-route that! likewise any msg
|
|
// that has not got a full reply needs to be timedout
|
|
// right now so we can re-route it...
|
|
g_udpServer.timeoutDeadHosts ( h );
|
|
}
|
|
else
|
|
log(LOG_LOGIC,"net: pingserver: Got NULL host ptr.");
|
|
|
|
}
|
|
// . if size is 4 then he wants us to sync with him
|
|
// . this was "0", but now we include what the ping was
|
|
else if ( requestSize == 4 ) {
|
|
// get the ping time
|
|
int32_t ping = *(int32_t *)request;
|
|
// store it
|
|
g_pingServer.m_currentPing = ping;
|
|
// should we update the clock?
|
|
bool setClock = true;
|
|
// . add 1ms DRIFT for every hour since last update
|
|
// . use local clock time only
|
|
int32_t nowLocal = getTime();
|
|
// how many seconds since we last updated our clock?
|
|
int32_t delta = nowLocal - g_pingServer.m_bestPingDate;
|
|
// drift it 1ms every 5 seconds, that seems somewhat typical
|
|
int32_t drift = delta / 5;
|
|
// get best "drifted" ping, "dping"
|
|
int32_t dping = g_pingServer.m_bestPing + drift;
|
|
// no overflowing
|
|
if ( dping < g_pingServer.m_bestPing ) dping = 0x7fffffff;
|
|
// if this is our first time
|
|
if ( g_pingServer.m_bestPingDate == 0 ) dping = 0x7fffffff;
|
|
// . don't bother if not more accurate
|
|
// . update the clock on "ping" ties because our local clock
|
|
// drifts a lot
|
|
if ( g_pingServer.m_currentPing > dping )
|
|
setClock = false;
|
|
// ping must be < 200 ms to update
|
|
if ( g_pingServer.m_currentPing > 200 ) setClock = false;
|
|
// if no host... how can this happen?
|
|
if ( ! h ) setClock = false;
|
|
// only update if from host #0
|
|
if ( h && h->m_hostId != 0 ) setClock = false;
|
|
// proxy can be host #0, too! watch out...
|
|
if ( h && h->m_isProxy ) setClock = false;
|
|
// only commit sender's time if from hostId #0
|
|
if ( setClock ) {
|
|
// what time is it now?
|
|
int64_t nowmsLocal=gettimeofdayInMillisecondsLocal();
|
|
// log it
|
|
log(LOG_DEBUG,"admin: Got ping of %"INT32" ms. Updating "
|
|
"clock. drift=%"INT32" delta=%"INT32" s_deltaTime=%"INT64"ms "
|
|
"nowmsLocal=%"INT64"ms",
|
|
(int32_t)g_pingServer.m_currentPing,drift,delta,
|
|
s_deltaTime,nowmsLocal);
|
|
// what should the new time be? (local mobo time)
|
|
int64_t newTime = s_deltaTime + nowmsLocal;
|
|
// update clock
|
|
settimeofdayInMillisecondsGlobal ( newTime );
|
|
// time stamps
|
|
g_pingServer.m_bestPingDate = nowLocal;
|
|
// and the ping
|
|
g_pingServer.m_bestPing = g_pingServer.m_currentPing;
|
|
// clear this
|
|
//s_deltaTime = 0;
|
|
}
|
|
}
|
|
// all pings now deliver a timestamp of the sending host
|
|
else if ( requestSize == 8 ) {
|
|
//reply = g_pingServer.getReplyBuffer();
|
|
// only record sender's time if from hostId #0
|
|
if ( h && h->m_hostId == 0 && !h->m_isProxy) {
|
|
// what time is it now?
|
|
int64_t nowmsLocal=gettimeofdayInMillisecondsLocal();
|
|
// . seems these servers drift by 1 ms every 5 secs
|
|
// . or that is about 17 seconds a day
|
|
// . we do NOT know how accurate host #0's supplied
|
|
// time is because the request may have been delayed
|
|
log(LOG_DEBUG,"admin: host #0 time is %"INT64" ms and "
|
|
"our local time is %"INT64" ms, delta=%"INT64" ms",
|
|
*(int64_t *)request,nowmsLocal ,
|
|
*(int64_t *)request - nowmsLocal );
|
|
// update s_delta in case host #0 sends us a
|
|
// request size of 4, telling us to sync up with this
|
|
s_deltaTime =*(int64_t *)request - nowmsLocal;
|
|
}
|
|
// Reply to the proxy with a time stamp to sync with
|
|
/*
|
|
else if ( h && h->m_isProxy && g_hostdb.m_hostId == 0 &&
|
|
g_conf.m_timeSyncProxy ) {
|
|
int64_t time = gettimeofdayInMillisecondsLocal();
|
|
// skip the syncstatus byte and write the time
|
|
*(int64_t *)(reply+1) = time;
|
|
replySize = sizeof(int64_t);
|
|
}
|
|
*/
|
|
// send back a 1 byte msg with our sync status now
|
|
//Host *me = g_hostdb.getHost ( g_hostdb.m_hostId );
|
|
//*(char*)reply = me->m_syncStatus;
|
|
//replySize++;
|
|
reply = NULL;
|
|
replySize = 0;
|
|
// debug -- don't ack any pings for now
|
|
//g_udpServer2.destroySlot ( slot );
|
|
//return;
|
|
}
|
|
// a tap request -- to store the sync point in the sync file
|
|
//else if ( requestSize == 9 )
|
|
// g_sync.addOp ( OP_SYNCPT , "" , *(int64_t *)request );
|
|
// otherwise, unknown request size
|
|
else {
|
|
log(LOG_LOGIC,"net: pingserver: Unknown request size of "
|
|
"%"INT32" bytes. You are probably running a different gb "
|
|
"version on this host. check the hosts table for "
|
|
"version info.", requestSize);
|
|
}
|
|
// always send back an empty reply
|
|
g_udpServer.sendReply_ass ( reply , // msg
|
|
replySize , // msgSize
|
|
NULL , // alloc
|
|
0 , // alloc Size
|
|
slot ,
|
|
60 , // timeout in seconds
|
|
NULL , // state
|
|
NULL , // callback
|
|
500 , // backoff in ms
|
|
1000 , // max wait for backoff
|
|
true );// use same switch?
|
|
|
|
|
|
// . now in PingServer.cpp for hostid 0 it checks
|
|
// the urlsindexed from each host if g_conf.m_testParserEnabled
|
|
// is true to see if we should call g_test.stopIt()
|
|
// . add up each hosts urls indexed
|
|
if ( ! g_conf.m_testParserEnabled ) return;
|
|
if ( g_hostdb.m_myHost->m_hostId != 0 ) return;
|
|
int32_t total = 0;
|
|
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
|
|
Host *h = &g_hostdb.m_hosts[i];
|
|
total += h->m_urlsIndexed;
|
|
}
|
|
// all done?
|
|
if ( total >= g_test.m_urlsAdded ) g_test.stopIt();
|
|
|
|
return;
|
|
}
|
|
|
|
// . sets m_minRepairMode
|
|
// . only call this when host "h" changes repair mode
|
|
void PingServer::setMinRepairMode ( Host *hhh ) {
|
|
// . this host is the holder of the min now, return if not a match
|
|
// . do not return if m_minRepairMode has not been set at all though
|
|
bool returnNow = true;
|
|
if ( m_minRepairModeHost == hhh ) returnNow = false;
|
|
if ( m_maxRepairModeHost == hhh ) returnNow = false;
|
|
if ( m_minRepairModeBesides0Host == hhh ) returnNow = false;
|
|
if ( m_minRepairMode == -1 ) returnNow = false;
|
|
if ( m_maxRepairMode == -1 ) returnNow = false;
|
|
if ( m_minRepairModeBesides0 == -1 ) returnNow = false;
|
|
if ( returnNow ) return;
|
|
// count the mins
|
|
int32_t min = -1;
|
|
int32_t max = -1;
|
|
int32_t min0 = -1;
|
|
Host *minh = NULL;
|
|
Host *maxh = NULL;
|
|
Host *minh0 = NULL;
|
|
// scan to find new min
|
|
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
|
|
// count if not dead
|
|
Host *h = &g_hostdb.m_hosts[i];
|
|
// . if it is us, the repairMode is not updated because we
|
|
// do not ping ourselves.
|
|
// . we check ourselves in the the getMinRepairMode() and
|
|
// getNumHostsInRepairMode7() functions defined in
|
|
// PingServer.h
|
|
if ( h == g_hostdb.m_myHost ) continue;
|
|
// get repair mode
|
|
int32_t repairMode = h->m_repairMode;
|
|
// is it a min?
|
|
if ( repairMode < min || min == -1 ) {
|
|
// we got a new minner
|
|
min = repairMode;
|
|
minh = h;
|
|
}
|
|
// is it a max?
|
|
if ( repairMode > max || max == -1 ) {
|
|
// we got a new minner
|
|
max = repairMode;
|
|
maxh = h;
|
|
}
|
|
// . min0 is the lowest repair mode that is not 0
|
|
// . if they are all 0, then it will be 0
|
|
if ( repairMode == 0 ) continue;
|
|
if ( repairMode < min0 || min0 == -1 ) {
|
|
min0 = repairMode;
|
|
minh0 = h;
|
|
}
|
|
}
|
|
// set these guys to the min
|
|
m_minRepairMode = min;
|
|
m_minRepairModeHost = minh;
|
|
// and these to max
|
|
m_maxRepairMode = max;
|
|
m_maxRepairModeHost = maxh;
|
|
// if they are all 0 then this will be 0
|
|
m_minRepairModeBesides0 = min0;
|
|
m_minRepairModeBesides0Host = minh0;
|
|
}
|
|
|
|
//
|
|
// This code is used to cyclically ping all hosts in the network
|
|
//
|
|
|
|
// this wrapper is called once 100ms, or so...
|
|
void sleepWrapper ( int fd , void *state ) {
|
|
// . do not do anything if still doing initial pinging above
|
|
// . in fact, launch as many pings as we can right now
|
|
//if ( ! g_host0Replied ||
|
|
// g_pingServer.m_totalLaunched <
|
|
// (g_hostdb.getNumHosts() + g_hostdb.getNumProxy()) ) {
|
|
// g_pingServer.sendPingsToAll();
|
|
// return;
|
|
//}
|
|
g_pingServer.sendPingsToAll();//pingNextHost ( );
|
|
}
|
|
|
|
|
|
/*
|
|
void PingServer::pingNextHost ( ) {
|
|
|
|
skip:
|
|
// . don't use more than 32 UdpSlots for pinging
|
|
// . don't use more than numHosts UdpSlots for pinging
|
|
int32_t n = g_hostdb.getNumHosts();
|
|
if ( m_pingProxy )
|
|
n = g_hostdb.getNumProxy();
|
|
int32_t max = n;
|
|
if ( n > 32 ) max = 32;
|
|
|
|
if ( s_outstandingPings >= max ) return;
|
|
// the next hostid to ping
|
|
static int32_t s_nextHostId = 0;
|
|
// cycle through pinging different hosts
|
|
// get next host to ping
|
|
if ( s_nextHostId >= n ) {
|
|
s_nextHostId = 0;
|
|
// toggle shotgun if we should
|
|
if ( g_conf.m_useShotgun ) {
|
|
if ( m_useShotgun ) {
|
|
m_useShotgun = 0;
|
|
if(g_hostdb.getNumProxy())
|
|
m_pingProxy ^= 1;
|
|
}
|
|
else {
|
|
m_useShotgun = 1;
|
|
}
|
|
}
|
|
else {
|
|
if(g_hostdb.getNumProxy())
|
|
m_pingProxy ^= 1;
|
|
}
|
|
}
|
|
// always turn off if we should
|
|
if ( ! g_conf.m_useShotgun ) m_useShotgun = 0;
|
|
// ping it
|
|
Host *h = g_hostdb.getHost(s_nextHostId);
|
|
if ( m_pingProxy )
|
|
h = g_hostdb.getProxy(s_nextHostId);
|
|
pingHost ( h, m_useShotgun );
|
|
// inc next host to ping
|
|
s_nextHostId++;
|
|
}
|
|
*/
|
|
|
|
//////////////////////////////////////////////////////
|
|
// email sending code
|
|
//////////////////////////////////////////////////////
|
|
|
|
// we can send emails when a host is detected as dead
|
|
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on error
|
|
bool PingServer::sendEmail ( Host *h ,
|
|
char *errmsg ,
|
|
bool sendToAdmin ,
|
|
bool oom ,
|
|
bool kernelErrors ,
|
|
bool parmChanged ,
|
|
bool forceIt ,
|
|
int32_t mxIP ) { // 0 means none
|
|
// clear this
|
|
g_errno = 0;
|
|
// not if we have outstanding requests
|
|
if ( m_numReplies2 < m_numRequests2 ) {
|
|
log("net: Email not sent since there are %"INT32" outstanding "
|
|
"replies.",m_numReplies2 - m_numRequests2);
|
|
return true;
|
|
}
|
|
// throttle the oom sends
|
|
if ( oom ) {
|
|
static int32_t s_lastOOMTime = 0;
|
|
int32_t now = getTimeLocal();
|
|
// space 15 minutes apart
|
|
if ( now - s_lastOOMTime < 15*60 ) return true;
|
|
// set time
|
|
s_lastOOMTime = now;
|
|
}
|
|
// always force these now because they are messing up our latency graph
|
|
if ( oom ) forceIt = true;
|
|
// . even if we don't send an email, log it
|
|
// . return if alerts disabled
|
|
if ( ! g_conf.m_sendEmailAlerts && ! forceIt ) {
|
|
// . only log if this is his first time as being detected dead
|
|
// . this is useful cuz it might hint at a down link
|
|
if ( h != NULL && h->m_emailCode == 0 ) {
|
|
h->m_emailCode = 1;
|
|
//log("net: Host #%"INT32" is dead. Has not responded to "
|
|
// "ping in %"INT32" ms.",h->m_hostId,
|
|
// (int32_t)g_conf.m_deadHostTimeout);
|
|
}
|
|
return true;
|
|
}
|
|
// bitch and bail if h is NULL and this is a dead host msg
|
|
if ( ! h && errmsg == NULL) {
|
|
g_errno = EBADENGINEER;
|
|
log(LOG_LOGIC,"net: pingserver: Host ptr is NULL 2.");
|
|
return true;
|
|
}
|
|
// if he's not open for alerts,return (see PingServer.h) for alert defn
|
|
if ( h && h->m_emailCode != 0 ) return true;
|
|
|
|
// don't send another email until the last us we alerted revives
|
|
if ( h && s_lastSentHostId >= 0 ) {
|
|
// in fact, even after the lastSentHostId host comes back
|
|
// up, don't send to these guys who were dead before it came
|
|
// back up
|
|
h->m_emailCode = -5;
|
|
return true;
|
|
}
|
|
|
|
char msgbuf[2048];
|
|
if( h ) { //as a special case construct the error message here if
|
|
// we have a host.
|
|
// . are we the designated host to send the email alert?
|
|
// . our hostid must be the next alive hostId
|
|
int32_t dhid = h->m_hostId;
|
|
Host *dh = g_hostdb.getHost ( dhid );
|
|
Host *origdh = dh;
|
|
//while ( dh && dh->m_ping >= g_conf.m_deadHostTimeout ) {
|
|
int32_t totalCount = 0;
|
|
while ( dh && ( g_hostdb.isDead ( dh ) || dh == origdh ) ) {
|
|
if ( ++dhid >= g_hostdb.getNumHosts() ) dhid = 0;
|
|
dh = g_hostdb.getHost ( dhid );
|
|
if ( totalCount++ >= g_hostdb.getNumHosts() ) break;
|
|
}
|
|
// . if we're not the next alive host in line to send, bail
|
|
// . if next-in-line crashes before he sends then there will be
|
|
// a cascade affect and we could end up sending a BUNCH of
|
|
// emails so prevent that now by setting his m_emailCode
|
|
if ( dhid != g_hostdb.m_hostId ) {
|
|
h->m_emailCode = -3;
|
|
return true;
|
|
}
|
|
|
|
// mark him as in progress
|
|
h->m_emailCode = -4;
|
|
// a host or proxy?
|
|
char *nm = "Host";
|
|
if ( h->m_isProxy ) nm = "Proxy";
|
|
// note it in the log
|
|
if ( oom )
|
|
log("net: %s %s #%"INT32" is out of mem for %"INT32" ms. "
|
|
"Sending email alert.",h->m_hostname,nm,
|
|
h->m_hostId,
|
|
(int32_t)g_conf.m_sendEmailTimeout);
|
|
else if ( kernelErrors )
|
|
log("net: %s %s #%"INT32" has an error in the kernel. "
|
|
"Sending email alert.",h->m_hostname,nm,
|
|
h->m_hostId);
|
|
else
|
|
log("net: %s %s #%"INT32" is dead. Has not responded to "
|
|
"ping in %"INT32" ms. Sending email alert.",
|
|
h->m_hostname,nm,h->m_hostId,
|
|
(int32_t)g_conf.m_sendEmailTimeout);
|
|
// . make the msg
|
|
// . put host0 ip in ()'s so we know what network it was
|
|
Host *h0 = g_hostdb.getHost ( 0 );
|
|
int32_t ip0 = 0;
|
|
if ( h0 ) ip0 = h0->m_ip;
|
|
char *desc = "dead";
|
|
if ( oom ) desc = "out of memory";
|
|
else if ( kernelErrors ) desc = "having kernel errors";
|
|
sprintf ( msgbuf , "%s %s %"INT32" is %s. cluster=%s (%s)",
|
|
h->m_hostname,nm,
|
|
h->m_hostId, desc, g_conf.m_clusterName,iptoa(ip0));
|
|
errmsg = msgbuf;
|
|
}
|
|
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on error
|
|
bool status = true;
|
|
m_numRequests2 = 0;
|
|
m_numReplies2 = 0;
|
|
|
|
// sysadmin
|
|
if ( g_conf.m_sendEmailAlertsToSysadmin && sendToAdmin ) {
|
|
m_numRequests2++;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"sysadmin@gigablast.com",
|
|
errmsg ,
|
|
"mail.gigablast.com" ) )
|
|
status = false;
|
|
}
|
|
|
|
// set the max for sanity checking in gotdoc
|
|
m_maxRequests2 = m_numRequests2;
|
|
|
|
bool delay = g_conf.m_delayNonCriticalEmailAlerts;
|
|
// oom is always critical, as is kernel errors
|
|
if ( oom || kernelErrors ) delay = false;
|
|
|
|
// if delay non critical email alerts is true do not send email
|
|
// alerts about dead hosts to anyone except sysadmin@gigablast.com
|
|
// between 10:00pm and 9:30am unless all the other twins of the
|
|
// dead host are also dead. Instead, wait till after 9:30 am if
|
|
// the host is still dead.
|
|
if ( delay && h && sendToAdmin ) {
|
|
|
|
/*
|
|
int32_t deaHr,deaMin,debHr,debMin;
|
|
char hr[3],min[3];
|
|
hr[2] = '\0';
|
|
min[2] = '\0';
|
|
gbmemcpy ( hr, g_conf.m_delayEmailsAfter, 2 );
|
|
deaHr = atoi(hr);
|
|
gbmemcpy ( min, g_conf.m_delayEmailsAfter + 3, 2 );
|
|
deaMin = atoi(min);
|
|
gbmemcpy ( hr, g_conf.m_delayEmailsBefore, 2 );
|
|
debHr = atoi(hr);
|
|
gbmemcpy ( min, g_conf.m_delayEmailsBefore + 3, 2 );
|
|
debMin = atoi(min);
|
|
//get the current time. use getTime() because
|
|
//then it is sync'ed with host 0's time.
|
|
time_t rawTime = getTime();
|
|
struct tm *timeInfo;
|
|
//timeInfo stores the lasthour time
|
|
timeInfo = localtime ( &rawTime );
|
|
char buf[64];
|
|
strftime ( buf , 100 , "%b %d %T", timeInfo);
|
|
//log ( LOG_WARN,"net: pingserver: local time is %s when "
|
|
// "host died",buf );
|
|
//tm struct has hours from 0-23
|
|
|
|
int32_t tmHr = timeInfo->tm_hour;
|
|
int32_t tmMin = timeInfo->tm_min;
|
|
|
|
bool delay = false;
|
|
|
|
if ( ( tmHr > deaHr || ( tmHr == deaHr && tmMin > deaMin ) ) &&
|
|
( tmHr < debHr || ( tmHr == debHr && tmMin < debMin ) ) )
|
|
delay = true;
|
|
|
|
else if ( ( tmHr > deaHr ||
|
|
( tmHr == deaHr && tmMin > deaMin ) ) &&
|
|
( debHr < deaHr ||
|
|
( debHr == deaHr && debMin < deaMin ) ) )
|
|
delay = true;
|
|
|
|
else if ( ( tmHr < debHr ||
|
|
( tmHr == debHr && tmMin < debMin ) ) &&
|
|
( deaHr > debHr ||
|
|
( deaHr == debHr && deaMin > debMin ) ) )
|
|
delay = true;
|
|
*/
|
|
|
|
// always delay no matter the time now
|
|
bool delay = true;
|
|
|
|
if ( delay ) {
|
|
|
|
//check if the hosts twins are dead too
|
|
int32_t numTwins = 0;
|
|
Host *hosts = g_hostdb.getShard( h->m_shardNum,
|
|
&numTwins );
|
|
int32_t i = 0;
|
|
while ( i < numTwins ){
|
|
if ( !g_hostdb.isDead ( hosts[i].m_hostId ) )
|
|
break;
|
|
i++;
|
|
}
|
|
|
|
//if no twin is alive, emergency ! send email !
|
|
//if even one twin is alive, don't send now
|
|
if ( i == numTwins ) goto skipSleep;
|
|
/*
|
|
time_t rawTime2 = getTime();
|
|
struct tm *timeInfo2;
|
|
//timeInfo stores the lasthour UTC time
|
|
timeInfo2=localtime(&rawTime2);
|
|
//change the time to delayemailsBefore
|
|
timeInfo2->tm_hour = debHr;
|
|
timeInfo2->tm_min = debMin;
|
|
rawTime2 = mktime ( timeInfo2 );
|
|
|
|
double diffTime = difftime ( rawTime2 , rawTime );
|
|
|
|
// if difftime is neg, that means we were yesterday,
|
|
// so add 1 day to get real diff
|
|
if (diffTime < 0 )
|
|
diffTime = (60 * 60 * 24 ) + diffTime;
|
|
|
|
log( LOG_WARN,"net: pingserver: Delay non critical "
|
|
"email alerts on. Trying to send email after %"INT32" "
|
|
"seconds",(int32_t) diffTime );
|
|
|
|
int32_t wait = (int32_t)diffTime * 1000 ;//ms
|
|
// wake up after so many seconds
|
|
g_loop.registerSleepCallback( wait, h,
|
|
emailSleepWrapper );
|
|
*/
|
|
return true;
|
|
}
|
|
}
|
|
|
|
skipSleep:
|
|
// matt tmobile
|
|
//if ( g_conf.m_sendEmailAlertsToMattTmobile ) {
|
|
// m_numRequests2++;
|
|
// if ( ! pageTMobile ( h, errmsg))
|
|
// status = false;
|
|
//}
|
|
// matt alltel
|
|
/*
|
|
if ( g_conf.m_sendEmailAlertsToMattAlltell ) {
|
|
m_numRequests2++;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"5054503518@message.alltel.com",
|
|
errmsg ,
|
|
"mms-mta.alltel.net" ))
|
|
status = false;
|
|
}
|
|
*/
|
|
// matt alltel
|
|
// if ( g_conf.m_sendEmailAlertsToMattAlltell ) {
|
|
// m_numRequests2++;
|
|
// //if ( ! pageAlltel ( h, errmsg , "5053626809" ) )
|
|
// if ( ! pageAlltel ( h, errmsg , "5054503518" ) )
|
|
// status = false;
|
|
// }
|
|
// matt verizon
|
|
//if ( g_conf.m_sendEmailAlertsToMattVerizon ) {
|
|
// m_numRequests2++;
|
|
// if ( ! pageVerizon ( h, errmsg ) )
|
|
// status = false;
|
|
//}
|
|
|
|
|
|
|
|
// melissa
|
|
// if ( g_conf.m_sendEmailAlertsToMelissa ) {
|
|
// m_numRequests2++;
|
|
// if ( ! pageAlltel ( h, errmsg , "5054292121" ) ) // melissa
|
|
// status = false;
|
|
// }
|
|
|
|
// partap
|
|
/*
|
|
if ( g_conf.m_sendEmailAlertsToPartap ) {
|
|
m_numRequests2++;
|
|
//if ( ! pageSprintPCS ( h, errmsg , "5056889554") )
|
|
// status = false;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"5056889554@mx.messaging.sprintpcs.com",
|
|
errmsg ,
|
|
"mx.messaging.sprintpcs.com") )
|
|
status = false;
|
|
}
|
|
|
|
// javier
|
|
if ( g_conf.m_sendEmailAlertsToJavier ) {
|
|
m_numRequests2++;
|
|
//if ( ! pageSprintPCS ( h, errmsg , "5056959513") )
|
|
// status = false;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"5056959513@mx.messaging.sprintpcs.com",
|
|
errmsg ,
|
|
"mx.messaging.sprintpcs.com") )
|
|
status = false;
|
|
}
|
|
|
|
// cinco
|
|
// if ( g_conf.m_sendEmailAlertsToCinco ) {
|
|
// m_numRequests2++;
|
|
// //if ( ! pageSprintPCS ( h, errmsg , "5054179933") )
|
|
// // status = false;
|
|
// if ( ! sendAdminEmail ( h,
|
|
// "sysadmin@gigablast.com",
|
|
// "5054179933@mx.messaging.sprintpcs.com",
|
|
// errmsg ) )
|
|
// status = false;
|
|
// }
|
|
|
|
// zak
|
|
if ( g_conf.m_sendEmailAlertsToZak) {
|
|
m_numRequests2++;
|
|
if ( ! sendAdminEmail ( h,
|
|
"zak@gigablast.com",
|
|
"zak@gigablast.com",
|
|
errmsg ,
|
|
"mail.gigablast.com" ) )
|
|
status = false;
|
|
m_numRequests2++;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"5052204556@message.alltel.com",
|
|
errmsg ,
|
|
"mms-mta.alltel.net" ))
|
|
status = false;
|
|
}
|
|
if ( g_conf.m_sendEmailAlertsToSabino ) {
|
|
m_numRequests2++;
|
|
//if ( ! pageSprintPCS ( h, errmsg , "5056959513") )
|
|
// status = false;
|
|
if ( ! sendAdminEmail ( h,
|
|
"sysadmin@gigablast.com",
|
|
"7082076550@mobile.mycingular.com",
|
|
errmsg ,
|
|
"mx.mycingular.net" ) )
|
|
status = false;
|
|
}
|
|
*/
|
|
|
|
bool e1 = g_conf.m_sendEmailAlertsToEmail1;
|
|
bool e2 = g_conf.m_sendEmailAlertsToEmail2;
|
|
bool e3 = g_conf.m_sendEmailAlertsToEmail3;
|
|
bool e4 = g_conf.m_sendEmailAlertsToEmail4;
|
|
|
|
// some people don't want parm change alerts
|
|
if ( parmChanged && ! g_conf.m_sendParmChangeAlertsToEmail1) e1=false;
|
|
if ( parmChanged && ! g_conf.m_sendParmChangeAlertsToEmail2) e2=false;
|
|
if ( parmChanged && ! g_conf.m_sendParmChangeAlertsToEmail3) e3=false;
|
|
if ( parmChanged && ! g_conf.m_sendParmChangeAlertsToEmail4) e4=false;
|
|
|
|
// point to provided IP as string
|
|
char *mxIPStr = NULL;
|
|
char ipBuf[64];
|
|
if ( mxIP ) {
|
|
sprintf(ipBuf,"%s",iptoa(mxIP));
|
|
mxIPStr = ipBuf;
|
|
}
|
|
|
|
if ( e1 ) {
|
|
m_numRequests2++;
|
|
m_maxRequests2++;
|
|
char *mxHost = g_conf.m_email1MX;
|
|
if ( mxIP ) mxHost = mxIPStr;
|
|
if ( ! sendAdminEmail ( h,
|
|
g_conf.m_email1From,
|
|
g_conf.m_email1Addr,
|
|
errmsg,
|
|
mxHost ) ) // g_conf.m_email1MX) )
|
|
status = false;
|
|
}
|
|
if ( e2 ) {
|
|
m_numRequests2++;
|
|
m_maxRequests2++;
|
|
char *mxHost = g_conf.m_email2MX;
|
|
if ( mxIP ) mxHost = mxIPStr;
|
|
if ( ! sendAdminEmail ( h,
|
|
g_conf.m_email2From,
|
|
g_conf.m_email2Addr,
|
|
errmsg,
|
|
mxHost ) ) // g_conf.m_email2MX) )
|
|
status = false;
|
|
}
|
|
if ( e3 ) {
|
|
m_numRequests2++;
|
|
m_maxRequests2++;
|
|
char *mxHost = g_conf.m_email3MX;
|
|
if ( mxIP ) mxHost = mxIPStr;
|
|
if ( ! sendAdminEmail ( h,
|
|
g_conf.m_email3From,
|
|
g_conf.m_email3Addr,
|
|
errmsg,
|
|
mxHost ) ) // g_conf.m_email3MX) )
|
|
status = false;
|
|
}
|
|
if ( e4 ) {
|
|
m_numRequests2++;
|
|
m_maxRequests2++;
|
|
char *mxHost = g_conf.m_email4MX;
|
|
if ( mxIP ) mxHost = mxIPStr;
|
|
if ( ! sendAdminEmail ( h,
|
|
g_conf.m_email4From,
|
|
g_conf.m_email4Addr,
|
|
errmsg,
|
|
mxHost ) ) // g_conf.m_email4MX) )
|
|
status = false;
|
|
}
|
|
|
|
// set the max for sanity checking below
|
|
//m_maxRequests2 = m_numRequests2;
|
|
// did we block or not? return true if nobody blocked
|
|
return status;
|
|
}
|
|
|
|
void emailSleepWrapper ( int fd, void *state ){
|
|
g_loop.unregisterSleepCallback( state, emailSleepWrapper );
|
|
//check if the host is still dead and if the last host to die is this
|
|
//host
|
|
Host *h = (Host *)state;
|
|
//if he is still dead and no other host has died after it
|
|
if ( g_hostdb.isDead( h ) && s_lastSentHostId == h->m_hostId ){
|
|
//reset s_lastSentHostId for sendEmail()
|
|
s_lastSentHostId = -1;
|
|
//reset hosts emailcode so that it sends the email
|
|
h->m_emailCode = 0;
|
|
g_pingServer.sendEmail ( h , NULL , false );//sendToAdmin
|
|
}
|
|
//if we're alive then no need to send email. If some other host has
|
|
//died, that host will send an email anyway.
|
|
}
|
|
|
|
|
|
#include "HttpServer.h"
|
|
static void gotDocWrapper ( void *state , TcpSocket *ts ) ;
|
|
|
|
// JAB: warning abatement
|
|
#if 0
|
|
// . returns false if blocks, true otherwise
|
|
// . sets g_errno on error
|
|
bool pageTMobile ( Host *h , char *errmsg) {
|
|
// log("EMAIL SENT %s!!!!!!!!!!!!!!!",errmsg);
|
|
// return true;
|
|
// for debug without actually wasting our cell phone instant messages
|
|
//log("EMAIL SENT hid=%"INT32"!!!!!!!!!!!!!!!",h->m_hostId);
|
|
//gotDocWrapper ( h , NULL );
|
|
//return true;
|
|
// looks like TcpServer won't let us use a static sendBuf
|
|
// cuz we cannot just set TcpSocket->m_sendBuf to NULL to stop
|
|
// the freeing cuz if TcpServer::gotIp() has an error it
|
|
// frees it sendBuf. This happens in other places too.
|
|
void * state = h;
|
|
char *buf = (char *) mmalloc ( PAGER_BUF_SIZE , "PingServer" );
|
|
|
|
// bail on malloc error
|
|
if ( ! buf ) {
|
|
log("net: Could not allocate %"INT32" bytes to send email "
|
|
"to mobile." , (int32_t)PAGER_BUF_SIZE);
|
|
g_pingServer.m_numReplies++;
|
|
return true;
|
|
}
|
|
char *p = buf;
|
|
char *pend = buf + PAGER_BUF_SIZE;
|
|
// send the request
|
|
sprintf ( p ,
|
|
//"POST /mytmobile/login/default.asp?"
|
|
//"POST /messaging/default.asp?"
|
|
"POST /customer_site/jsp/messaging_lo.jsp?"
|
|
"To=5054503518 HTTP/1.0\r\n"
|
|
"Accept: image/gif, image/x-xbitmap, image/jpeg, "
|
|
"image/pjpeg, application/x-shockwave-flash, "
|
|
"application/msword, */*\r\n"
|
|
"Accept-Language: en-us\r\n"
|
|
"Content-Type: application/x-www-form-urlencoded\r\n"
|
|
"Accept-Encoding: gzip, deflate\r\n"
|
|
"User-Agent: Mozilla/4.0 "
|
|
"(compatible; MSIE 6.0; Windows 98; Win 9x 4.90)\r\n"
|
|
//"Host: www.t-mobile.com\r\n"
|
|
"Host: wmg.tmomail.net\r\n"
|
|
"Content-Length: xxx\r\n"
|
|
//"Connection: Keep-Alive\r\n"
|
|
"Connection: close\r\n"
|
|
"Cache-Control: no-cache\r\n\r\n"
|
|
//"txtNum=5054503518&txtFrom=gb&"
|
|
"DOMAIN_NAME=@tmomail.com&"
|
|
"min=5054503518&"
|
|
"require_sender=gb&"
|
|
//"hdnpublic=1&"
|
|
"msgTermsUse=on&"
|
|
//"txtMessage=");
|
|
//"trackResponses=No&"
|
|
//"Send.x=Yes&"
|
|
"text=");
|
|
p += gbstrlen ( p );
|
|
// append the err msg, but convert spaces to +'s
|
|
int32_t i;
|
|
for ( i = 0 ; errmsg[i] && p + 4 < pend ; i++ ) {
|
|
if ( errmsg[i] == ' ' )
|
|
*p++ = '+';
|
|
else if ( errmsg[i] == '.' ) {
|
|
p[0] = '%';
|
|
p[1] = '2';
|
|
p[2] = 'E';
|
|
p += 3;
|
|
}
|
|
else
|
|
*p++ = errmsg[i];
|
|
}
|
|
*p = '\0';
|
|
int32_t bufLen = p - buf;
|
|
// replace xxx
|
|
char *s = strstr ( buf , "xxx" );
|
|
char *t = strstr ( buf , "\r\n\r\n" ) + 4;
|
|
int32_t clen = bufLen - ( t - buf );
|
|
sprintf ( s , "%3li" , clen );
|
|
s [ 3 ] = '\r';
|
|
|
|
// post it up
|
|
// borrow our HttpServer's TcpServer
|
|
//TcpServer *ts = g_httpServer.getTcp();
|
|
TcpServer *ts = g_httpServer.getSSLTcp();
|
|
if ( ! ts ) {
|
|
log("db: Could not page alltel because "
|
|
"no gb.pem file in working dir so "
|
|
"SSL server is unavailable.");
|
|
return true;
|
|
}
|
|
if ( ! ts->sendMsg ( //"65.161.188.152" ,
|
|
//gbstrlen("65.161.188.152") ,
|
|
"63.122.5.193",
|
|
gbstrlen("63.122.5.193"),
|
|
443 , // https, 80 ,
|
|
buf ,
|
|
PAGER_BUF_SIZE ,
|
|
bufLen ,
|
|
bufLen ,
|
|
(void *)h ,
|
|
gotDocWrapper ,
|
|
60*1000 ,
|
|
100*1024 , // maxTextDocLen
|
|
100*1024 )) // maxOtherDocLen
|
|
return false;
|
|
// we did not block, so update h->m_emailCode
|
|
gotDocWrapper (state, NULL );
|
|
// we did not block
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
// JAB: warning abatement
|
|
#if 0
|
|
// . returns false if blocks, true otherwise
|
|
// . sets g_errno on error
|
|
bool pageAlltel ( Host *h , char *errmsg , char *num ) {
|
|
|
|
// for debug without actually wasting our cell phone instant messages
|
|
//log("EMAIL SENT hid=%"INT32"!!!!!!!!!!!!!!!",h->m_hostId);
|
|
//gotDocWrapper ( h , NULL );
|
|
//return true;
|
|
// looks like TcpServer won't let us use a static sendBuf
|
|
// cuz we cannot just set TcpSocket->m_sendBuf to NULL to stop
|
|
// the freeing cuz if TcpServer::gotIp() has an error it
|
|
// frees it sendBuf. This happens in other places too.
|
|
void * state = h;
|
|
char *buf = (char *) mmalloc ( PAGER_BUF_SIZE , "PingServer" );
|
|
// bail on malloc error
|
|
if ( ! buf ) {
|
|
log("net: Could not allocate %"INT32" bytes to send email "
|
|
"to alltel." , PAGER_BUF_SIZE);
|
|
g_pingServer.m_numReplies++;
|
|
return true;
|
|
}
|
|
char *p = buf;
|
|
char *pend = buf + PAGER_BUF_SIZE;
|
|
// send the request
|
|
char *host = "message.alltel.com";
|
|
char *ip = "166.102.172.2"; // message.alltel.com
|
|
//char *num = "5054292121"; // melissa
|
|
|
|
sprintf ( p ,
|
|
//"POST /mytmobile/login/default.asp?"
|
|
"POST /customer_site/jsp/messaging.jsp"
|
|
" HTTP/1.0\r\n"
|
|
"Accept: image/gif, image/x-xbitmap, image/jpeg, "
|
|
"image/pjpeg, application/x-shockwave-flash, "
|
|
"application/msword, */*\r\n"
|
|
"Accept-Language: en-us\r\n"
|
|
"Content-Type: application/x-www-form-urlencoded\r\n"
|
|
"Accept-Encoding: gzip, deflate\r\n"
|
|
"User-Agent: Mozilla/4.0 "
|
|
"(compatible; MSIE 6.0; Windows 98; Win 9x 4.90)\r\n"
|
|
"Host: %s\r\n" // www.t-mobile.com
|
|
"Content-Length: xxx\r\n"
|
|
//"Connection: Keep-Alive\r\n"
|
|
"Connection: close\r\n"
|
|
"Cache-Control: no-cache\r\n\r\n"
|
|
// post data
|
|
"trackResponses=No&"
|
|
"devicetype=1&"
|
|
"DOMAIN_NAME=@vtext.com&"
|
|
"sender=gb&"
|
|
"min=%s&" // phone number
|
|
"callback=&"
|
|
"count=160&"
|
|
"type=1&" // 1=high priority, 0=normal
|
|
"text=" ,
|
|
host , num );
|
|
p += gbstrlen ( p );
|
|
// append the err msg, but convert spaces to +'s
|
|
int32_t i;
|
|
for ( i = 0 ; errmsg[i] && p + 4 < pend ; i++ ) {
|
|
if ( errmsg[i] == ' ' )
|
|
*p++ = '+';
|
|
else if ( errmsg[i] == '.' ) {
|
|
p[0] = '%';
|
|
p[1] = '2';
|
|
p[2] = 'E';
|
|
p += 3;
|
|
}
|
|
else
|
|
*p++ = errmsg[i];
|
|
}
|
|
*p = '\0';
|
|
int32_t bufLen = p - buf;
|
|
// replace xxx
|
|
char *s = strstr ( buf , "xxx" );
|
|
char *t = strstr ( buf , "\r\n\r\n" ) + 4;
|
|
int32_t clen = bufLen - ( t - buf );
|
|
sprintf ( s , "%3li" , clen );
|
|
s [ 3 ] = '\r';
|
|
|
|
// post it up
|
|
// borrow our HttpServer's TcpServer
|
|
TcpServer *ts = g_httpServer.getTcp();
|
|
if ( ! ts->sendMsg ( ip ,
|
|
gbstrlen(ip) ,
|
|
80 ,
|
|
buf ,
|
|
PAGER_BUF_SIZE ,
|
|
bufLen ,
|
|
bufLen ,
|
|
state ,
|
|
gotDocWrapper ,
|
|
60*1000 ,
|
|
100*1024 , // maxTextDocLen
|
|
100*1024 )) // maxOtherDocLen
|
|
return false;
|
|
// we did not block, so update h->m_emailCode
|
|
gotDocWrapper ( state , NULL );
|
|
// we did not block
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
|
|
// JAB: warning abatement
|
|
#if 0
|
|
// . returns false if blocks, true otherwise
|
|
// . sets g_errno on error
|
|
bool pageSprintPCS ( Host *h , char *errmsg , char *num) {
|
|
char *host = "messaging.sprintpcs.com";
|
|
//char *num = "2813004108"; // partap
|
|
|
|
// for debug without actually wasting our cell phone instant messages
|
|
//log("EMAIL SENT hid=%"INT32"!!!!!!!!!!!!!!!",h->m_hostId);
|
|
//gotDocWrapper ( h , NULL );
|
|
//return true;
|
|
// looks like TcpServer won't let us use a static sendBuf
|
|
// cuz we cannot just set TcpSocket->m_sendBuf to NULL to stop
|
|
// the freeing cuz if TcpServer::gotIp() has an error it
|
|
// frees it sendBuf. This happens in other places too.
|
|
void * state = h;
|
|
|
|
char *buf = (char *) mmalloc ( PAGER_BUF_SIZE , "PingServer" );
|
|
// bail on malloc error
|
|
if ( ! buf ) {
|
|
log("net: Could not allocate %"INT32" bytes to send email "
|
|
"to sprint pcs." , PAGER_BUF_SIZE);
|
|
g_pingServer.m_numReplies++;
|
|
return true;
|
|
}
|
|
|
|
char *p = buf;
|
|
char *pend = buf + PAGER_BUF_SIZE;
|
|
|
|
// save ptr to host or email type
|
|
*(Host **)(p+PAGER_BUF_SIZE-4) = (Host*)state;
|
|
|
|
// then the message to send
|
|
sprintf ( p ,
|
|
//"POST /mytmobile/login/default.asp?"
|
|
"POST /textmessaging/composeconfirm"
|
|
//;jsessionid=CSRtp1YQo3WyzNL0edNEHsI4oN2IloYE9buvqYaO
|
|
//Pgijz4Ht1WUO!1942930617!182751426!5070!7002"
|
|
" HTTP/1.0\r\n"
|
|
"Accept: image/gif, image/x-xbitmap, image/jpeg, "
|
|
"image/pjpeg, application/x-shockwave-flash, "
|
|
"application/msword, */*\r\n"
|
|
"Accept-Language: en-us\r\n"
|
|
"Content-Type: application/x-www-form-urlencoded\r\n"
|
|
"Accept-Encoding: gzip, deflate\r\n"
|
|
"User-Agent: Mozilla/4.0 "
|
|
"(compatible; MSIE 6.0; Windows 98; Win 9x 4.90)\r\n"
|
|
"Host: %s\r\n" // www.t-mobile.com
|
|
"Content-Length: xxx\r\n"
|
|
//"Connection: Keep-Alive\r\n"
|
|
"Connection: close\r\n"
|
|
"Cookie: \r\n"
|
|
"Cache-Control: no-cache\r\n\r\n"
|
|
// post data
|
|
"randomToken=&"
|
|
"phoneNumber=%s&" // phone number
|
|
//"characters=%"INT32"&"
|
|
//"characters=160&"
|
|
//"callBackNumber=&"
|
|
"message=" ,
|
|
host , num );//, (int32_t)160-gbstrlen(errmsg) );
|
|
p += gbstrlen ( p );
|
|
// append the err msg, but convert spaces to +'s
|
|
int32_t i;
|
|
for ( i = 0 ; errmsg[i] && p + 4 < pend ; i++ ) {
|
|
if ( errmsg[i] == ' ' )
|
|
*p++ = '+';
|
|
else if ( errmsg[i] == '.' ) {
|
|
p[0] = '%';
|
|
p[1] = '2';
|
|
p[2] = 'E';
|
|
p += 3;
|
|
}
|
|
else
|
|
*p++ = errmsg[i];
|
|
}
|
|
*p = '\0';
|
|
//int32_t bufLen = p - buf;
|
|
|
|
// gotta get the cookie
|
|
Url url;
|
|
//char *uu = "http://65.174.43.73/textmessaging/compose";
|
|
char *uu = "http://144.230.162.49/textmessaging/compose";
|
|
//char *uu = "http://messaging.sprintpcs.com/textmessaging/compose";
|
|
url.set(uu,gbstrlen(uu));
|
|
if ( ! g_httpServer.getDoc ( &url ,
|
|
0 , // offset
|
|
-1 , // size
|
|
false , // m_ifModifiedSince
|
|
buf , // state
|
|
sprintPCSWrapper , //
|
|
30*1000 , // timeout
|
|
0 , // m_proxyIp
|
|
0 , // m_proxyPort
|
|
100*1024 , // m_maxTextDocLen
|
|
100*1024 , // m_maxOtherDocLen
|
|
"Mozilla/4.0 " // user agent
|
|
"(compatible; MSIE 6.0; Windows 98; "
|
|
"Win 9x 4.90)\r\n" ) )
|
|
return false;
|
|
// must have been an error
|
|
log("net: Got error getting page from Sprint PCS: %s.",
|
|
mstrerror(g_errno));
|
|
// always call this at the end
|
|
return pageSprintPCS2 ( buf , NULL );
|
|
}
|
|
#endif
|
|
|
|
// JAB: warning abatement
|
|
#if 0
|
|
void sprintPCSWrapper ( void *state , TcpSocket *ts) {
|
|
pageSprintPCS2 ( state , ts );
|
|
}
|
|
#endif
|
|
|
|
// JAB: warning abatement
|
|
#if 0
|
|
bool pageSprintPCS2 ( void *state , TcpSocket *s) {
|
|
|
|
char *ip = "65.174.43.73";
|
|
|
|
char *buf = (char *)state;
|
|
int32_t bufLen = gbstrlen(buf);
|
|
|
|
char *p = buf;
|
|
char *pend = p + bufLen;
|
|
|
|
// last 4 bytes of buffer is the Host ptr
|
|
|
|
|
|
void* state2 = (p+PAGER_BUF_SIZE-4);
|
|
|
|
if ( g_errno || ! s ) {
|
|
mfree ( buf , PAGER_BUF_SIZE , "PingServer" );
|
|
// always call this at the end
|
|
gotDocWrapper( state2 , NULL );
|
|
return true;
|
|
}
|
|
|
|
// sprint debug msg
|
|
//log("net: Got reply from Sprint PCS:\n%s",s->m_readBuf);
|
|
|
|
// get the cookie
|
|
HttpMime mime;
|
|
mime.set ( s->m_readBuf , s->m_readOffset , NULL );
|
|
char *cookie = mime.getCookie ( );
|
|
int32_t cookieLen = mime.getCookieLen ();
|
|
bufLen += cookieLen;
|
|
|
|
if ( ! cookie ) {
|
|
log("net: No cookie to send to Sprint PCS");
|
|
g_errno = EBADREPLY;
|
|
mfree ( buf , PAGER_BUF_SIZE , "PingServer" );
|
|
// always call this at the end
|
|
gotDocWrapper( state2 , NULL );
|
|
return true;
|
|
}
|
|
|
|
// part the request to make room for cookie
|
|
char *ss = strstr ( p , "Cookie: ");
|
|
if ( ! ss ) { char *xx = NULL; *xx = 0; }
|
|
ss += 8; // points to right after the space
|
|
// insert the cookie
|
|
gbmemcpy ( ss+cookieLen , ss , pend - ss );
|
|
gbmemcpy ( ss , cookie , cookieLen );
|
|
pend += cookieLen;
|
|
|
|
// replace xxx
|
|
char *sx = strstr ( buf , "xxx" );
|
|
char *t = strstr ( buf , "\r\n\r\n" ) + 4;
|
|
int32_t clen = bufLen - ( t - buf );
|
|
sprintf ( sx , "%3li" , clen );
|
|
sx [ 3 ] = '\r';
|
|
|
|
// sprint debug msg
|
|
//log("net: Sending to Sprint PCS:\n%s",buf);
|
|
|
|
// post it up
|
|
// borrow our HttpServer's TcpServer
|
|
TcpServer *ts = g_httpServer.getTcp();
|
|
if ( ! ts->sendMsg ( ip ,
|
|
gbstrlen(ip) ,
|
|
80 ,
|
|
buf ,
|
|
PAGER_BUF_SIZE ,
|
|
gbstrlen(buf) ,
|
|
gbstrlen(buf) ,
|
|
state2 ,
|
|
gotDocWrapper ,
|
|
60*1000 ,
|
|
100*1024 , // maxTextDocLen
|
|
100*1024 )) // maxOtherDocLen
|
|
return false;
|
|
// we did not block, so update h->m_emailCode
|
|
gotDocWrapper ( state2 , NULL );
|
|
// we did not block
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
// a bit of a hack for monitor.cpp
|
|
//int32_t g_emailMX1IPBackUp = 0;
|
|
|
|
bool sendAdminEmail ( Host *h,
|
|
char *fromAddress,
|
|
char *toAddress,
|
|
char *body ,
|
|
char *emailServIp) {
|
|
// create a new buffer
|
|
char *buf = (char *) mmalloc ( PAGER_BUF_SIZE , "PingServer" );
|
|
// fill the buffer
|
|
char *p = buf;
|
|
// helo line
|
|
p += sprintf(p, "HELO gigablast.com\r\n");
|
|
// mail line
|
|
p += sprintf(p, "MAIL from:<%s>\r\n", fromAddress);
|
|
// to line
|
|
p += sprintf(p, "RCPT to:<%s>\r\n", toAddress);
|
|
// data
|
|
p += sprintf(p, "DATA\r\n");
|
|
// body
|
|
p += sprintf(p, "To: %s\r\n", toAddress);
|
|
p += sprintf(p, "Subject: Sysadmin Event Message\r\n");
|
|
// mime header must be separated from body by an extra \r\n
|
|
p += sprintf(p, "\r\n");
|
|
p += sprintf(p, "%s", body);
|
|
// quit
|
|
p += sprintf(p, "\r\n.\r\nQUIT\r\n");
|
|
// get the length
|
|
int32_t buffLen = (p - buf);
|
|
// send the message
|
|
TcpServer *ts = g_httpServer.getTcp();
|
|
log ( LOG_WARN, "PingServer: Sending email to sysadmin:\n %s", buf );
|
|
//if ( !ts->sendMsg ( g_conf.m_smtpHost,
|
|
// gbstrlen(g_conf.m_smtpHost),
|
|
// g_conf.m_smtpPort,
|
|
char *ip = emailServIp;//"66.154.102.229"; // gf39, mail server ip
|
|
// use backup if there
|
|
//char ipString[64];
|
|
//if ( g_emailServIPBackup ) {
|
|
// iptoa(ipString,g_emailMX1IPBackup);
|
|
// ip = ipString;
|
|
//}
|
|
if ( !ts->sendMsg ( ip,
|
|
gbstrlen(ip),
|
|
25, // smtp (send mail transfer protocol) port
|
|
buf,
|
|
PAGER_BUF_SIZE,
|
|
buffLen,
|
|
buffLen,
|
|
h,
|
|
gotDocWrapper,
|
|
60*1000,
|
|
100*1024,
|
|
100*1024 ) )
|
|
return false;
|
|
// we did not block, so update h->m_emailCode
|
|
gotDocWrapper ( h , NULL );
|
|
// we did not block
|
|
return true;
|
|
}
|
|
|
|
|
|
void gotDocWrapper ( void *state , TcpSocket *s ) {
|
|
// keep track of how many we got
|
|
g_pingServer.m_numReplies2++;
|
|
if ( g_pingServer.m_numReplies2 > g_pingServer.m_maxRequests2 ) {
|
|
log(LOG_LOGIC,"net: too many replies received. "
|
|
"requests:%"INT32" replies:%"INT32" maxrequests:%"INT32"",
|
|
g_pingServer.m_numRequests2,
|
|
g_pingServer.m_numReplies2,
|
|
g_pingServer.m_maxRequests2);
|
|
//char *xx = NULL; *xx = 0;
|
|
}
|
|
Host *h = (Host *)state;
|
|
|
|
// if ( ! h ) { log("net: h is NULL in pingserver."); return; }
|
|
// don't let tcp server free the sendbuf, that's static
|
|
//s->m_sendBuf = NULL;
|
|
if ( g_errno ) {
|
|
if(h) {
|
|
log("net: Had error sending email to mobile for dead "
|
|
"hostId "
|
|
"#%"INT32": %s.", h->m_hostId,mstrerror(g_errno));
|
|
} else {
|
|
log("net: Had error sending email to mobile for "
|
|
"int32_t latency: %s.", mstrerror(g_errno));
|
|
}
|
|
// mark as 0 on error to try sending again later
|
|
//h->m_emailCode = 0;
|
|
// reset these errors just in case
|
|
g_errno = 0;
|
|
return;
|
|
}
|
|
// log it
|
|
if(h)
|
|
log("net: Email sent successfully for dead host #%"INT32".",
|
|
h->m_hostId);
|
|
else
|
|
log("net: Email sent successfully for int32_t latency.");
|
|
// . show the reply
|
|
// . seems to crash if we log the read buffer... no \0?
|
|
if ( s && s->m_readBuf )
|
|
log("net: Got messaging server reply #%"INT32".\n%s",
|
|
g_pingServer.m_numReplies2,s->m_readBuf );
|
|
// otherwise, success
|
|
if(h) {
|
|
h->m_emailCode = 1;
|
|
// . mark him as the one email we sent
|
|
// . don't send another email until he comes back up!
|
|
// . it's just a waste
|
|
s_lastSentHostId = h->m_hostId;
|
|
}
|
|
}
|
|
|
|
//////////////////////////////////////////////////
|
|
// the shutdown broadcast
|
|
//////////////////////////////////////////////////
|
|
|
|
// when a host goes down gracefully it lets all its peers know so they
|
|
// do not send requests to it.
|
|
|
|
// . broadcast shutdown notes
|
|
// . returns false if blocked, true otherwise
|
|
// . does not set g_errno
|
|
bool PingServer::broadcastShutdownNotes ( bool sendEmailAlert ,
|
|
void *state ,
|
|
void (* callback)(void *state) ) {
|
|
// don't broadcast on interface machines
|
|
if ( g_conf.m_interfaceMachine ) return true;
|
|
// only call once
|
|
if ( m_numRequests != m_numReplies ) return true;
|
|
// keep track
|
|
m_numRequests = 0;
|
|
m_numReplies = 0;
|
|
// save callback info
|
|
m_broadcastState = state;
|
|
m_broadcastCallback = callback;
|
|
// use this buffer
|
|
static char s_buf [5];
|
|
*(int32_t *)s_buf = g_hostdb.m_hostId;
|
|
// followed by sendEmailAlert
|
|
s_buf[4] = (char)sendEmailAlert;
|
|
|
|
int32_t np = g_hostdb.getNumProxy();
|
|
// do not send to proxies if we are a proxy
|
|
if ( g_hostdb.m_myHost->m_isProxy ) np = 0;
|
|
// sent to proxy hosts too so they don't send to us
|
|
for ( int32_t i = 0 ; i < np ; i++ ) {
|
|
// get host
|
|
Host *h = g_hostdb.getProxy(i);
|
|
// skip ourselves
|
|
//if ( h->m_hostId == g_hostdb.m_hostId ) continue;
|
|
// count as sent
|
|
m_numRequests++;
|
|
// send it right now
|
|
if ( g_udpServer.sendRequest ( s_buf ,
|
|
5 , // rqstSz
|
|
0x11 , // msgType
|
|
h->m_ip ,
|
|
h->m_port ,
|
|
// we are sending to a proxy!
|
|
-1 , // h->m_hostId ,
|
|
NULL , //
|
|
NULL , // state
|
|
gotReplyWrapperP2 ,
|
|
3 , // 3 sec timeout
|
|
-1 , // default backoff
|
|
-1 , // default maxwait
|
|
NULL , // reply buf
|
|
0 , // reply buf size
|
|
0 ))// niceness
|
|
continue;
|
|
// otherwise, had an error
|
|
m_numReplies++;
|
|
// reset g_errno
|
|
g_errno = 0;
|
|
}
|
|
|
|
|
|
// send a high priority msg to each host in network, except us
|
|
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
|
|
// get host
|
|
Host *h = &g_hostdb.m_hosts[i];
|
|
// skip ourselves
|
|
if ( h->m_hostId == g_hostdb.m_hostId ) continue;
|
|
// count as sent
|
|
m_numRequests++;
|
|
// request will be freed by UdpServer
|
|
//char *r = (char *) mmalloc ( 4 , "PingServer" );
|
|
//if ( ! r ) return true;
|
|
//gbmemcpy ( r , (char *)(&h->m_hostId) , 4 );
|
|
// send it right now
|
|
if ( g_udpServer.sendRequest ( s_buf ,
|
|
5 , // rqstSz
|
|
0x11 , // msgType
|
|
h->m_ip ,
|
|
h->m_port ,
|
|
h->m_hostId ,
|
|
NULL , //
|
|
NULL , // state
|
|
gotReplyWrapperP2 ,
|
|
3 , // 3 sec timeout
|
|
-1 , // default backoff
|
|
-1 , // default maxwait
|
|
NULL , // reply buf
|
|
0 , // reply buf size
|
|
0 ))// niceness
|
|
continue;
|
|
// otherwise, had an error
|
|
m_numReplies++;
|
|
// reset g_errno
|
|
g_errno = 0;
|
|
}
|
|
// if already done return true
|
|
if ( m_numReplies >= m_numRequests ) return true;
|
|
// otherwise we blocked
|
|
return false;
|
|
}
|
|
|
|
void gotReplyWrapperP2 ( void *state , UdpSlot *slot ) {
|
|
// count it
|
|
g_pingServer.m_numReplies++;
|
|
// don't let udp server free our send buf, we own it
|
|
slot->m_sendBufAlloc = NULL;
|
|
// discard errors
|
|
g_errno = 0;
|
|
// bail if not done
|
|
if ( g_pingServer.m_numReplies < g_pingServer.m_numRequests ) return ;
|
|
// call our wrapper
|
|
if ( g_pingServer.m_broadcastCallback )
|
|
g_pingServer.m_broadcastCallback(g_pingServer.m_broadcastState);
|
|
}
|
|
|
|
//////////////////////////////////////////////////
|
|
// the sync point section
|
|
//////////////////////////////////////////////////
|
|
|
|
// every 10 minutes, host #0 tells all hosts to store a "sync point".
|
|
// this will force all hosts to dump a list of the names of all the files
|
|
// they have created since the last "sync point" was stored. in addition
|
|
// to dumping this list to disk, the "sync point" itself (a timestamp)
|
|
// will be appened to the list so we know at what approximate times all
|
|
// the files were created. this if for doing incremental synchronization.
|
|
// all "sync points" are from host #0's clock.
|
|
|
|
// ensure not too many sync point store requests off at once
|
|
static int32_t s_outstandingTaps = 0;
|
|
static char s_lastSyncPoint [ 9 ];
|
|
static int32_t s_nextTapHostId ;
|
|
|
|
static void tapLoop ( ) ;
|
|
static void gotTapReplyWrapper ( void *state , UdpSlot *slot ) ;
|
|
|
|
// this wrapper is called once every second, or so...
|
|
void sleepWrapper10 ( int fd , void *state ) {
|
|
// can't do much if still waiting on a very old tap!
|
|
if ( s_outstandingTaps ) {
|
|
log("PingServer::sleepWrapper10: stuck tap. sync messed up.");
|
|
return;
|
|
}
|
|
// request is 9 bytes to distinguish from 8-byte requests
|
|
int64_t stamp = gettimeofdayInMilliseconds();
|
|
// . keep incrementing this if already used to avoid repeat stamps
|
|
// . should be extremely rare
|
|
// . even with this we can still have repeat stamps if we are not
|
|
// in sync, so i check for that in Sync::getSyncPoint()
|
|
// . -1 is reserved for meaning no stamp (see Msg0.h)
|
|
//while ( g_sync.hasStamp ( stamp ) || stamp == -1 ) stamp++;
|
|
// save it for distributing
|
|
*(int64_t *) s_lastSyncPoint = stamp;
|
|
// reset loop parms
|
|
s_nextTapHostId = 0;
|
|
// do the tap loop
|
|
tapLoop ( );
|
|
}
|
|
|
|
void tapLoop ( ) {
|
|
// . don't use more than 16 UdpSlots for tapping
|
|
// . don't use more than numHosts UdpSlots for tapping
|
|
int32_t max = g_hostdb.getNumHosts();
|
|
if ( max > 16 ) max = 16;
|
|
loop:
|
|
if ( s_outstandingTaps >= max ) return;
|
|
// cycle through pinging different hosts
|
|
int32_t n = g_hostdb.getNumHosts();
|
|
// if done sending requests then just return
|
|
if ( s_nextTapHostId >= n ) return;
|
|
// otherwise, tap this guy
|
|
g_pingServer.tapHost ( s_nextTapHostId );
|
|
// inc next host to ping
|
|
s_nextTapHostId++;
|
|
// do as many in a row as we can
|
|
goto loop;
|
|
}
|
|
|
|
void gotTapReplyWrapper ( void *state , UdpSlot *slot ) {
|
|
// it came back
|
|
s_outstandingTaps--;
|
|
// don't let udp server free our send buf, we own it
|
|
slot->m_sendBufAlloc = NULL;
|
|
// discard errors
|
|
g_errno = 0;
|
|
// loop to do more if we need to
|
|
tapLoop ( );
|
|
}
|
|
|
|
// ping host #i
|
|
void PingServer::tapHost ( int32_t hostId ) {
|
|
// don't ping on interface machines
|
|
if ( g_conf.m_interfaceMachine ) return;
|
|
// don't tap ourselves
|
|
//if ( hostId == g_hostdb.m_hostId ) return;
|
|
// watch for out of bounds
|
|
if ( hostId < 0 || hostId >= g_hostdb.getNumHosts() ) return;
|
|
// get host ptr
|
|
Host *h = g_hostdb.getHost ( hostId );
|
|
// return if NULL
|
|
if ( ! h ) return;
|
|
// don't tap again if already in progress
|
|
//if ( h->m_inTapProgress ) return;
|
|
// count it
|
|
s_outstandingTaps++;
|
|
// consider it in progress
|
|
//h->m_inProgress = true;
|
|
// . launch one
|
|
// . returns false and sets errno on error
|
|
// . returns true if request sent and it blocked
|
|
// . size of 2 is unique to a tap
|
|
// . use MsgType of 0x11 for pinging
|
|
// . we now use the high-prioirty server, g_udpServer2
|
|
// . now we send over our current time so remote host will sync up
|
|
// with us
|
|
// . only sync up with hostId #0
|
|
// . if he goes down its ok because time is mostly important for
|
|
// spidering and spidering is suspended if a host is down
|
|
if ( g_udpServer.sendRequest ( s_lastSyncPoint ,
|
|
9 ,
|
|
0x11 ,
|
|
h->m_ip ,
|
|
h->m_port ,
|
|
h->m_hostId ,
|
|
NULL ,
|
|
(void *)(PTRTYPE)h->m_hostId,// cb state
|
|
gotTapReplyWrapper ,
|
|
30 , // timeout
|
|
1000 , // backoff
|
|
10000 , // max wait
|
|
NULL , // reply buf
|
|
0 , // reply buf size
|
|
0 ))// niceness
|
|
return;
|
|
// it had an error, so dec the count
|
|
s_outstandingTaps--;
|
|
// consider it out of progress
|
|
//h->m_inTapProgress = false;
|
|
// had an error
|
|
log("net: Had error sending sync point request to host #%"INT32": %s.",
|
|
h->m_hostId,mstrerror(g_errno) );
|
|
// reset it cuz it's not a showstopper
|
|
g_errno = 0;
|
|
}
|
|
|
|
// . this should be called on reception of an ACK to keep track of ping time.
|
|
// . this should be called on time out of a dgram reception.
|
|
// . "tripTime" is in milliseconds
|
|
// . "tripTime" should be the timeOut time if "timedOut" is true
|
|
// . returns false if you have to wait for your callback to be called
|
|
// . returns true if you don't have to wait... stamp happened immediately
|
|
// . sets g_errno on any error, regardless of whether true or false was returnd
|
|
/*
|
|
void PingServer::stampHost ( int32_t hostId , int32_t tripTime , bool timedOut ) {
|
|
// hostId of -1 means unknown, so we can't stamp it
|
|
if ( hostId == -1 ) return;
|
|
if ( hostId >= g_hostdb.getNumHosts() ) return;
|
|
// ensure tripTime doesn't wrap around
|
|
if ( tripTime < 0 ) return; // tripTime = 0x7fffffff;
|
|
// sanity check
|
|
if ( tripTime > 65000 ) {
|
|
log("PingServer::stampHost: bad tripTime");
|
|
}
|
|
// get the host, return false on error
|
|
Host *h = m_hostPtrs[hostId];
|
|
// if he was considered timedout, only stamp if it makes his time worse
|
|
if ( timedOut && tripTime < h->m_pingAvg ) return;
|
|
// TODO: do we need this?
|
|
// . don't update this host if this new ping time is < 10% different
|
|
// than the ping time we have recorded now
|
|
// int32_t diff = newPing - h->m_pingAvg;
|
|
// if ( diff < 0 ) diff *= -1;
|
|
// if ( ( 100 * diff ) / h->m_pingAvg < 10 ) return true;
|
|
|
|
// . we keep track of the last 4 pings for each host
|
|
// . we use the avg ping time to select fast hosts for querying
|
|
// . we use the std. dev. to know when to re-send datagrams
|
|
// shift the 5-ping window and insert the new ping
|
|
for ( int32_t i = 0 ; i < 3 ; i++ ) h->m_pings[i] = h->m_pings[i+1];
|
|
h->m_pings[3] = tripTime; // our latest ping
|
|
// compute the avg
|
|
int32_t avg = 0;
|
|
for ( int32_t i = 0 ; i < 4 ; i++ ) avg += h->m_pings[i];
|
|
avg /= 4;
|
|
h->m_pingAvg = avg;
|
|
// compute the std dev
|
|
int32_t stdDev = 0;
|
|
for ( int32_t i = 0 ; i < 4 ; i++ ) {
|
|
int32_t diff = ( h->m_pings[i] - avg );
|
|
if ( diff < 0 ) stdDev -= diff;
|
|
else stdDev += diff;
|
|
}
|
|
stdDev /= 4;
|
|
h->m_pingStdDev = stdDev;
|
|
}
|
|
*/
|
|
|
|
/*
|
|
void PingServer::getTimes ( int32_t hostId , int32_t *avg , int32_t *stdDev ) {
|
|
// make defaults (right now for dns server only)
|
|
*avg = 5000; // milliseconds = 5 seconds
|
|
*stdDev = 500;
|
|
// hostId of -1 means unknown, so we can't stamp it
|
|
if ( hostId == -1 ) return;
|
|
if ( hostId >= g_hostdb.getNumHosts() ) return;
|
|
// get the host, return false on error
|
|
Host *h = m_hostPtrs[hostId];
|
|
*avg = h->m_pingAvg;
|
|
*stdDev = h->m_pingStdDev;
|
|
return;
|
|
}
|
|
*/
|
|
|
|
// if its status changes from dead to alive or vice versa, we have to
|
|
// update g_hostdb.m_numHostsAlive. Dns.cpp and Msg17 will use this count
|
|
void updatePingTime ( Host *h , int32_t *pingPtr , int32_t tripTime ) {
|
|
|
|
// sanity check
|
|
if ( pingPtr != &h->m_ping && pingPtr != &h->m_pingShotgun ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
|
|
// . was it dead before this?
|
|
// . both ips must be dead for it to be dead
|
|
bool wasDead = g_hostdb.isDead ( h );
|
|
|
|
// do the actual update
|
|
*pingPtr = tripTime;
|
|
// do not go negative on us
|
|
if ( *pingPtr < 0 ) *pingPtr = 0;
|
|
|
|
// . record max ping
|
|
// . wait 60 seconds for everyone to come up if we just started up
|
|
if ( tripTime > h->m_pingMax &&
|
|
// do not count shotgun ips!
|
|
pingPtr == &h->m_ping &&
|
|
gettimeofdayInMillisecondsLocal()-g_process.m_processStartTime>=
|
|
60000 ) {
|
|
h->m_pingMax = tripTime;
|
|
char *desc = "";
|
|
if ( pingPtr == &h->m_pingShotgun ) desc = " (shotgun)";
|
|
if ( tripTime > 50 )
|
|
log("gb: got new max ping time of %"INT32" for "
|
|
"host #%"INT32"%s ",tripTime,h->m_hostId,desc);
|
|
}
|
|
|
|
// is it dead now?
|
|
bool isDead = g_hostdb.isDead ( h );
|
|
|
|
// force it out of sync
|
|
if ( isDead ) h->m_inSync = false;
|
|
|
|
//if ( ! wasDead && isDead )
|
|
// log("hey");
|
|
//logf(LOG_DEBUG,"ping: hostid %"INT32" wasDead=%"INT32" isDead=%"INT32" "
|
|
// "numAlive=%"INT32"",
|
|
// (int32_t)h->m_hostId,(int32_t)wasDead,(int32_t)isDead,
|
|
// (int32_t)g_hostdb.m_numHostsAlive);
|
|
|
|
if( h->m_isProxy ) {
|
|
// maintain m_numProxyAlive if there was a change in state
|
|
//if ( wasDead && ! isDead ) g_hostdb.m_numProxyAlive++;
|
|
//if ( ! wasDead && isDead ) g_hostdb.m_numProxyAlive--;
|
|
|
|
// sanity check, this should be at least 1 since we are alive
|
|
//if ( g_hostdb.m_numProxyAlive < 0 ||
|
|
// g_hostdb.m_numProxyAlive > g_hostdb.getNumProxy() ) {
|
|
// char *xx = NULL; *xx =0; }
|
|
}
|
|
else {
|
|
// maintain m_numHostsAlive if there was a change in state
|
|
if ( wasDead && ! isDead ) g_hostdb.m_numHostsAlive++;
|
|
if ( ! wasDead && isDead ) g_hostdb.m_numHostsAlive--;
|
|
|
|
// sanity check, this should be at least 1 since we are alive
|
|
if ( g_hostdb.m_numHostsAlive < 0 ||
|
|
g_hostdb.m_numHostsAlive > g_hostdb.m_numHosts ) {
|
|
char *xx = NULL; *xx =0; }
|
|
}
|
|
}
|
|
|
|
void checkKernelErrors( int fd, void *state ){
|
|
Host *me = g_hostdb.m_myHost;
|
|
|
|
int64_t st = gettimeofdayInMilliseconds();
|
|
char buf[4098];
|
|
// klogctl reads the last 4k lines of the kernel ring buffer
|
|
int16_t bufLen = klogctl(3,buf,4096);
|
|
int64_t took = gettimeofdayInMilliseconds() - st;
|
|
if ( took >= 3 ) {
|
|
int32_t len = bufLen;
|
|
if ( len > 200 ) len = 200;
|
|
char c = buf[len];
|
|
buf[len] = '\0';
|
|
log("db: klogctl took %"INT64" ms to read %s",took, buf);
|
|
buf[len] = c;
|
|
}
|
|
|
|
if ( bufLen < 0 ){
|
|
log ("db: klogctl returned error: %s",mstrerror(errno));
|
|
return;
|
|
}
|
|
// make sure not too big!
|
|
if ( bufLen >= 4097 ) {
|
|
log ("db: klogctl overflow");
|
|
return;
|
|
}
|
|
buf[bufLen] = '\0';
|
|
|
|
// compare it to the old buffer
|
|
if ( s_kernelRingBufLen == bufLen &&
|
|
strncmp ( s_kernelRingBuf, buf, bufLen ) == 0 )
|
|
// nothing has changed so return
|
|
return;
|
|
|
|
// somethings changed. find out what part has changed
|
|
// sometimes the kernel ring buffer gets full and overwrites itself.
|
|
// in that case compare only latter half of the old buffer
|
|
char *oldKernBuf = s_kernelRingBuf;
|
|
int32_t oldKernBufLen = s_kernelRingBufLen;
|
|
if ( s_kernelRingBufLen > 3 * 1024 ){
|
|
oldKernBuf = s_kernelRingBuf + s_kernelRingBufLen / 2;
|
|
oldKernBufLen = gbstrlen( oldKernBuf );
|
|
}
|
|
|
|
// somethings changed. find out what part has changed
|
|
char *changedBuf = strstr ( buf, oldKernBuf );
|
|
// we found the old buf. skip it and go to the part that has changed
|
|
if ( changedBuf )
|
|
changedBuf += oldKernBufLen;
|
|
// we couldn't find the old buf in the new buf!
|
|
else
|
|
changedBuf = buf;
|
|
int32_t changedBufLen = gbstrlen(changedBuf);
|
|
|
|
// copy the new buf over to the old buf
|
|
strcpy ( s_kernelRingBuf, buf );
|
|
s_kernelRingBufLen = bufLen;
|
|
|
|
static int32_t s_lastCount = 0;
|
|
|
|
// since we do not know if this host has been fixed or not by looking
|
|
// at the kernel ring buffer, keep returning until someone clicks
|
|
// 'clear kernel error message' control in Master Controls.
|
|
// but don't return before copying over the new buffer
|
|
if ( me->m_pingInfo.m_kernelErrors > 0 ) return;
|
|
|
|
// check if we match any error strings in master controls
|
|
char *p = NULL;
|
|
if ( gbstrlen(g_conf.m_errstr1) > 0 )
|
|
p = strstr( changedBuf, g_conf.m_errstr1 );
|
|
|
|
if ( !p && gbstrlen(g_conf.m_errstr2) > 0 )
|
|
p = strstr( changedBuf, g_conf.m_errstr2 );
|
|
|
|
if ( !p && gbstrlen(g_conf.m_errstr3) > 0 )
|
|
p = strstr( changedBuf, g_conf.m_errstr3 );
|
|
|
|
if ( p ){
|
|
// a kernel error that we cared for has happened!
|
|
// check what kind of an error is this
|
|
// isolate that line from the rest of the buf
|
|
while ( p < changedBuf + changedBufLen && *p != '\n' )
|
|
p++;
|
|
*p = '\0';
|
|
|
|
while ( p > changedBuf && *(p-1) != '\n' )
|
|
p--;
|
|
|
|
if ( strncasestr ( p, gbstrlen(p) , "scsi" ) &&
|
|
g_numIOErrors > s_lastCount ) {
|
|
me->m_pingInfo.m_kernelErrors = ME_IOERR;
|
|
s_lastCount = g_numIOErrors;
|
|
}
|
|
else if ( strncasestr ( p, gbstrlen(p), "100 mbps" ) )
|
|
me->m_pingInfo.m_kernelErrors = ME_100MBPS;
|
|
// assume an I/O IO error here otherwise
|
|
else if ( g_numIOErrors > s_lastCount ) {
|
|
me->m_pingInfo.m_kernelErrors = ME_UNKNWN;
|
|
s_lastCount = g_numIOErrors;
|
|
}
|
|
log ( LOG_DEBUG,"PingServer: error message in "
|
|
"kernel ring buffer, \"%s\"", p );
|
|
log ( LOG_WARN,"PingServer: this host shall be "
|
|
"dead to all other hosts unless the problem is fixed "
|
|
"and kernel error message cleared in Master Controls." );
|
|
}
|
|
return;
|
|
}
|
|
|
|
void PingServer::sendEmailMsg ( int32_t *lastTimeStamp , char *msg ) {
|
|
// leave if we already sent and alert within 5 mins
|
|
//static int32_t s_lasttime = 0;
|
|
int32_t now = getTimeGlobalNoCore();
|
|
if ( now - *lastTimeStamp < 5*60 ) return;
|
|
// prepare msg to send
|
|
//Host *h0 = g_hostdb.getHost ( 0 );
|
|
char msgbuf[1024];
|
|
snprintf(msgbuf, 1024,
|
|
"cluster %s : proxy: %s",
|
|
g_conf.m_clusterName,
|
|
msg);
|
|
// send it, force it, so even if email alerts off, it sends it
|
|
g_pingServer.sendEmail ( NULL , // Host *h
|
|
msgbuf , // char *errmsg = NULL ,
|
|
true , // bool sendToAdmin = true ,
|
|
false , // bool oom = false ,
|
|
false , // bool kernelErrors = false ,
|
|
false , // bool parmChanged = false ,
|
|
true );// bool forceIt = false );
|
|
*lastTimeStamp = now;
|
|
return;
|
|
}
|
|
|
|
/////////////////
|
|
//
|
|
// for sending email notifications to external addresses
|
|
//
|
|
///////////////////
|
|
|
|
/*
|
|
bool gotMxIp ( EmailInfo *ei ) ;
|
|
|
|
void gotMxIpWrapper ( void *state , int32_t ip ) {
|
|
EmailInfo *ei = (EmailInfo *)state;
|
|
// i guess set it
|
|
ei->m_mxIp = ip;
|
|
// handle it
|
|
if ( ! gotMxIp ( ei ) ) return;
|
|
// did not block, call callback
|
|
ei->m_callback ( ei->m_state );
|
|
}
|
|
*/
|
|
|
|
void doneSendingEmailWrapper ( void *state , TcpSocket *sock ) {
|
|
if ( g_errno )
|
|
log("crawlbot: error sending email = %s",mstrerror(g_errno));
|
|
// log the reply
|
|
if ( sock && sock->m_readBuf )
|
|
log("crawlbot: got socket reply=%s", sock->m_readBuf);
|
|
EmailInfo *ei = (EmailInfo *)state;
|
|
ei->m_callback ( ei->m_state );
|
|
}
|
|
|
|
|
|
// returns false if blocked, true otherwise
|
|
bool sendEmail ( class EmailInfo *ei ) {
|
|
|
|
// this is often set from XmlDoc.cpp::indexDoc()
|
|
g_errno = 0;
|
|
|
|
char *to = ei->m_toAddress.getBufStart();
|
|
char *dom = strstr(to,"@");
|
|
if ( ! dom || ! dom[1] ) {
|
|
g_errno = EBADENGINEER;
|
|
log("email: missing @ sign in email %s",to);
|
|
return true;
|
|
}
|
|
// point to domain
|
|
dom++;
|
|
// ref that for printing HELO line
|
|
ei->m_dom = dom;
|
|
|
|
// just send it to a sendmail server which will forward it,
|
|
// because a lot of email servers don't like us connecting directly
|
|
// beause i think our IP address does not match that of our
|
|
// MX ip for our domain? sendmail must be configured to allow
|
|
// forwarding if it receives an email from the IP of host #0
|
|
// in the cluster.
|
|
ei->m_mxIp = atoip(g_conf.m_sendmailIp);
|
|
|
|
/*
|
|
// prepend a special marker so Dns.cpp returns the mx record
|
|
ei->m_mxDomain.safePrintf("gbmxrec-%s",dom);
|
|
|
|
// get mx ip. returns false if would block.
|
|
if ( ! g_dns.getIp ( ei->m_mxDomain.getBufStart() ,
|
|
ei->m_mxDomain.getLength() ,
|
|
&ei->m_mxIp,
|
|
ei ,
|
|
gotMxIpWrapper ) )
|
|
return false;
|
|
|
|
return gotMxIp ( ei );
|
|
}
|
|
|
|
// returns false if blocked, true otherwise
|
|
bool gotMxIp ( EmailInfo *ei ) {
|
|
|
|
// error?
|
|
if ( g_errno ) {
|
|
log("crawlbot: error getting MX IP to send email alert for "
|
|
"%s = %s",
|
|
ei->m_mxDomain.getBufStart(),
|
|
mstrerror(g_errno));
|
|
return true;
|
|
}
|
|
*/
|
|
|
|
// wtf?
|
|
if ( ei->m_mxIp == 0 ) {
|
|
log("crawlbot: got bad MX ip of 0 for %s",
|
|
ei->m_mxDomain.getBufStart());
|
|
return true;
|
|
}
|
|
|
|
// label alloc'd mem with gotmxip in case of mem leak
|
|
SafeBuf sb;//("gotmxip");
|
|
// helo line
|
|
sb.safePrintf("HELO %s\r\n",ei->m_dom);
|
|
// mail line
|
|
sb.safePrintf( "MAIL FROM:<%s>\r\n", ei->m_fromAddress.getBufStart());
|
|
// to line
|
|
sb.safePrintf( "RCPT TO:<%s>\r\n", ei->m_toAddress.getBufStart());
|
|
// data
|
|
sb.safePrintf( "DATA\r\n");
|
|
// body
|
|
sb.safePrintf( "To: %s\r\n", ei->m_toAddress.getBufStart());
|
|
sb.safePrintf( "Subject: %s\r\n",ei->m_subject.getBufStart());
|
|
// mime header must be separated from body by an extra \r\n
|
|
sb.safePrintf( "\r\n");
|
|
sb.safePrintf( "%s", ei->m_body.getBufStart() );
|
|
// quit
|
|
sb.safePrintf( "\r\n.\r\nQUIT\r\n\r\n");
|
|
// send the message
|
|
TcpServer *ts = g_httpServer.getTcp();
|
|
log ( LOG_WARN, "crawlbot: Sending email to %s (MX IP=%s):\n %s",
|
|
ei->m_toAddress.getBufStart(),
|
|
iptoa(ei->m_mxIp),
|
|
sb.getBufStart() );
|
|
// make a temp string
|
|
SafeBuf mxIpStr;
|
|
mxIpStr.safePrintf("%s",iptoa(ei->m_mxIp) );
|
|
if ( !ts->sendMsg ( mxIpStr.getBufStart(),
|
|
mxIpStr.length(),
|
|
25, // smtp (send mail transfer protocol) port
|
|
sb.getBufStart(),
|
|
sb.getCapacity(),
|
|
sb.getLength(),
|
|
sb.getLength(),
|
|
ei,//NULL,//h,
|
|
doneSendingEmailWrapper,
|
|
60*1000,
|
|
100*1024,
|
|
100*1024 ) ) {
|
|
// tcpserver will free it, so prevent double free with detach
|
|
sb.detachBuf();
|
|
return false;
|
|
}
|
|
// error? if no error, it was a successful write and it done,
|
|
// otherwise we will want to free the sendbuf here so do not
|
|
// call detachBuf()
|
|
if ( ! g_errno )
|
|
// tcpserver will free it, so prevent double free with detach
|
|
sb.detachBuf();
|
|
// we did not block
|
|
return true;
|
|
}
|
|
|
|
|
|
static void gotMandrillReplyWrapper ( void *state , TcpSocket *s ) {
|
|
// why core here with s NULL
|
|
if ( ! s ) {
|
|
// crap seems like we do not retry so they will not get
|
|
// the notification... how to fix better?
|
|
log("email: failed to lookup mandrill ip. sock is null.");
|
|
g_errno = EBADIP;
|
|
}
|
|
else {
|
|
// log the mandril reply
|
|
log("email: got mandrill reply: %s",s->m_readBuf);
|
|
}
|
|
EmailInfo *ei = (EmailInfo *)state;
|
|
if ( ei->m_callback ) ei->m_callback ( ei->m_state );
|
|
}
|
|
|
|
|
|
// mailchimp http mail api
|
|
bool sendEmailThroughMandrill ( class EmailInfo *ei ) {
|
|
|
|
// this is often set from XmlDoc.cpp::indexDoc()
|
|
g_errno = 0;
|
|
|
|
SafeBuf sb;
|
|
|
|
// then the message to send
|
|
sb.safePrintf(
|
|
"POST /api/1.0/messages/send-template.json"
|
|
" HTTP/1.0\r\n"
|
|
"Accept: image/gif, image/x-xbitmap, image/jpeg, "
|
|
"image/pjpeg, application/x-shockwave-flash, "
|
|
"application/msword, */*\r\n"
|
|
"Accept-Language: en-us\r\n"
|
|
//"Content-Type: application/x-www-form-urlencoded\r\n"
|
|
"Content-Type: application/json\r\n"
|
|
//"Accept-Encoding: gzip, deflate\r\n"
|
|
"User-Agent: Mozilla/4.0 "
|
|
"(compatible; MSIE 6.0; Windows 98; Win 9x 4.90)\r\n"
|
|
"Host: mandrillapp.com\r\n" // www.t-mobile.com
|
|
"Content-Length: xxxx\r\n"
|
|
//"Connection: Keep-Alive\r\n"
|
|
"Connection: close\r\n"
|
|
//"Cookie: \r\n"
|
|
"Cache-Control: no-cache\r\n\r\n"
|
|
);
|
|
//
|
|
// post data
|
|
//
|
|
char *to = ei->m_toAddress.getBufStart();
|
|
char *from = ei->m_fromAddress.getBufStart();
|
|
|
|
char *crawl = "unknown2";
|
|
CollectionRec *cr = g_collectiondb.m_recs[ei->m_collnum];
|
|
if ( cr ) crawl = cr->m_diffbotCrawlName.getBufStart();
|
|
|
|
SafeBuf ub;
|
|
ub.safePrintf( "{\"key\":\"GhWT0UpcVBl7kmumrt9dqg\","
|
|
"\"template_name\":\"crawl-finished\","
|
|
"\"template_content\": [],"
|
|
"\"message\": {"
|
|
"\"to\": ["
|
|
"{"
|
|
"\"email\":\"%s\""
|
|
"}"
|
|
"],"
|
|
|
|
"\"from_email\":\"%s\","
|
|
"\"headers\": {"
|
|
"\"Reply-To\":\"%s\""
|
|
"},"
|
|
"\"bcc_address\":\"%s\","
|
|
"\"global_merge_vars\":["
|
|
"{"
|
|
"\"name\":\"CRAWLNAME\","
|
|
"\"content\":\"%s\""
|
|
"}"
|
|
"]"
|
|
"}"
|
|
"}"
|
|
, to
|
|
, from
|
|
, from
|
|
, from
|
|
, crawl
|
|
);
|
|
// this is not for application/json content type in POST request
|
|
//ub.urlEncode();
|
|
// how big?
|
|
int32_t contentLen = ub.length();
|
|
// append the post data to the full request
|
|
sb.safeMemcpy ( &ub );
|
|
// make sure ends in \0
|
|
sb.nullTerm();
|
|
|
|
// set it
|
|
char *needle = "Content-Length: ";
|
|
int32_t needleLen = gbstrlen(needle);
|
|
char *s = strstr(sb.getBufStart(),needle);
|
|
s += needleLen;
|
|
char c = s[4];
|
|
sprintf(s,"%04"INT32"",contentLen);
|
|
s[4] = c;
|
|
|
|
|
|
// show it
|
|
log("email: sending request to https://mandrillapp.com/ : %s",
|
|
sb.getBufStart() );
|
|
|
|
// gotta get the cookie
|
|
char *uu = "https://mandrillapp.com/";
|
|
if ( ! g_httpServer.getDoc ( uu,
|
|
0, // ip
|
|
0 , // offset
|
|
-1 , // size
|
|
false , // m_ifModifiedSince
|
|
ei , // state
|
|
gotMandrillReplyWrapper , //
|
|
60*1000 , // timeout
|
|
0 , // m_proxyIp
|
|
0 , // m_proxyPort
|
|
100*1024 , // m_maxTextDocLen
|
|
100*1024 , // m_maxOtherDocLen
|
|
NULL, // user agent
|
|
"HTTP/1.0" , //proto
|
|
true, // post?
|
|
NULL, // cookie
|
|
NULL, // additional header
|
|
sb.getBufStart() ) ) // full requesst
|
|
return false;
|
|
// must have been an error
|
|
log("net: Got error getting page from mandrill: %s.",
|
|
mstrerror(g_errno));
|
|
// ignore it
|
|
g_errno = 0;
|
|
// always call this at the end
|
|
return true;
|
|
}
|
|
|
|
/////////////////////////////
|
|
//
|
|
// send two notifications, email and webhook
|
|
//
|
|
/////////////////////////////
|
|
|
|
void doneSendingNotifyEmailWrapper ( void *state ) {
|
|
EmailInfo *ei = (EmailInfo *)state;
|
|
ei->m_notifyBlocked--;
|
|
// error?
|
|
log("build: email notification status (count=%i) (ei=0x%"PTRFMT"): %s",
|
|
(int)ei->m_notifyBlocked,(PTRTYPE)ei,mstrerror(g_errno));
|
|
// ignore it for rest
|
|
g_errno = 0;
|
|
// wait for post url to get done
|
|
if ( ei->m_notifyBlocked > 0 ) return;
|
|
// unmark it
|
|
ei->m_inUse = false;
|
|
// all done
|
|
ei->m_finalCallback ( ei->m_finalState );
|
|
}
|
|
|
|
void doneGettingNotifyUrlWrapper ( void *state , TcpSocket *sock ) {
|
|
EmailInfo *ei = (EmailInfo *)state;
|
|
ei->m_notifyBlocked--;
|
|
// error?
|
|
log("build: url notification status (count=%i) (ei=0x%"PTRFMT"): %s",
|
|
(int)ei->m_notifyBlocked,(PTRTYPE)ei,mstrerror(g_errno));
|
|
// wait for email to get done
|
|
if ( ei->m_notifyBlocked > 0 ) return;
|
|
// unmark it
|
|
ei->m_inUse = false;
|
|
// all done
|
|
ei->m_finalCallback ( ei->m_finalState );
|
|
}
|
|
|
|
// for printCrawlDetailsInJson()
|
|
#include "PageCrawlBot.h"
|
|
|
|
// . return false if would block, true otherwise
|
|
// . used to send email and get a url when a crawl hits a maxToCrawl
|
|
// or maxToProcess limitation.
|
|
bool sendNotification ( EmailInfo *ei ) {
|
|
|
|
// disable for now
|
|
//log("ping: NOT SENDING NOTIFICATION -- DEBUG!!");
|
|
//return true;
|
|
|
|
if ( ei->m_inUse ) { char *xx=NULL;*xx=0; }
|
|
|
|
// caller must set this, as well as m_finalCallback/m_finalState
|
|
CollectionRec *cr = g_collectiondb.m_recs[ei->m_collnum];
|
|
|
|
char *email = "";
|
|
char *url = "";
|
|
char *crawl = "unknown2";
|
|
|
|
if ( cr ) email = cr->m_notifyEmail.getBufStart();
|
|
if ( cr ) url = cr->m_notifyUrl.getBufStart();
|
|
if ( cr ) crawl = cr->m_diffbotCrawlName.getBufStart();
|
|
|
|
// sanity check, can only call once
|
|
if ( ei->m_notifyBlocked != 0 ) { char *xx=NULL;*xx=0; }
|
|
|
|
ei->m_inUse = true;
|
|
|
|
|
|
if ( email && email[0] ) {
|
|
log("build: sending email notification to %s for "
|
|
"crawl \"%s\" : %s",
|
|
email,crawl,ei->m_spiderStatusMsg.getBufStart());
|
|
SafeBuf msg;
|
|
msg.safePrintf("Your crawl \"%s\" has a new status: %s"
|
|
, ei->m_spiderStatusMsg.getBufStart()
|
|
, crawl );
|
|
|
|
// reset m_length otherwise it builds up
|
|
ei->m_toAddress.reset();
|
|
ei->m_toAddress.safeStrcpy ( email );
|
|
ei->m_toAddress.nullTerm();
|
|
|
|
// reset m_length otherwise it builds up
|
|
ei->m_fromAddress.reset();
|
|
ei->m_fromAddress.safePrintf("support@diffbot.com");
|
|
|
|
/*
|
|
ei->m_subject.safePrintf("crawl paused");
|
|
ei->m_body.safePrintf("Your crawl for collection \"%s\" "
|
|
"has been paused because it hit "
|
|
"a maxPagesToCrawl or maxPagesToProcess "
|
|
"limitation."
|
|
, cr->m_coll);
|
|
*/
|
|
ei->m_state = ei;//this;
|
|
ei->m_callback = doneSendingNotifyEmailWrapper;
|
|
// this will usually block, unless error maybe
|
|
if ( ! sendEmailThroughMandrill ( ei ) )
|
|
ei->m_notifyBlocked++;
|
|
}
|
|
|
|
if ( url && url[0] ) {
|
|
log("build: sending url notification to %s for coll \"%s\"",
|
|
url,crawl);
|
|
|
|
Url uu; uu.set ( url );
|
|
|
|
SafeBuf fullReq;
|
|
fullReq.safePrintf("POST %s HTTP/1.0\r\n"
|
|
"User-Agent: Crawlbot/2.0\r\n"
|
|
"Accept: */*\r\n"
|
|
"Host: "
|
|
, uu.getPath()
|
|
);
|
|
fullReq.safeMemcpy ( uu.getHost() , uu.getHostLen() );
|
|
fullReq.safePrintf("\r\n");
|
|
// make custom headers
|
|
fullReq.safePrintf ("X-Crawl-Name: %s\r\n"
|
|
// last \r\n is added in HttpRequest.cpp
|
|
"X-Crawl-Status: %s\r\n" // hdrs
|
|
, cr->m_diffbotCrawlName.getBufStart()
|
|
, ei->m_spiderStatusMsg.getBufStart()
|
|
);
|
|
// also in post body
|
|
SafeBuf postContent;
|
|
// the collection details
|
|
printCrawlDetailsInJson ( &postContent , cr );
|
|
// content-length of it
|
|
fullReq.safePrintf("Content-Length: %"INT32"\r\n",
|
|
postContent.length());
|
|
// type is json
|
|
fullReq.safePrintf("Content-Type: application/json\r\n");
|
|
fullReq.safePrintf("\r\n");
|
|
// then the post content
|
|
fullReq.safeMemcpy ( &postContent );
|
|
fullReq.nullTerm();
|
|
|
|
// GET request
|
|
if ( ! g_httpServer.getDoc ( url ,
|
|
0 , // ip
|
|
0 , // offset
|
|
-1 , // size
|
|
false, // ifmodsince
|
|
ei,//this ,
|
|
doneGettingNotifyUrlWrapper ,
|
|
60*1000 , // timeout
|
|
0, // proxyip
|
|
0 , // proxyport
|
|
10000, // maxTextDocLen
|
|
10000, // maxOtherDocLen
|
|
"Crawlbot/2.0", // user agent
|
|
"HTTP/1.0", // proto
|
|
true , // doPost
|
|
NULL, // cookie
|
|
NULL , // custom hdrs
|
|
fullReq.getBufStart() ,
|
|
NULL ) )
|
|
ei->m_notifyBlocked++;
|
|
}
|
|
|
|
if ( ei->m_notifyBlocked == 0 ) {
|
|
ei->m_inUse = false;
|
|
return true;
|
|
}
|
|
|
|
// we blocked, wait
|
|
return false;
|
|
}
|