mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Keep track of how many times the host exited/cored, shown as an exponent
on the 'x' in the hosts table. This way we can detect hosts that have restarted many times and fix them.
This commit is contained in:
parent
e583850e40
commit
2ce107e4be
2
Hostdb.h
2
Hostdb.h
@ -115,7 +115,7 @@ class PingInfo {
|
||||
char m_gbVersionStr[21];
|
||||
char m_repairMode;
|
||||
char m_kernelErrors;
|
||||
|
||||
uint8_t m_recoveryLevel;
|
||||
};
|
||||
|
||||
class Host {
|
||||
|
@ -521,9 +521,18 @@ skipReplaceHost:
|
||||
}
|
||||
|
||||
// recovery mode? recovered from coring?
|
||||
if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML )
|
||||
if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) {
|
||||
fb.safePrintf("<b title=\"Recovered from core"
|
||||
"\">x</b>");
|
||||
// this is only 8-bits at the moment so it's capped
|
||||
// at 255. this level is 1 the first time we core
|
||||
// and are restarted.
|
||||
if ( h->m_pingInfo.m_recoveryLevel > 1 )
|
||||
fb.safePrintf("<sup>%"INT32"</sup>",
|
||||
(int32_t)
|
||||
h->m_pingInfo.m_recoveryLevel);
|
||||
}
|
||||
|
||||
if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
|
||||
fb.safePrintf("Recovered from core");
|
||||
|
||||
@ -1463,7 +1472,8 @@ skipReplaceHost:
|
||||
"<td>x (status flag)</td>"
|
||||
"<td>Indicates host has abruptly exited due to a fatal "
|
||||
"error (cored) and "
|
||||
"restarted itself."
|
||||
"restarted itself. The exponent is how many times it has "
|
||||
"done this. If no exponent, it only did it once."
|
||||
"</td>"
|
||||
"</tr>\n"
|
||||
|
||||
|
@ -28,6 +28,7 @@ int32_t klogctl( int, char *,int ) { return 0; }
|
||||
|
||||
// from main.cpp. when keepalive script restarts us this is true
|
||||
extern bool g_recoveryMode;
|
||||
extern int32_t g_recoveryLevel;
|
||||
|
||||
// a global class extern'd in .h file
|
||||
PingServer g_pingServer;
|
||||
@ -491,6 +492,10 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
||||
flags |= PFLAG_MERGEMODE0OR6;
|
||||
if ( ! isClockInSync() ) flags |= PFLAG_OUTOFSYNC;
|
||||
|
||||
uint8_t rv8 = (uint8_t)g_recoveryLevel;
|
||||
if ( g_recoveryLevel > 255 ) rv8 = 255;
|
||||
pi->m_recoveryLevel = rv8;
|
||||
|
||||
//*(int32_t *)p = flags; p += 4; // 4 bytes
|
||||
pi->m_flags = flags;
|
||||
|
||||
|
17
main.cpp
17
main.cpp
@ -195,6 +195,8 @@ void dumpLinkdb ( char *coll,int32_t sfn,int32_t numFiles,bool includeT
|
||||
void exitWrapper ( void *state ) { exit(0); };
|
||||
|
||||
bool g_recoveryMode = false;
|
||||
|
||||
int32_t g_recoveryLevel = 0;
|
||||
|
||||
bool isRecoveryFutile ( ) ;
|
||||
|
||||
@ -1116,8 +1118,15 @@ int main2 ( int argc , char *argv[] ) {
|
||||
//send an email on startup for -r, like if we are recovering from an
|
||||
//unclean shutdown.
|
||||
g_recoveryMode = false;
|
||||
if ( strcmp ( cmd , "-r" ) == 0 ) g_recoveryMode = true;
|
||||
if ( strcmp ( cmd2 , "-r" ) == 0 ) g_recoveryMode = true;
|
||||
char *cc = NULL;
|
||||
if ( strncmp ( cmd , "-r" ,2 ) == 0 ) cc = cmd;
|
||||
if ( strncmp ( cmd2 , "-r",2 ) == 0 ) cc = cmd2;
|
||||
if ( cc ) {
|
||||
g_recoveryMode = true;
|
||||
g_recoveryLevel = 1;
|
||||
if ( cc[2] ) g_recoveryLevel = atoi(cc+2);
|
||||
if ( g_recoveryLevel < 0 ) g_recoveryLevel = 0;
|
||||
}
|
||||
|
||||
// run as daemon? then we have to fork
|
||||
if ( strcmp ( cmd , "-d" ) == 0 ) g_conf.m_runAsDaemon = true;
|
||||
@ -5170,6 +5179,7 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
|
||||
"export MALLOC_CHECK_=0;"
|
||||
"cp -f gb gb.oldsave ; "
|
||||
"ADDARGS='' "
|
||||
"INC=1 "
|
||||
"EXITSTATUS=1 ; "
|
||||
"while [ \\$EXITSTATUS != 0 ]; do "
|
||||
"{ "
|
||||
@ -5191,7 +5201,8 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
|
||||
" ;"
|
||||
|
||||
"EXITSTATUS=\\$? ; "
|
||||
"ADDARGS='-r' ; "
|
||||
"ADDARGS='-r'\\$INC ; "
|
||||
"INC=\\$((INC+1));"
|
||||
"} "
|
||||
"done >& /dev/null & \" %s",
|
||||
//"\" %s",
|
||||
|
Loading…
Reference in New Issue
Block a user