got rebalancing working right

This commit is contained in:
Matt Wells 2014-01-15 17:40:17 -08:00
parent 4a04542829
commit 4b27b22949
5 changed files with 60 additions and 8 deletions

View File

@ -2633,7 +2633,8 @@ Pages.o: Pages.cpp gb-include.h types.h fctypes.h Unicode.h \
Events.h Msg22.h CatRec.h Catdb.h Msge1.h Msg4.h Msg8b.h SearchInput.h \
Msg40.h Msg39.h Msg37.h Posdb.h TopTree.h Clusterdb.h IndexTable2.h \
Msg51.h Msg17.h Msg3a.h PostQueryRerank.h Sanity.h Msg1.h Datedb.h \
SiteGetter.h Title.h Address.h zlib.h zconf.h Spider.h Users.h
SiteGetter.h Title.h Address.h zlib.h zconf.h Spider.h Users.h \
Rebalance.h
PageSockets.o: PageSockets.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h \
TcpServer.h openssl/ssl.h openssl/e_os2.h openssl/opensslconf.h \

View File

@ -10,6 +10,7 @@
#include "Proxy.h"
#include "PageParser.h" // g_inPageParser
#include "Users.h"
#include "Rebalance.h"
// a global class extern'd in Pages.h
Pages g_pages;
@ -984,11 +985,19 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
"is not the same over all hosts.");
}
// if any host had foreign recs, note that
if ( ps->m_numHostsWithForeignRecs ) {
if ( g_rebalance.m_isScanning ) {
if ( adds ) mb.safePrintf("<br><br>");
adds++;
mb.safePrintf("One or more hosts require shard rebalance. "
mb.safePrintf("Rebalancer is currently running.");
}
// if any host had foreign recs, note that
char *needsRebalance = g_rebalance.getNeedsRebalance();
if ( ! g_rebalance.m_isScanning &&
needsRebalance &&
*needsRebalance ) {
if ( adds ) mb.safePrintf("<br><br>");
adds++;
mb.safePrintf("This host requires a shard rebalance. "
"Click 'rebalance shards' in master controls.");
}

View File

@ -23,9 +23,12 @@ Rebalance g_rebalance;
Rebalance::Rebalance ( ) {
m_inRebalanceLoop = false;
m_numForeignRecs = 0;
m_rebalanceCount = 0LL;
// reset
m_rdbNum = 0;
m_collnum = 0;
m_lastCollnum = -1;
m_lastRdb = NULL;
KEYMIN ( m_nextKey , MAX_KEY_BYTES );
KEYMAX ( m_endKey , MAX_KEY_BYTES );
m_needsRebalanceValid = false;
@ -183,6 +186,13 @@ void Rebalance::scanLoop ( ) {
CollectionRec *cr = g_collectiondb.m_recs[m_collnum];
// skip if none... like statsdb, i guess don't rebalance!!
if ( ! cr ) continue;
// new?
//if ( m_lastCollnum != m_collnum ) {
// log("rebalance: rebalancing %s", cr->m_coll);
// m_lastCollnum = m_collnum;
//}
// scan all rdbs in that collection
for ( ; m_rdbNum < g_process.m_numRdbs ; m_rdbNum++ ) {
// skip if not good
@ -193,8 +203,20 @@ void Rebalance::scanLoop ( ) {
if ( ! rdb->isInitialized() ) continue;
// skip statsdb, do not rebalance that
if ( rdb->m_rdbId == RDB_STATSDB ) continue;
// log it as well
if ( m_lastRdb != rdb ) {
log("rebalance: scanning %s [%s]",
cr->m_coll,rdb->m_dbname);
// only do this once per rdb/coll
m_lastRdb = rdb;
// reset key cursor as well!!!
KEYMIN ( m_nextKey , MAX_KEY_BYTES );
}
// scan it. returns true if done, false if blocked
if ( ! scanRdb ( ) ) return;
// note it
log("rebalance: did %lli recs",m_rebalanceCount);
m_rebalanceCount = 0;
}
// reset it for next colls
m_rdbNum = 0;
@ -204,11 +226,17 @@ void Rebalance::scanLoop ( ) {
m_isScanning = false;
m_needsRebalance = false;
// get rid of the 'F' flag in PageHosts.cpp
m_numForeignRecs = 0;
// save the file then, but with these stats:
m_collnum = 0;
m_rdbNum = 0;
KEYMIN(m_nextKey,MAX_KEY_BYTES);
log("rebalance: done rebalancing all collections. "
"Saving rebalance.txt.");
saveRebalanceFile();
}
@ -218,6 +246,8 @@ bool Rebalance::saveRebalanceFile ( ) {
// convert m_nextKey
binToHex ( (unsigned char *)&m_nextKey , MAX_KEY_BYTES , keyStr );
//log("db: saving rebalance.txt");
SafeBuf sb;
sb.safePrintf (
"myshard: %li\n"
@ -266,7 +296,7 @@ bool Rebalance::scanRdb ( ) {
&m_list ,
m_nextKey ,
m_endKey , // should be maxed!
1024 , // min rec sizes
100024 , // min rec sizes
true , // include tree?
false , // includeCache
false , // addToCache
@ -325,6 +355,11 @@ bool Rebalance::gotList ( ) {
m_posMetaList.reset();
m_negMetaList.reset();
if ( m_list.isEmpty() ) {
KEYSET ( m_nextKey , m_endKey , ks );
return true;
}
char *last = NULL;
for ( ; ! m_list.isExhausted() ; m_list.skipCurrentRec() ) {
@ -336,6 +371,8 @@ bool Rebalance::gotList ( ) {
last = rec;
// skip it if it belongs with us
if ( shard == myShard ) continue;
// count it
m_rebalanceCount++;
// otherwise, it does not!
//long recSize = m_list.getCurrentRecSize();
// copy the full key into "key" buf because might be compressed
@ -364,7 +401,7 @@ bool Rebalance::gotList ( ) {
// make key a delete
key[0] &= 0xfe;
// and store that negative key
m_posMetaList.safeMemcpy ( key , ks );
m_negMetaList.safeMemcpy ( key , ks );
}
// update nextkey

View File

@ -25,9 +25,12 @@ class Rebalance {
bool m_inRebalanceLoop;
long m_numForeignRecs;
long long m_rebalanceCount;
long m_rdbNum;
collnum_t m_collnum;
collnum_t m_lastCollnum;
class Rdb *m_lastRdb;
char m_nextKey[MAX_KEY_BYTES];
char m_endKey[MAX_KEY_BYTES];
bool m_needsRebalanceValid;

View File

@ -191,12 +191,14 @@ bool Titledb::verify ( char *coll ) {
// tally it up
g_rebalance.m_numForeignRecs += count - got;
log ("db: Out of first %li records in titledb, "
"only %li belong to our group.",count,got);
"only %li belong to our shard. c=%s",count,got,coll);
// exit if NONE, we probably got the wrong data
if ( count > 10 && got == 0 )
log("db: Are you sure you have the right "
"data in the right directory? "
"Exiting.");
"coll=%s "
"Exiting.",
coll);
// repeat with log
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {