Fix bugs related to restarting a cored shard

during repair mode. We need to be able to resume
the repair/rebuild scan.
This commit is contained in:
Matt Wells 2015-01-06 11:28:55 -08:00
parent 19c92339b3
commit b693fe1530
2 changed files with 43 additions and 13 deletions

View File

@ -1287,21 +1287,37 @@ bool addMetaList ( char *p , UdpSlot *slot ) {
// return true;
//}
// an uninitialized secondary rdb? it will have a keysize
// if 0 if its never been intialized from the repair page
// of 0 if it's never been initialized from the repair page.
// don't core any more, we probably restarted this shard
// and it needs to wait for host #0 to sync its
// g_conf.m_repairingEnabled to '1' so it can start its
// Repair.cpp repairWrapper() loop and init the secondary
// rdbs so "rdb" here won't be NULL any more.
if ( rdb && rdb->m_ks <= 0 ) {
log("msg4: oops. got an rdbId key for a secondary "
"rdb and not in repair mode! fix xmldoc!");
char *xx=NULL;*xx=0;
time_t currentTime = getTime();
static time_t s_lastTime = 0;
if ( currentTime > s_lastTime + 10 ) {
s_lastTime = currentTime;
log("msg4: oops. got an rdbId key for a "
"secondary "
"rdb and not in repair mode. waiting to "
"be in repair mode.");
g_errno = ETRYAGAIN;
return false;
//char *xx=NULL;*xx=0;
}
}
if ( ! rdb ) {
if ( slot )
log("msg4: rdbId of %"INT32" unrecognized from "
"hostip=%s. "
log("msg4: rdbId of %"INT32" unrecognized "
"from hostip=%s. "
"dropping WHOLE request", (int32_t)rdbId,
iptoa(slot->m_ip));
else
log("msg4: rdbId of %"INT32" unrecognized. "
"dropping WHOLE request", (int32_t)rdbId);
g_errno = ETRYAGAIN;
return false;
// drop it for now!!
//if ( p < pend ) goto loop;
// all done
@ -1334,7 +1350,11 @@ bool addMetaList ( char *p , UdpSlot *slot ) {
// sanity check
if ( rdb->getKeySize() == 0 ) {
log("seems like a stray /e/repair-addsinprogress.dat file "
"rdbId=%"INT32". not in repair mode. dropping.",(int32_t)rdbId);
"rdbId=%"INT32". waiting to be in repair mode."
,(int32_t)rdbId);
//not in repair mode. dropping.",(int32_t)rdbId);
g_errno = ETRYAGAIN;
return false;
char *xx=NULL;*xx=0;
// drop it for now!!
p += recSize;

View File

@ -879,18 +879,23 @@ int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum , int32_t
log(LOG_LOGIC,"db: addFile: fileId collided."); return -1; }
// shift everyone up if we need to fit this file in the middle somewhere
if ( i < m_numFiles ) {
int32_t size = (m_numFiles-i)*sizeof(BigFile *);
memmove ( &m_files [i+1] , &m_files [i] , size);
memmove ( &m_fileIds[i+1] , &m_fileIds[i] , size);
memmove ( &m_fileIds2[i+1], &m_fileIds2[i], size);
memmove ( &m_maps [i+1] , &m_maps [i] , size);
int nn = m_numFiles-i;
memmove ( &m_files [i+1] , &m_files[i],nn*sizeof(BigFile *));
memmove ( &m_fileIds[i+1] , &m_fileIds[i],nn*sizeof(int32_t));
memmove ( &m_fileIds2[i+1], &m_fileIds2[i],nn*sizeof(int32_t));
memmove ( &m_maps [i+1] , &m_maps [i],nn*sizeof(RdbMap *));
}
// insert this file into position #i
m_fileIds [i] = id;
m_fileIds2 [i] = id2;
m_files [i] = f;
m_maps [i] = m;
// debug point
//log("map #0 is %s ptr=%llx (nf=%i)",
// m_maps[0]->getFilename(),(long long)m_maps[0],m_numFiles);
// to free up mem for diffbot's many collections...
cr = g_collectiondb.getRec ( m_collnum );
if ( ! isNew && cr && cr->m_isCustomCrawl )
@ -2446,8 +2451,13 @@ void RdbBase::closeMaps ( bool urgent ) {
}
void RdbBase::saveMaps ( bool useThread ) {
for ( int32_t i = 0 ; i < m_numFiles ; i++ )
for ( int32_t i = 0 ; i < m_numFiles ; i++ ) {
if ( ! m_maps[i] ) {
log("base: map for file #%i is null",i);
continue;
}
m_maps[i]->writeMap ( );
}
}
void RdbBase::verifyDiskPageCache ( ) {