#include "gb-include.h"
#include "Rdb.h"
//#include "Checksumdb.h"
#include "Clusterdb.h"
#include "Hostdb.h"
#include "Tagdb.h"
#include "Catdb.h"
#include "Indexdb.h"
#include "Posdb.h"
#include "Cachedb.h"
#include "Monitordb.h"
//#include "Datedb.h"
#include "Titledb.h"
#include "Spider.h"
//#include "Tfndb.h"
//#include "Sync.h"
#include "Spider.h"
#include "Repair.h"
#include "Process.h"
#include "Statsdb.h"
#include "Syncdb.h"
#include "Sections.h"
#include "Placedb.h"
#include "Spider.h"
#include "Revdb.h"
#include "hash.h"
//#include "CollectionRec.h"
void attemptMergeAll ( int fd , void *state ) ;
//static char s_tfndbHadOppKey ;
//static key_t s_tfndbOppKey ;
Rdb::Rdb ( ) {
m_lastReclaim = -1;
m_cacheLastTime = 0;
m_cacheLastTotal = 0LL;
//m_numBases = 0;
m_inAddList = false;
m_collectionlessBase = NULL;
m_initialized = false;
m_numMergesOut = 0;
//memset ( m_bases , 0 , sizeof(RdbBase *) * MAX_COLLS );
reset();
}
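// free the collectionless base (if any), reset the tree, buckets and RdbMem,
// and clear the closing/saving state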
void Rdb::reset ( ) {
//if ( m_needsSave ) {
// log(LOG_LOGIC,"db: Trying to reset tree without saving.");
// char *xx = NULL; *xx = 0;
// return;
//}
/*
for ( int32_t i = 0 ; i < m_numBases ; i++ ) {
if ( ! m_bases[i] ) continue;
mdelete ( m_bases[i] , sizeof(RdbBase) , "Rdb Coll" );
delete (m_bases[i]);
m_bases[i] = NULL;
}
m_numBases = 0;
*/
if ( m_collectionlessBase ) {
RdbBase *base = m_collectionlessBase;
mdelete (base, sizeof(RdbBase), "Rdb Coll");
delete (base);
m_collectionlessBase = NULL;
}
// reset tree and cache
m_tree.reset();
m_buckets.reset();
m_mem.reset();
//m_cache.reset();
m_lastWrite = 0LL;
m_isClosing = false;
m_isClosed = false;
m_isSaving = false;
m_isReallyClosing = false;
m_registered = false;
m_lastTime = 0LL;
}
Rdb::~Rdb ( ) {
reset();
}
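// . returns the RdbBase for a collection
// . collectionless rdbs (statsdb, cachedb, ...) share a single base
// . otherwise we ask the CollectionRec, which may swap the base in on demand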
RdbBase *Rdb::getBase ( collnum_t collnum ) {
if ( m_isCollectionLess )
return m_collectionlessBase;
// RdbBase for statsdb, etc. resides in collrec #0 i guess
CollectionRec *cr = g_collectiondb.m_recs[collnum];
if ( ! cr ) return NULL;
// this might load the rdbbase on demand now
return cr->getBase ( m_rdbId ); // m_bases[(unsigned char)m_rdbId];
}
// used by Rdb::addBase1()
void Rdb::addBase ( collnum_t collnum , RdbBase *base ) {
// if we are collectionless, like g_statsdb.m_rdb or
// g_cachedb.m_rdb, etc.. shared by all collections essentially.
if ( m_isCollectionLess ) {
m_collectionlessBase = base;
return;
}
CollectionRec *cr = g_collectiondb.m_recs[collnum];
if ( ! cr ) return;
//if ( cr->m_bases[(unsigned char)m_rdbId] ) { char *xx=NULL;*xx=0; }
RdbBase *oldBase = cr->getBasePtr ( m_rdbId );
if ( oldBase ) { char *xx=NULL;*xx=0; }
//cr->m_bases[(unsigned char)m_rdbId] = base;
cr->setBasePtr ( m_rdbId , base );
log ( LOG_DEBUG,"db: added base to collrec "
"for rdb=%s rdbid=%"INT32" coll=%s collnum=%"INT32" "
"base=0x%"PTRFMT"",
m_dbname,(int32_t)m_rdbId,cr->m_coll,(int32_t)collnum,
(PTRTYPE)base);
}
// JAB: warning abatement
//static bool g_init = false;
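// . one-time initialization of this Rdb
// . stores the config parms, picks the disk page size based on our rdb id,
//   sets up the in-memory tree (or buckets for posdb/indexdb), inits RdbMem
//   for dataful rdbs and loads any saved tree from disk
// . returns false and sets g_errno on error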
bool Rdb::init ( char *dir ,
char *dbname ,
bool dedup ,
int32_t fixedDataSize ,
int32_t minToMerge ,
int32_t maxTreeMem ,
int32_t maxTreeNodes ,
bool isTreeBalanced ,
int32_t maxCacheMem ,
int32_t maxCacheNodes ,
bool useHalfKeys ,
bool loadCacheFromDisk ,
DiskPageCache *pc ,
bool isTitledb ,
bool preloadDiskPageCache ,
char keySize ,
bool biasDiskPageCache ,
bool isCollectionLess ) {
// reset all
reset();
// sanity
if ( ! dir ) { char *xx=NULL;*xx=0; }
// this is the working dir, all collection repositories are subdirs
//m_dir.set ( dir );
// catdb, statsdb, accessdb, facebookdb, syncdb
m_isCollectionLess = isCollectionLess;
// save the dbname NULL terminated into m_dbname/m_dbnameLen
m_dbnameLen = gbstrlen ( dbname );
gbmemcpy ( m_dbname , dbname , m_dbnameLen );
m_dbname [ m_dbnameLen ] = '\0';
// store the other parameters for initializing each Rdb
m_dedup = dedup;
m_fixedDataSize = fixedDataSize;
m_maxTreeMem = maxTreeMem;
m_useHalfKeys = useHalfKeys;
m_pc = pc;
m_isTitledb = isTitledb;
m_preloadCache = preloadDiskPageCache;
m_biasDiskPageCache = biasDiskPageCache;
m_ks = keySize;
m_inDumpLoop = false;
// set our id
m_rdbId = getIdFromRdb ( this );
if ( m_rdbId <= 0 )
return log(LOG_LOGIC,"db: dbname of %s is invalid.",dbname);
// sanity check
if ( m_ks != getKeySizeFromRdbId(m_rdbId) ) { char*xx=NULL;*xx=0;}
// get page size
m_pageSize = GB_TFNDB_PAGE_SIZE;
if ( m_rdbId == RDB_INDEXDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_INDEXDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_POSDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_POSDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
//if ( m_rdbId == RDB_DATEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
//if ( m_rdbId == RDB2_DATEDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_SECTIONDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_PLACEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_SECTIONDB2) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_PLACEDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_TITLEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_TITLEDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_SPIDERDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_DOLEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_SPIDERDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_CACHEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_SERPDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_MONITORDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_LINKDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_LINKDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_REVDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_REVDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
// let's obsolete this rec/list cache because using the
// disk page cache cleverly is usually better than this,
// because this ignores newly added data (it is not realtime),
// and it really only saves us from having to intersect a
// bunch of indexdb/datedb lists.
/*
loadCacheFromDisk = false;
maxCacheMem = 0;
maxCacheNodes = 0;
// . set up our cache
// . we could be adding lists so keep fixedDataSize -1 for cache
if ( ! m_cache.init ( maxCacheMem ,
fixedDataSize ,
true , // support lists
maxCacheNodes ,
m_useHalfKeys ,
m_dbname ,
loadCacheFromDisk ,
m_ks , // cache key size
m_ks ) ) // data key size
return false;
*/
// we can't merge more than MAX_RDB_FILES files at a time
if ( minToMerge > MAX_RDB_FILES ) minToMerge = MAX_RDB_FILES;
m_minToMerge = minToMerge;
// . if we're in read only mode, don't bother with *ANY* trees
// . no, let's bother with them now because we are missing
// search results running the tmp cluster ('./gb tmpstart')
/*
if ( g_conf.m_readOnlyMode ) {
// make sure to set m_ks for m_tree
m_tree.m_ks = m_ks;
// add the single dummy collection for catdb
if ( g_catdb.getRdb() == this ) return g_catdb.addColl ( NULL);
//goto preload;
return true;
}
*/
//;
m_useTree = true;
if (//g_conf.m_useBuckets &&
(m_rdbId == RDB_INDEXDB ||
m_rdbId == RDB2_INDEXDB2 ||
m_rdbId == RDB_POSDB ||
m_rdbId == RDB2_POSDB2
//m_rdbId == RDB_DATEDB ||
//m_rdbId == RDB2_DATEDB2
//m_rdbId == RDB_LINKDB ||
//m_rdbId == RDB2_LINKDB2))
))
m_useTree = false;
sprintf(m_treeName,"tree-%s",m_dbname);
// . if maxTreeNodes is -1, means auto compute it
// . set tree to use our fixed data size
// . returns false and sets g_errno on error
if(m_useTree) {
int32_t rdbId = m_rdbId;
// statsdb is collectionless really so pass on to tree
if ( rdbId == RDB_STATSDB ) rdbId = -1;
if ( ! m_tree.set ( fixedDataSize ,
maxTreeNodes , // max # nodes in tree
isTreeBalanced ,
maxTreeMem ,
false , // own data?
m_treeName , // allocname
false , // dataInPtrs?
m_dbname ,
m_ks ,
// make useProtection true for debugging
false , // use protection?
false , // allowdups?
rdbId ) )
return false;
}
else {
if(treeFileExists()) {
m_tree.set ( fixedDataSize ,
maxTreeNodes , // max # nodes in tree
isTreeBalanced ,
maxTreeMem ,
false , // own data?
m_treeName , // allocname
false , // dataInPtrs?
m_dbname ,
m_ks ,
// make useProtection true for debugging
false , // use protection?
false , // allowdups?
m_rdbId );
}
// set this then
sprintf(m_treeName,"buckets-%s",m_dbname);
if( ! m_buckets.set ( fixedDataSize,
maxTreeMem,
false, //own data
m_treeName, // allocName
m_rdbId,
false, //data in ptrs
m_dbname,
m_ks,
false)) { //use protection
return false;
}
}
// now get how much mem the tree is using (not including stored recs)
int32_t dataMem;
if(m_useTree) dataMem = maxTreeMem - m_tree.getTreeOverhead();
else dataMem = maxTreeMem - m_buckets.getMemOccupied( );
sprintf(m_memName,"mem-%s",m_dbname);
//if ( fixedDataSize != 0 && ! m_mem.init ( &m_dump , dataMem ) )
if ( fixedDataSize != 0 && ! m_mem.init ( this , dataMem , m_ks ,
m_memName ) )
return log("db: Failed to initialize memory: %s.",
mstrerror(g_errno));
// load any saved tree
if ( ! loadTree ( ) ) return false;
// i prefer to put these into Statsdb::init() etc.
// rather than here because then if we disable an rdb we don't
// have to mess with code here as well:
// add the single dummy collection for catdb
//if ( g_catdb.getRdb() == this )
// return g_catdb.addColl ( NULL );
// we now call g_*db.addColl(NULL) for Statsdb::init(),
// Cachedb::init(), ... directly
//if ( g_statsdb.getRdb() == this )
// return g_statsdb.addColl ( NULL );
//if ( g_cachedb.getRdb() == this )
// return g_cachedb.addColl ( NULL );
//if ( g_serpdb.getRdb() == this )
// return g_serpdb.addColl ( NULL );
//else if ( g_accessdb.getRdb() == this )
// return g_accessdb.addColl ( NULL );
//else if ( g_facebookdb.getRdb() == this )
// return g_facebookdb.addColl ( NULL );
//if ( g_syncdb.getRdb() == this )
// return g_syncdb.addColl ( NULL );
// set this for use below
//*(int64_t *)m_gbcounteventsTermId =
// hash64n("gbeventcount")&TERMID_MASK;
m_initialized = true;
// success
return true;
}
// . when the PageRepair.cpp rebuilds our rdb for a particular collection
// we clear out the old data just for that collection and point to the newly
// rebuilt data
// . rdb2 is the rebuilt/secondary rdb we want to set this primary rdb to
// . rename, for safe keeping purposes, current old files to :
// trash/coll.mycoll.timestamp.indexdb0001.dat.part30 and
// trash/timestamp.indexdb-saved.dat
// . rename newly rebuilt files from indexdbRebuild0001.dat.part30 to
// indexdb0001.dat.part30 (just remove the "Rebuild" from the filename)
// . remove all recs for that coll from the tree AND cache because the rebuilt
// rdb is replacing the primary rdb for this collection
// . the rebuilt secondary tree should be empty! (force dumped)
// . reload the maps/files in the primary rdb after we remove "Rebuild" from
// their filenames
// . returns false and sets g_errno on error
bool Rdb::updateToRebuildFiles ( Rdb *rdb2 , char *coll ) {
// how come not in repair mode?
if ( ! g_repairMode ) { char *xx = NULL; *xx = 0; }
// make a dir in the trash subfolder to hold them
uint32_t t = (uint32_t)getTime();
char dstDir[256];
// make the trash dir if not there
sprintf ( dstDir , "%s/trash/" , g_hostdb.m_dir );
int32_t status = ::mkdir ( dstDir ,
S_IRUSR | S_IWUSR | S_IXUSR |
S_IRGRP | S_IWGRP | S_IXGRP |
S_IROTH | S_IXOTH ) ;
// we have to create it
sprintf ( dstDir , "%s/trash/rebuilt%"UINT32"/" , g_hostdb.m_dir , t );
status = ::mkdir ( dstDir ,
S_IRUSR | S_IWUSR | S_IXUSR |
S_IRGRP | S_IWGRP | S_IXGRP |
S_IROTH | S_IXOTH ) ;
if ( status && errno != EEXIST ) {
g_errno = errno;
return log("repair: Could not mkdir(%s): %s",dstDir,
mstrerror(errno));
}
// clear it in case it existed
g_errno = 0;
// if some things need to be saved, how did that happen?
// we saved everything before we entered repair mode and did not
// allow anything more to be added... and we do not allow any
// collections to be deleted via Collectiondb::deleteRec() when
// in repair mode... how could this happen?
//if ( m_needsSave ) { char *xx = NULL; *xx = 0; }
// delete old collection recs
CollectionRec *cr = g_collectiondb.getRec ( coll );
if ( ! cr ) return log("db: Exchange could not find coll, %s.",coll);
collnum_t collnum = cr->m_collnum;
RdbBase *base = getBase ( collnum );
if ( ! base )
return log("repair: Could not find old base for %s.",coll);
RdbBase *base2 = rdb2->getBase ( collnum );
if ( ! base2 )
return log("repair: Could not find new base for %s.",coll);
if ( rdb2->getNumUsedNodes() != 0 )
return log("repair: Recs present in rebuilt tree for db %s "
"and collection %s.",m_dbname,coll);
logf(LOG_INFO,"repair: Updating rdb %s for collection %s.",
m_dbname,coll);
// now MOVE the tree file on disk
char src[1024];
char dst[1024];
if(m_useTree) {
sprintf ( src , "%s/%s-saved.dat" , g_hostdb.m_dir , m_dbname );
sprintf ( dst , "%s/%s-saved.dat" , dstDir , m_dbname );
}
else {
sprintf ( src , "%s/%s-buckets-saved.dat", g_hostdb.m_dir , m_dbname );
sprintf ( dst , "%s/%s-buckets-saved.dat", dstDir , m_dbname );
}
char *structName = "tree";
if(!m_useTree) structName = "buckets";
char cmd[2048+32];
sprintf ( cmd , "mv %s %s",src,dst);
logf(LOG_INFO,"repair: Moving *-saved.dat %s. %s", structName, cmd);
errno = 0;
if ( gbsystem ( cmd ) == -1 )
return log("repair: Moving saved %s had error: %s.",
structName, mstrerror(errno));
log("repair: Moving saved %s: %s",structName, mstrerror(errno));
// now move our map and data files to the "trash" subdir, "dstDir"
logf(LOG_INFO,"repair: Moving old data and map files to trash.");
if ( ! base->moveToTrash(dstDir) )
return log("repair: Trashing new rdb for %s failed.",coll);
// . now rename the newly rebuilt files to our filenames
// . just removes the "Rebuild" from their filenames
logf(LOG_INFO,"repair: Renaming new data and map files.");
if ( ! base2->removeRebuildFromFilenames() )
return log("repair: Renaming old rdb for %s failed.",coll);
// reset the rdb bases (clears out files and maps from mem)
base->reset ();
base2->reset();
// reload the newly rebuilt files into the primary rdb
logf(LOG_INFO,"repair: Loading new data and map files.");
if ( ! base->setFiles() )
return log("repair: Failed to set new files for %s.",coll);
// allow rdb2->reset() to succeed without dumping core
rdb2->m_tree.m_needsSave = false;
rdb2->m_buckets.setNeedsSave(false);
// . make rdb2, the secondary rdb used for rebuilding, give up its mem
// . if we do another rebuild its ::init() will be called by PageRepair
rdb2->reset();
// clean out tree, newly rebuilt rdb does not have any data in tree
if ( m_useTree ) m_tree.delColl ( collnum );
else m_buckets.delColl( collnum );
// reset our cache
//m_cache.clear ( collnum );
// Success
return true;
}
// . returns false and sets g_errno on error, returns true on success
// . if this rdb is collectionless we set m_collectionlessBase in addBase()
bool Rdb::addRdbBase1 ( char *coll ) { // addColl()
collnum_t collnum = g_collectiondb.getCollnum ( coll );
return addRdbBase2 ( collnum );
}
bool Rdb::addRdbBase2 ( collnum_t collnum ) { // addColl2()
if ( ! m_initialized ) {
g_errno = EBADENGINEER;
return log("db: adding coll to uninitialized rdb!");
}
// catdb,statsbaccessdb,facebookdb,syncdb
if ( m_isCollectionLess )
collnum = (collnum_t)0;
// ensure no max breach
if ( collnum < (collnum_t) 0 ) {
g_errno = ENOBUFS;
int64_t maxColls = 1LL << (sizeof(collnum_t)*8);
return log("db: %s: Failed to add collection #%i. Would "
"breech maximum number of collections, %"INT64".",
m_dbname,collnum,maxColls);
}
CollectionRec *cr = NULL;
char *coll = NULL;
if ( ! m_isCollectionLess ) cr = g_collectiondb.m_recs[collnum];
if ( cr ) coll = cr->m_coll;
if ( m_isCollectionLess )
coll = "collectionless";
// . ensure no previous one exists
// . well it will be there but will be uninitialized, m_rdb will be NULL
RdbBase *base = NULL;
if ( cr ) base = cr->getBasePtr ( m_rdbId );
if ( base ) { // m_bases [ collnum ] ) {
g_errno = EBADENGINEER;
return log("db: Rdb for db \"%s\" and "
"collection \"%s\" (collnum %"INT32") exists.",
m_dbname,coll,(int32_t)collnum);
}
// make a new one
RdbBase *newColl = NULL;
try {newColl= new(RdbBase);}
catch(...){
g_errno = ENOMEM;
return log("db: %s: Failed to allocate %"INT32" bytes for "
"collection \"%s\".",
m_dbname,(int32_t)sizeof(Rdb),coll);
}
mnew(newColl, sizeof(RdbBase), "Rdb Coll");
//m_bases [ collnum ] = newColl;
base = newColl;
// add it to CollectionRec::m_bases[] base ptrs array
addBase ( collnum , newColl );
// . set CollectionRec::m_numPos/NegKeysInTree[rdbId]
// . these counts are now stored in the CollectionRec and not
// in RdbTree since the # of collections can be huge!
if ( m_useTree ) {
m_tree.setNumKeys ( cr );
}
RdbTree *tree = NULL;
RdbBuckets *buckets = NULL;
if(m_useTree) tree = &m_tree;
else buckets = &m_buckets;
// . init it
// . g_hostdb.m_dir should end in /
if ( ! base->init ( g_hostdb.m_dir, // m_dir.getDir() ,
m_dbname ,
m_dedup ,
m_fixedDataSize ,
m_minToMerge ,
m_useHalfKeys ,
m_ks ,
m_pageSize ,
coll ,
collnum ,
tree ,
buckets ,
&m_dump ,
this ,
m_pc ,
m_isTitledb ,
m_preloadCache ,
m_biasDiskPageCache ) ) {
logf(LOG_INFO,"db: %s: Failed to initialize db for "
"collection \"%s\".", m_dbname,coll);
//exit(-1);
return false;
}
//if ( (int32_t)collnum >= m_numBases ) m_numBases = (int32_t)collnum + 1;
// Success
return true;
}
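// reset a collection's RdbBase if it is loaded; uses getBasePtr() so we do
// not swap the base in just to reset it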
bool Rdb::resetBase ( collnum_t collnum ) {
CollectionRec *cr = g_collectiondb.getRec(collnum);
if ( ! cr ) return true;
//RdbBase *base = cr->m_bases[(unsigned char)m_rdbId];
// get the ptr, don't use CollectionRec::getBase() so we do not swapin
RdbBase *base = cr->getBasePtr (m_rdbId);
if ( ! base ) return true;
base->reset();
return true;
}
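// . remove all recs for this collnum from the tree/buckets and from disk
// . only allowed for doledb; the files are moved into the trash subdir and
//   the collection's in-tree key counts are reset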
bool Rdb::deleteAllRecs ( collnum_t collnum ) {
// remove from tree
if(m_useTree) m_tree.delColl ( collnum );
else m_buckets.delColl ( collnum );
// only for doledb now: because we unlink, we do not move the files
// into the trash subdir, and doledb is easily regenerated. i don't
// want to take the risk with other files.
if ( m_rdbId != RDB_DOLEDB ) { char *xx=NULL;*xx=0; }
CollectionRec *cr = g_collectiondb.getRec ( collnum );
// deleted from under us?
if ( ! cr ) {
log("rdb: deleteallrecs: cr is NULL");
return true;
}
//RdbBase *base = cr->m_bases[(unsigned char)m_rdbId];
RdbBase *base = cr->getBase(m_rdbId);
if ( ! base ) return true;
// scan files in there
for ( int32_t i = 0 ; i < base->m_numFiles ; i++ ) {
BigFile *f = base->m_files[i];
// move to trash
char newdir[1024];
sprintf(newdir, "%strash/",g_hostdb.m_dir);
f->move ( newdir );
}
// nuke all the files
base->reset();
// reset rec counts
cr->m_numNegKeysInTree[RDB_DOLEDB] = 0;
cr->m_numPosKeysInTree[RDB_DOLEDB] = 0;
return true;
}
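// make the trash subdir under the working dir if it does not already exist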
bool makeTrashDir() {
char trash[1024];
sprintf(trash, "%strash/",g_hostdb.m_dir);
if ( ::mkdir ( trash,
S_IRUSR | S_IWUSR | S_IXUSR |
S_IRGRP | S_IWGRP | S_IXGRP |
S_IROTH | S_IXOTH ) == -1 ) {
if ( errno != EEXIST ) {
log("dir: mkdir %s had error: %s",
trash,mstrerror(errno));
return false;
}
// clear it
errno = 0;
}
return true;
}
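// . remove this collection's recs from the tree/buckets and delete its base
// . the collection's coll.<name>.<collnum> dir is renamed into the trash
//   subdir
// . if newCollnum differs from collnum a fresh base is added under newCollnum
//   (the collection-reset case)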
bool Rdb::deleteColl ( collnum_t collnum , collnum_t newCollnum ) {
//char *coll = g_collectiondb.m_recs[collnum]->m_coll;
// remove these collnums from tree
if(m_useTree) m_tree.delColl ( collnum );
else m_buckets.delColl ( collnum );
// . close all files, set m_numFiles to 0 in RdbBase
// . TODO: what about outstanding merge or dump operations?
// . it seems like we can't really recycle this too easily
// because reset is not resetting filenames or the directory name?
// just nuke it and rebuild using addRdbBase2()...
RdbBase *oldBase = getBase ( collnum );
mdelete (oldBase, sizeof(RdbBase), "Rdb Coll");
delete (oldBase);
//base->reset( );
// NULL it out...
CollectionRec *oldcr = g_collectiondb.getRec(collnum);
//oldcr->m_bases[(unsigned char)m_rdbId] = NULL;
oldcr->setBasePtr ( m_rdbId , NULL );
char *coll = oldcr->m_coll;
char *msg = "deleted";
// if just resetting recycle base
if ( collnum != newCollnum ) {
addRdbBase2 ( newCollnum );
// make a new base now
//RdbBase *newBase = mnew
// new cr
//CollectionRec *newcr = g_collectiondb.getRec(newCollnum);
// update this as well
//base->m_collnum = newCollnum;
// and the array
//newcr->m_bases[(unsigned char)m_rdbId] = base;
msg = "moved";
}
log(LOG_DEBUG,"db: %s base from collrec "
"rdb=%s rdbid=%"INT32" coll=%s collnum=%"INT32" newcollnum=%"INT32"",
msg,m_dbname,(int32_t)m_rdbId,coll,(int32_t)collnum,
(int32_t)newCollnum);
// new dir. otherwise RdbDump will try to dump out the recs to
// the old dir and it will end up coring
//char tmp[1024];
//sprintf(tmp , "%scoll.%s.%"INT32"",g_hostdb.m_dir,coll,(int32_t)newCollnum );
//m_dir.set ( tmp );
// move the files into trash
// nuke it on disk
char oldname[1024];
sprintf(oldname, "%scoll.%s.%"INT32"/",g_hostdb.m_dir,coll,
(int32_t)collnum);
char newname[1024];
sprintf(newname, "%strash/coll.%s.%"INT32".%"INT64"/",g_hostdb.m_dir,coll,
(int32_t)collnum,gettimeofdayInMilliseconds());
//Dir d; d.set ( dname );
// ensure ./trash dir is there
makeTrashDir();
// move into that dir
::rename ( oldname , newname );
log ( LOG_DEBUG, "db: cleared data for coll \"%s\" (%"INT32") rdb=%s.",
coll,(int32_t)collnum ,getDbnameFromId(m_rdbId));
return true;
}
// returns false and sets g_errno on error, returns true on success
bool Rdb::delColl ( char *coll ) {
collnum_t collnum = g_collectiondb.getCollnum ( coll );
RdbBase *base = getBase ( collnum );
// ensure its there
if ( collnum < (collnum_t)0 || ! base ) { // m_bases [ collnum ] ) {
g_errno = EBADENGINEER;
return log("db: %s: Failed to delete collection #%i. Does "
"not exist.", m_dbname,collnum);
}
// move all files to trash and clear the tree/buckets
deleteColl ( collnum , collnum );
// remove these collnums from tree
//if(m_useTree) m_tree.delColl ( collnum );
//else m_buckets.delColl ( collnum );
// don't forget to save the tree to disk
//m_needsSave = true;
// and from cache, just clear everything out
//m_cache.clear ( collnum );
// decrement m_numBases if we need to
//while ( ! m_bases[m_numBases-1] ) m_numBases--;
return true;
}
static void doneSavingWrapper ( void *state );
static void closeSleepWrapper ( int fd , void *state );
// . returns false if blocked true otherwise
// . sets g_errno on error
// . CAUTION: only set urgent to true if we got a SIGSEGV or SIGPWR...
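// . when isReallyClosing is true we suspend merges, close every base's maps
//   and set m_isClosed once the tree/buckets save completes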
bool Rdb::close ( void *state , void (* callback)(void *state ), bool urgent ,
bool isReallyClosing ) {
// unregister in case already registered
if ( m_registered )
g_loop.unregisterSleepCallback (this,closeSleepWrapper);
// reset g_errno
g_errno = 0;
// return true if no RdbBases in m_bases[] to close
if ( getNumBases() <= 0 ) return true;
// return true if already closed
if ( m_isClosed ) return true;
// don't call more than once
if ( m_isSaving ) return true;
// update last write time so main.cpp doesn't keep calling us
m_lastWrite = gettimeofdayInMilliseconds();
// set the m_isClosing flag in case we're waiting for a dump.
// then, when the dump is done, it will come here again
m_closeState = state;
m_closeCallback = callback;
m_urgent = urgent;
m_isReallyClosing = isReallyClosing;
if ( m_isReallyClosing ) m_isClosing = true;
// . don't call more than once
// . really only for when isReallyClosing is false... just a quick save
m_isSaving = true;
// suspend any merge permanently (not just for this rdb), we're exiting
if ( m_isReallyClosing ) {
g_merge.suspendMerge();
g_merge2.suspendMerge();
}
// . allow dumps to complete unless we're urgent
// . if we're urgent, we'll end up with a half dumped file, which
// is ok now, since it should get its RdbMap auto-generated for it
// when we come back up again
if ( ! m_urgent && m_inDumpLoop ) { // m_dump.isDumping() ) {
m_isSaving = false;
char *tt = "save";
if ( m_isReallyClosing ) tt = "close";
return log ( LOG_INFO,"db: Cannot %s %s until dump finishes.",
tt,m_dbname);
}
// if a write thread is outstanding, and we exit now, we can end up
// freeing the buffer it is writing and it will core... and things
// won't be in sync with the map when it is saved below...
if ( m_isReallyClosing && g_merge.isMerging() &&
// if we cored, we are urgent and need to make sure we save even
// if we are merging this rdb...
! m_urgent &&
g_merge.m_rdbId == m_rdbId &&
( g_merge.m_numThreads || g_merge.m_dump.m_isDumping ) ) {
// do not spam this message
int64_t now = gettimeofdayInMilliseconds();
if ( now - m_lastTime >= 500 ) {
log(LOG_INFO,"db: Waiting for merge to finish last "
"write for %s.",m_dbname);
m_lastTime = now;
}
g_loop.registerSleepCallback (500,this,closeSleepWrapper);
m_registered = true;
// allow to be called again
m_isSaving = false;
return false;
}
if ( m_isReallyClosing && g_merge2.isMerging() &&
// if we cored, we are urgent and need to make sure we save even
// if we are merging this rdb...
! m_urgent &&
g_merge2.m_rdbId == m_rdbId &&
( g_merge2.m_numThreads || g_merge2.m_dump.m_isDumping ) ) {
// do not spam this message
int64_t now = gettimeofdayInMilliseconds();
if ( now - m_lastTime >= 500 ) {
log(LOG_INFO,"db: Waiting for merge to finish last "
"write for %s.",m_dbname);
m_lastTime = now;
}
g_loop.registerSleepCallback (500,this,closeSleepWrapper);
m_registered = true;
// allow to be called again
m_isSaving = false;
return false;
}
// if we were merging to a file and are being closed urgently
// save the map! Also save the maps of the files we were merging
// in case they got their heads chopped (RdbMap::chopHead()) which
// we do to save disk space while merging.
// try to save the cache, may not save
//if ( m_isReallyClosing&&m_cache.useDisk() ) m_cache.save ( m_dbname);
if ( m_isReallyClosing ) {
// now loop over bases
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
//CollectionRec *cr = g_collectiondb.m_recs[i];
// there can be holes if one was deleted
//if ( ! cr ) continue;
// shut it down
RdbBase *base = getBase ( i );
//if ( m_bases[i] ) m_bases[i]->closeMaps ( m_urgent );
if ( base ) base->closeMaps ( m_urgent );
}
//for ( int32_t i = 0 ; i < m_numFiles ; i++ )
// // this won't write it if it doesn't need to
// if ( m_maps[i] ) m_maps[i]->close ( m_urgent );
}
// if TREE doesn't need save return
//if ( ! m_needsSave ) {
// m_isSaving=false;
// if ( m_isReallyClosing ) m_isClosed = true;
// return true;
//}
// HACK: this seems to get called 20x per second!! when merging
//return log ( 0 , "Rdb::close: waiting for merge to finish.");
// suspend any merge going on, can be resumed later,saves state to disk
//s_merge.suspendMerge();
// . if there are no nodes in the tree then don't dump it
// . NO! because we could have deleted all the recs and the old saved
// version is still on disk -- it needs to be overwritten!!
//if ( m_tree.getNumUsedNodes() <= 0 ) { m_isClosed=true; return true;}
// save it using a thread?
bool useThread ;
if ( m_urgent ) useThread = false;
else if ( m_isReallyClosing ) useThread = false;
else useThread = true ;
// create an rdb file name to save tree to
//char filename[256];
//sprintf(filename,"%s-saving.dat",m_dbname);
//m_saveFile.set ( getDir() , filename );
// log msg
//log (0,"Rdb: saving to %s",filename );
// close it
// open it up
//m_saveFile.open ( O_RDWR | O_CREAT ) ;
// . assume save not needed
// . but if data should come in while we are saving then we'll
// need to save again
//m_needsSave = false;
// . use the new way now
// . returns false if blocked, true otherwise
// . sets g_errno on error
if(m_useTree) {
if ( ! m_tree.fastSave ( getDir() ,
m_dbname , // &m_saveFile ,
useThread ,
this ,
doneSavingWrapper ) )
return false;
}
else {
if ( ! m_buckets.fastSave ( getDir() ,
useThread ,
this ,
doneSavingWrapper ) )
return false;
}
//log("Rdb::close: save FAILED");
// . dump tree into this file
// . only blocks if niceness is 0
/*
if ( ! m_dump.set ( &m_saveFile ,
&m_tree ,
NULL , // RdbMap
NULL , // RdbCache
1024*100 , // 100k write buf
false , // put keys in order? no!
m_dedup ,
0 , // niceness of dump
this , // state
doneSavingWrapper) ) return false;
*/
// we saved it w/o blocking OR we had an g_errno
doneSaving();
return true;
}
void closeSleepWrapper ( int fd , void *state ) {
Rdb *THIS = (Rdb *)state;
// sanity check
if ( ! THIS->m_isClosing ) { char *xx = NULL; *xx = 0; }
// continue closing, this returns false if blocked
if ( ! THIS->close ( THIS->m_closeState,
THIS->m_closeCallback ,
false ,
true ) ) return;
// otherwise, we call the callback
THIS->m_closeCallback ( THIS->m_closeState );
}
void doneSavingWrapper ( void *state ) {
Rdb *THIS = (Rdb *)state;
THIS->doneSaving();
// . call the callback if any
// . this lets PageMaster.cpp know when we're closed
if (THIS->m_closeCallback) THIS->m_closeCallback(THIS->m_closeState);
}
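// called when the tree/buckets save completes, either inline from close()
// or from the save thread via doneSavingWrapper()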
void Rdb::doneSaving ( ) {
// bail if g_errno was set
if ( g_errno ) {
log("db: Had error saving %s-saved.dat: %s.",
m_dbname,mstrerror(g_errno));
g_errno = 0;
//m_needsSave = true;
m_isSaving = false;
return;
}
// . let sync file know this rdb was saved to disk here
// . only append this if we are truly 100% sync'ed on disk
//if ( ! m_needsSave ) g_sync.addOp ( OP_CLOSE , &m_dummyFile , 0 );
// a temp fix
//if ( strstr ( m_saveFile.getFilename() , "saved" ) ) {
// m_needsSave = true;
// log("Rdb::doneSaving: %s is already saved!",
// m_saveFile.getFilename());
// return;
//}
// sanity
if ( m_dbname == NULL || m_dbname[0]=='\0' ) {
char *xx=NULL;*xx=0; }
// any error was handled above, so this just logs the successful save
logf(LOG_INFO,"db: Successfully saved %s-saved.dat.", m_dbname);
// i moved the rename to within the thread
// create the rdb file name we dumped to: "saving"
//char filename[256];
//sprintf(filename,"%s-saved.dat",m_dbname);
//m_saveFile.rename ( filename );
// close up
//m_saveFile.close();
// mdw ---> file doesn't save right, seems like it keeps the same length as the old file...
// . we're now closed
// . keep m_isClosing set to true so no one can add data
if ( m_isReallyClosing ) m_isClosed = true;
// we're all caught up
//if ( ! g_errno ) m_needsSave = false;
// . only reset this rdb if m_urgent is false... will free memory
// . seems to be a bug in pthreads so we have to do this check now
//if ( ! m_urgent && m_isReallyClosing ) reset();
// call it again now
m_isSaving = false;
// let's reset our stuff to free the memory!
//reset();
// continue closing if we were waiting for this dump
//if ( m_isClosing ) close ( );
}
bool Rdb::isSavingTree ( ) {
if ( m_useTree ) return m_tree.m_isSaving;
return m_buckets.m_isSaving;
}
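// save the in-memory tree (or buckets) to <dbname>-saved.dat in our
// working dir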
bool Rdb::saveTree ( bool useThread ) {
char *dbn = m_dbname;
if ( ! dbn ) dbn = "unknown";
if ( ! dbn[0] ) dbn = "unknown";
// note it
//if ( m_useTree && m_tree.m_needsSave )
// log("db: saving tree %s",dbn);
if ( ! m_useTree && m_buckets.needsSave() )
log("db: saving buckets %s",dbn);
// . if RdbTree::m_needsSave is false this will return true
// . if RdbTree::m_isSaving is true this will return false
// . returns false if blocked, true otherwise
// . sets g_errno on error
if(m_useTree) {
return m_tree.fastSave ( getDir() ,
m_dbname , // &m_saveFile ,
useThread ,
NULL , // state
NULL );// callback
}
else {
return m_buckets.fastSave ( getDir() ,
useThread ,
NULL , // state
NULL );// callback
}
}
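// save the RdbMap of each collection's RdbBase that is currently loaded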
bool Rdb::saveMaps ( bool useThread ) {
//for ( int32_t i = 0 ; i < m_numBases ; i++ )
// if ( m_bases[i] ) m_bases[i]->saveMaps ( useThread );
// now loop over bases
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
// shut it down
//RdbBase *base = getBase(i);
//if ( m_bases[i] ) m_bases[i]->closeMaps ( m_urgent );
//if ( base ) base->closeMaps ( m_urgent );
if ( base ) base->saveMaps ( useThread );
}
return true;
}
//bool Rdb::saveCache ( bool useThread ) {
// if ( m_cache.useDisk() ) m_cache.save ( useThread );//m_dbname );
// return true;
//}
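// returns true if a <dbname>-saved.dat tree file exists in our working dir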
bool Rdb::treeFileExists ( ) {
char filename[256];
sprintf(filename,"%s-saved.dat",m_dbname);
BigFile file;
file.set ( getDir() , filename , NULL ); // getStripeDir() );
return file.doesExist() > 0;
}
// returns false and sets g_errno on error
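// . loads <dbname>-saved.dat into the tree, or into the buckets if this rdb
//   uses buckets; a leftover tree file is imported into the buckets and then
//   renamed with a .old suffix (unless we are in read-only mode)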
bool Rdb::loadTree ( ) {
// get the filename of the saved tree
char filename[256];
sprintf(filename,"%s-saved.dat",m_dbname);
// set this to false
//m_needsSave = false;
// msg
//log (0,"Rdb::loadTree: loading %s",filename);
// set a BigFile to this filename
BigFile file;
char *dir = getDir();
file.set ( dir , filename , NULL ); // getStripeDir() );
bool treeExists = file.doesExist() > 0;
bool status = false ;
if ( treeExists ) {
// load the table with file named "THISDIR/saved"
status = m_tree.fastLoad ( &file , &m_mem ) ;
// we close it now instead of him
}
if(m_useTree) {
//if ( ! status ) {
//log("Rdb::loadTree: could not load tree fast.Trying way 1.");
// status = m_tree.oldLoad ( &file , &m_mem );
// m_needsSave = true;
//}
// this lost my freakin data after i reduced tfndbMaxTreeMem
// and was unable to load!!
//if ( ! status ) {
//log("Rdb::loadTree: could not load tree fast.Trying way 2.");
//status =m_dump.load ( this , m_fixedDataSize , &file , m_pc);
// m_needsSave = true;
//}
file.close();
if ( ! status && treeExists)
return log("db: Could not load saved tree.");
}
else {
if(!m_buckets.loadBuckets(m_dbname))
return log("db: Could not load saved buckets.");
int32_t numKeys = m_buckets.getNumKeys();
// log("db: Loaded %"INT32" recs from %s's buckets on disk.",
// numKeys, m_dbname);
if(!m_buckets.testAndRepair()) {
log("db: unrepairable buckets, "
"remove and restart.");
char *xx = NULL; *xx = 0;
}
if(treeExists) {
m_buckets.addTree(&m_tree);
if(m_buckets.getNumKeys() - numKeys > 0) {
log("db: Imported %"INT32" recs from %s's tree to "
"buckets.",
m_buckets.getNumKeys()-numKeys, m_dbname);
}
if(g_conf.m_readOnlyMode) {
m_buckets.setNeedsSave(false);
}
else {
char newFilename[256];
sprintf(newFilename,"%s-%"INT32".old",
filename, (int32_t)getTime());
bool usingThreads = g_conf.m_useThreads;
g_conf.m_useThreads = false;
file.rename(newFilename);
g_conf.m_useThreads = usingThreads;
m_tree.reset ( );
}
file.close();
}
}
return true;
}
static time_t s_lastTryTime = 0;
static void doneDumpingCollWrapper ( void *state ) ;
// . start dumping the tree
// . returns false and sets g_errno on error
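// . bails out (returning true) if there is nothing to dump, or if a dump,
//   save or quickpoll is already in progress and it is unsafe to dump now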
bool Rdb::dumpTree ( int32_t niceness ) {
if ( m_useTree ) {
if (m_tree.getNumUsedNodes() <= 0 ) return true;
}
else if (m_buckets.getNumKeys() <= 0 ) return true;
// never dump indexdb if we are the wikipedia cluster
if ( g_conf.m_isWikipedia && m_rdbId == RDB_INDEXDB )
return true;
// never dump doledb any more. it's rdbtree only.
if ( m_rdbId == RDB_DOLEDB )
return true;
// if we are in a quickpoll do not initiate dump.
// we might have been called by handleRequest4 with a niceness of 0
// which was niceness converted from 1
if ( g_loop.m_inQuickPoll ) return true;
// sanity checks
if ( g_loop.m_inQuickPoll ) { char *xx=NULL;*xx=0; }
// bail if already dumping
//if ( m_dump.isDumping() ) return true;
if ( m_inDumpLoop ) return true;
// . if tree is saving do not dump it, that removes things from tree
// . i think this caused a problem messing up RdbMem before when
// both happened at once
if ( m_useTree) { if(m_tree.m_isSaving ) return true; }
else if(m_buckets.isSaving()) return true;
// . if Process is saving, don't start a dump
if ( g_process.m_mode == SAVE_MODE ) return true;
// if it has been less than 3 seconds since our last failed attempt
// do not try again to avoid flooding our log
if ( getTime() - s_lastTryTime < 3 ) return true;
// or bail if we are trying to dump titledb while titledb is being
// merged because we do not want merge to overwrite tfndb recs written
// by dump using RdbDump::updateTfndbLoop()
//if ( m_rdbId == RDB_TITLEDB && g_merge.isMerging() &&
// g_merge.m_rdbId == RDB_TITLEDB ) {
// s_lastTryTime = getTime();
// log(LOG_INFO,"db: Can not dump titledb while titledb is "
// "being merged.");
// return true;
//}
// or if in repair mode, (not full repair mode) do not mess with any
// files in any coll unless they are secondary rdbs...
// this might affect us even though we have spidering paused to
// rebuild one specific collection. the other collection spiders
// are still going on...
/*
if ( g_repair.isRepairActive() &&
//! g_repair.m_fullRebuild &&
//! g_repair.m_rebuildNoSplits &&
//! g_repair.m_removeBadPages &&
! ::isSecondaryRdb ( m_rdbId ) &&
m_rdbId != RDB_TAGDB )
return true;
*/
// do not dump if a tfndb merge is going on, because the tfndb will
// lose its page cache and all the "adding links" will hog up all our
// memory.
if ( g_merge2.isMerging() && g_merge2.m_rdbId == RDB_TFNDB ) {
s_lastTryTime = getTime();
log(LOG_INFO,"db: Can not dump while tfndb is being merged.");
return true;
}
// . do not dump tfndb if indexdb is dumping
// . i haven't tried this yet, but it might help
//if ( m_rdbId == RDB_TFNDB && g_indexdb.isDumping() ) {
// s_lastTryTime = getTime();
// log(LOG_INFO,"db: Can not dump tfndb while indexdb dumping.");
// return true;
//}
// don't dump tfndb
if ( m_rdbId == RDB2_TFNDB2 || m_rdbId == RDB_TFNDB ) {
log("db: not dumping tfndb");
return true;
}
// don't dump if not 90% full
if ( ! needsDump() ) {
log(LOG_INFO,
"db: %s tree not 90 percent full but dumping.",m_dbname);
//return true;
}
// reset g_errno -- don't forget!
g_errno = 0;
// get max number of files
int32_t max = MAX_RDB_FILES - 2;
// but less if titledb, because it uses a tfn
if ( m_isTitledb && max > 240 ) max = 240;
// . keep the number of files down
// . dont dump all the way up to the max, leave one open for merging
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( base && base->m_numFiles >= max ) {
base->attemptMerge (1,false);//niceness,forced?
g_errno = ETOOMANYFILES;
break;
}
}
// . wait for all unlinking and renaming activity to flush out
// . we do not want to dump to a filename in the middle of being
// unlinked
if ( g_errno || g_numThreads > 0 ) {
// update this so we don't try too much and flood the log
// with error messages from RdbDump.cpp calling log() and
// quickly kicking the log file over 2G which seems to
// get the process killed
s_lastTryTime = getTime();
// now log a message
if ( g_numThreads > 0 )
log(LOG_INFO,"db: Waiting for previous unlink/rename "
"operations to finish before dumping %s.",m_dbname);
else
log("db: Failed to dump %s: %s.",
m_dbname,mstrerror(g_errno));
return false;
}
// remember niceness for calling setDump()
m_niceness = niceness;
// . suspend any merge going on, saves state to disk
// . is resumed when dump is completed
// m_merge.suspendMerge();
// allocate enough memory for the map of this file
//int32_t fileSize = m_tree.getMemOccupiedForList();
// . this returns false and sets g_errno on error
// . we return false if g_errno was set
//if ( ! m_maps[n]->setMapSizeFromFileSize ( fileSize ) ) return false;
// with titledb we can dump in 5meg chunks w/o worrying about
// RdbTree::deleteList() being way slow
/*
int32_t numUsedNodes = m_tree.getNumUsedNodes();
int32_t totalOverhead = m_tree.getRecOverhead() * numUsedNodes;
int32_t recSizes = m_tree.getMemOccupied() - totalOverhead;
// add the header, key plus dataSize, back in
int32_t headerSize = sizeof(key_t);
if ( m_fixedDataSize == -1 ) headerSize += 4;
recSizes += headerSize * numUsedNodes;
// get the avg rec size when serialized for a dump
int32_t avgRecSize;
if ( numUsedNodes > 0 ) avgRecSize = recSizes / numUsedNodes;
else avgRecSize = 12;
// the main problem is here is that RdbTree::deleteList() is slow
// as a function of the number of nodes
int32_t bufSize = 17000 * avgRecSize;
//if ( bufSize > 5*1024*1024 ) bufSize = 5*1024*1024;
// seems like RdbTree::getList() takes 2+ seconds when getting a 5meg
// list of titlerecs... why?
if ( bufSize > 400*1024 ) bufSize = 400*1024;
if ( bufSize < 200*1024 ) bufSize = 200*1024;
*/
// ok, no longer need token to dump!!!
/*
// bail if already waiting for it
if ( m_waitingForTokenForDump ) return true;
// debug msg
log("Rdb: %s: getting token for dump", m_dbname);
// don't repeat
m_waitingForTokenForDump = true;
// . get token before dumping
// . returns true and sets g_errno on error
// . returns true if we always have the token (just one host in group)
// . returns false if blocks (the usual case)
// . higher priority requests always supercede lower ones
// . ensure we only call this once per dump we need otherwise,
// gotTokenForDumpWrapper() may be called multiple times
if ( ! g_msg35.getToken ( this , gotTokenForDumpWrapper,1) ) //priority
return true ;
// bitch if we got token because there was an error somewhere
if ( g_errno ) {
log("Rdb::dumpTree:getToken: %s",mstrerror(g_errno));
g_errno = 0 ;
}
return gotTokenForDump();
}
void gotTokenForDumpWrapper ( void *state ) {
Rdb *THIS = (Rdb *)state;
THIS->gotTokenForDump();
}
// returns false and sets g_errno on error
bool Rdb::gotTokenForDump ( ) {
// no longer waiting for it
m_waitingForTokenForDump = false;
*/
// debug msg
log(LOG_INFO,"db: Dumping %s to disk. nice=%"INT32"",m_dbname,niceness);
// record last dump time so main.cpp will not save us this period
m_lastWrite = gettimeofdayInMilliseconds();
// only try to fix once per dump session
int64_t start = m_lastWrite; //gettimeofdayInMilliseconds();
// do not do chain testing because that is too slow
if ( m_useTree && ! m_tree.checkTree ( false /* printMsgs?*/, false/*chain?*/) ) {
log("db: %s tree was corrupted in memory. Trying to fix. "
"Your memory is probably bad. Please replace it.",
m_dbname);
// if fix failed why even try to dump?
if ( ! m_tree.fixTree() ) {
// only try to dump every 3 seconds
s_lastTryTime = getTime();
return log("db: Could not fix in memory data for %s. "
"Abandoning dump.",m_dbname);
}
}
log(LOG_INFO,
"db: Checking validity of in memory data of %s before dumping, "
"took %"INT64" ms.",m_dbname,gettimeofdayInMilliseconds()-start);
////
//
// see what collnums are in the tree and just try those
//
////
CollectionRec *cr = NULL;
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// reset his tree count flag thing
cr->m_treeCount = 0;
}
if ( m_useTree ) {
// now scan the rdbtree and inc treecount where appropriate
for ( int32_t i = 0 ; i < m_tree.m_minUnusedNode ; i++ ) {
// skip node if parents is -2 (unoccupied)
if ( m_tree.m_parents[i] == -2 ) continue;
// get rec from tree collnum
cr = g_collectiondb.m_recs[m_tree.m_collnums[i]];
if ( cr ) cr->m_treeCount++;
}
}
else {
for(int32_t i = 0; i < m_buckets.m_numBuckets; i++) {
RdbBucket *b = m_buckets.m_buckets[i];
collnum_t cn = b->getCollnum();
int32_t nk = b->getNumKeys();
for ( int32_t j = 0 ; j < nk; j++ ) {
cr = g_collectiondb.m_recs[cn];
if ( cr ) cr->m_treeCount++;
}
}
}
// loop through collections, dump each one
m_dumpCollnum = (collnum_t)-1;
// clear this for dumpCollLoop()
g_errno = 0;
m_dumpErrno = 0;
m_fn = -1000;
// this returns false if blocked, which means we're ok, so we ret true
if ( ! dumpCollLoop ( ) ) return true;
// if it returns true with g_errno set, there was an error
if ( g_errno ) return false;
// otherwise, it completed without blocking
doneDumping();
return true;
}
// returns false if blocked, true otherwise
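// . dumps one collection per pass; advances m_dumpCollnum to the next
//   collection with recs in the tree/buckets, adds a new file to its base
//   and kicks off RdbDump on it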
bool Rdb::dumpCollLoop ( ) {
loop:
// if no more, we're done...
if ( m_dumpCollnum >= getNumBases() ) return true;
// the only way g_errno can be set here is from a previous dump
// error?
if ( g_errno ) {
hadError:
// if swapped out, this will be NULL, so skip it
RdbBase *base = NULL;
CollectionRec *cr = NULL;
if ( m_dumpCollnum>=0 )
cr = g_collectiondb.m_recs[m_dumpCollnum];
if ( cr )
base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(m_dumpCollnum);
log("build: Error dumping collection: %s.",mstrerror(g_errno));
// . if we wrote nothing, remove the file
// . if coll was deleted under us, base will be NULL!
if ( base && (! base->m_files[m_fn]->doesExist() ||
base->m_files[m_fn]->getFileSize() <= 0) ) {
log("build: File %s is zero bytes, removing from "
"memory.",base->m_files[m_fn]->getFilename());
base->buryFiles ( m_fn , m_fn+1 );
}
// game over, man
doneDumping();
// update this so we don't try too much and flood the log
// with error messages
s_lastTryTime = getTime();
return true;
}
// advance for next round
m_dumpCollnum++;
// don't bother getting the base for all collections because
// we end up swapping them in
for ( ; m_dumpCollnum < getNumBases() ; m_dumpCollnum++ ) {
// collection rdbs like statsdb are ok to process
if ( m_isCollectionLess ) break;
// otherwise get the coll rec now
CollectionRec *cr = g_collectiondb.m_recs[m_dumpCollnum];
// skip if empty
if ( ! cr ) continue;
// skip if no recs in tree
if ( cr->m_treeCount == 0 ) continue;
// ok, it's good to dump
break;
}
// if no more, we're done...
if ( m_dumpCollnum >= getNumBases() ) return true;
// base is null if swapped out. skip it then. is that correct?
// probably not!
//RdbBase *base = cr->getBasePtr(m_rdbId);//m_dumpCollnum);
// swap it in for dumping purposes if we have to
// "cr" is NULL potentially for collectionless rdbs, like statsdb,
// so we can't involve that...
RdbBase *base = getBase(m_dumpCollnum);
// how can this happen? error swapping in?
if ( ! base ) {
log("rdb: dumpcollloop base was null for cn=%"INT32"",
(int32_t)m_dumpCollnum);
goto hadError;
}
// before we create the file, see if tree has anything for this coll
//key_t k; k.setMin();
if(m_useTree) {
char *k = KEYMIN();
int32_t nn = m_tree.getNextNode ( m_dumpCollnum , k );
if ( nn < 0 ) goto loop;
if ( m_tree.m_collnums[nn] != m_dumpCollnum ) goto loop;
}
else {
if(!m_buckets.collExists(m_dumpCollnum)) goto loop;
}
// . MDW ADDING A NEW FILE SHOULD BE IN RDBDUMP.CPP NOW... NO!
// . get the biggest fileId
int32_t id2 = -1;
if ( m_isTitledb ) {
//id2 = base->getAvailId2 ( );
// this is obsolete, make it always 000 now
id2 = 000;
// only allow 254 for the merge routine so we can merge into
// another file...
if ( id2 < 0 || id2 >= 254 )
return log(LOG_LOGIC,"db: rdb: Could not get "
"available secondary id for titledb: %s." ,
mstrerror(g_errno) );
}
// this file must not exist already, we are dumping the tree into it
m_fn = base->addNewFile ( id2 ) ;
if ( m_fn < 0 ) return log(LOG_LOGIC,"db: rdb: Failed to add new file "
"to dump %s: %s." ,
m_dbname,mstrerror(g_errno) );
log(LOG_INFO,"build: Dumping to %s/%s for coll \"%s\".",
base->m_files[m_fn]->m_dir,
base->m_files[m_fn]->getFilename() ,
g_collectiondb.getCollName ( m_dumpCollnum ) );
// . append it to "sync" state we have in memory
// . when host #0 sends a OP_SYNCTIME signal we dump to disk
//g_sync.addOp ( OP_OPEN , base->m_files[m_fn] , 0 );
// turn this shit off for now, it's STILL taking forever when dumping
// spiderdb -- like 2 secs sometimes!
//bufSize = 100*1024;
// . when it's getting a list from the tree almost everything is frozen
// . like 100ms sometimes, lower down to 25k buf size
//int32_t bufSize = 25*1024;
// what is the avg rec size?
int32_t numRecs;
int32_t avgSize;
if(m_useTree) {
numRecs = m_tree.getNumUsedNodes();
if ( numRecs <= 0 ) numRecs = 1;
avgSize = m_tree.getMemOccupiedForList() / numRecs;
}
else {
numRecs = m_buckets.getNumKeys();
avgSize = m_buckets.getRecSize();
}
// . it really depends on the rdb, for small rec rdbs 200k is too big
// because when getting an indexdb list from tree of 200k that's
// a lot more recs than for titledb!! by far.
// . 200k takes 17ms to get list and 37ms to delete it for indexdb
// on a 2.8Ghz pentium
//int32_t bufSize = 40*1024;
// . don't get more than 3000 recs from the tree because it gets slow
// . we'd like to write as much out as possible to reduce possible
// file interlacing when synchronous writes are enabled. RdbTree::
// getList() should really be sped up by doing the neighbor node
// thing. would help for adding lists, too, maybe.
int32_t bufSize = 300 * 1024;
int32_t bufSize2 = 3000 * avgSize ;
if ( bufSize2 < 20*1024 ) bufSize2 = 20*1024;
if ( bufSize2 < bufSize ) bufSize = bufSize2;
if(!m_useTree) bufSize *= 4; //buckets are much faster at getting lists
//if ( this == g_titledb.getRdb () ) bufSize = 300*1024;
// when not adding new links spiderdb typically consists of just
// negative recs, so it is like indexdb...
//if ( this == g_spiderdb.getRdb () ) bufSize = 20*1024;
// how big will file be? upper bound.
int64_t maxFileSize;
// . NOTE: this is NOT an upper bound, stuff can be added to the
// tree WHILE we are dumping. this causes a problem because
// the DiskPageCache, BigFile::m_pc, allocs mem when you call
// BigFile::open() based on "maxFileSize" so it can end up
// breaching its buffer! since this is somewhat rare i will
// just modify DiskPageCache.cpp to ignore breaches.
if(m_useTree) maxFileSize = m_tree.getMemOccupiedForList ();
else maxFileSize = m_buckets.getMemOccupied();
// sanity
if ( maxFileSize < 0 ) { char *xx=NULL;*xx=0; }
// because we are actively spidering, the list we dump ends up
// being bigger, by like 20% or so; otherwise we do not make a
// big enough diskpagecache and it logs breach msgs... does not
// seem to happen with buckets based stuff... hmmm...
if ( m_useTree ) maxFileSize = ((int64_t)maxFileSize) * 120LL/100LL;
//if(m_niceness) g_loop.quickPoll(m_niceness,
//__PRETTY_FUNCTION__, __LINE__);
RdbBuckets *buckets = NULL;
RdbTree *tree = NULL;
if(m_useTree) tree = &m_tree;
else buckets = &m_buckets;
// . RdbDump will set the filename of the map we pass to this
// . RdbMap should dump itself out CLOSE!
// . it returns false if blocked, true otherwise & sets g_errno on err
// . but we only return false on error here
if ( ! m_dump.set ( base->m_collnum ,
base->m_files[m_fn] ,
id2 , // to set tfndb recs for titledb
m_isTitledb,// tdb2? this == g_titledb.getRdb() ,
buckets ,
tree ,
base->m_maps[m_fn], // RdbMap
NULL , // integrate into cache b4 delete
//&m_cache , // integrate into cache b4 delete
bufSize , // write buf size
true , // put keys in order? yes!
m_dedup , // dedup not used for this
m_niceness , // niceness of 1 will NOT block
this , // state
doneDumpingCollWrapper ,
m_useHalfKeys ,
0LL , // dst start offset
//0 , // prev last key
KEYMIN() , // prev last key
m_ks , // keySize
m_pc , // DiskPageCache ptr
maxFileSize ,
this )) {// for setting m_needsToSave
return false;
}
// error?
if ( g_errno ) {
log("rdb: error dumping = %s . coll deleted from under us?",
mstrerror(g_errno));
// shit, what to do here? this is causing our RdbMem
// to get corrupted!
// because if we end up continuing it calls doneDumping()
// and updates RdbMem! maybe set a permanent error then!
// and if that is there do not clear RdbMem!
m_dumpErrno = g_errno;
// for now core out
//char *xx=NULL;*xx=0;
}
// loop back up since we did not block
goto loop;
}
void doneDumpingCollWrapper ( void *state ) {
Rdb *THIS = (Rdb *)state;
// return if the loop blocked
if ( ! THIS->dumpCollLoop() ) return;
// otherwise, call big wrapper
THIS->doneDumping();
}
// Moved a lot of the logic originally here in Rdb::doneDumping into
// RdbDump.cpp::dumpTree()
void Rdb::doneDumping ( ) {
// msg
//log(LOG_INFO,"db: Done dumping %s to %s (#%"INT32"): %s.",
// m_dbname,m_files[n]->getFilename(),n,mstrerror(g_errno));
log(LOG_INFO,"db: Done dumping %s: %s.",m_dbname,
mstrerror(m_dumpErrno));
// give the token back so someone else can dump or merge
//g_msg35.releaseToken();
// free mem in the primary buffer
if ( ! m_dumpErrno ) m_mem.freeDumpedMem();
// . tell RdbDump it is done
// . we have to set this here otherwise RdbMem's memory ring buffer
// will think the dumping is no longer going on and use the primary
// memory for allocating new titleRecs and such and that is not good!
m_inDumpLoop = false;
// . on g_errno the dumped file will be removed from "sync" file and
// from m_files and m_maps
// . TODO: move this logic into RdbDump.cpp
//for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
// if ( m_bases[i] ) m_bases[i]->doneDumping();
//}
// if we're closing shop then return
if ( m_isClosing ) {
// continue closing, this returns false if blocked
if ( ! close ( m_closeState,
m_closeCallback ,
false ,
true ) ) return;
// otherwise, we call the callback
m_closeCallback ( m_closeState );
return;
}
// try merge for all, first one that needs it will do it, preventing
// the rest from doing it
// don't attempt merge if we're niceness 0
if ( !m_niceness ) return;
//attemptMerge ( 1 , false );
attemptMergeAll(0,NULL);
}
// this should be called every few seconds by the sleep callback, too
void attemptMergeAll ( int fd , void *state ) {
if ( state && g_conf.m_logDebugDb ) state = NULL;
//g_checksumdb.getRdb()->attemptMerge ( 1 , false , !state);
g_linkdb.getRdb()->attemptMerge ( 1 , false , !state);
//g_sectiondb.getRdb()->attemptMerge ( 1 , false , !state);
//g_indexdb.getRdb()->attemptMerge ( 1 , false , !state);
g_posdb.getRdb()->attemptMerge ( 1 , false , !state);
//g_datedb.getRdb()->attemptMerge ( 1 , false , !state);
g_titledb.getRdb()->attemptMerge ( 1 , false , !state);
//g_tfndb.getRdb()->attemptMerge ( 1 , false , !state);
g_tagdb.getRdb()->attemptMerge ( 1 , false , !state);
g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
g_clusterdb.getRdb()->attemptMerge ( 1 , false , !state);
g_statsdb.getRdb()->attemptMerge ( 1 , false , !state);
g_syncdb.getRdb()->attemptMerge ( 1 , false , !state);
//g_placedb.getRdb()->attemptMerge ( 1 , false , !state);
g_doledb.getRdb()->attemptMerge ( 1 , false , !state);
//g_revdb.getRdb()->attemptMerge ( 1 , false , !state);
g_spiderdb.getRdb()->attemptMerge ( 1 , false , !state);
g_cachedb.getRdb()->attemptMerge ( 1 , false , !state);
g_serpdb.getRdb()->attemptMerge ( 1 , false , !state);
g_monitordb.getRdb()->attemptMerge ( 1 , false , !state);
// if we got a rebuild going on
g_spiderdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_checksumdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_indexdb2.getRdb()->attemptMerge ( 1 , false , !state);
g_posdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_datedb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_sectiondb2.getRdb()->attemptMerge ( 1 , false , !state);
g_titledb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_tfndb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_tagdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_catdb2.getRdb()->attemptMerge ( 1 , false , !state);
g_clusterdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_statsdb2.getRdb()->attemptMerge ( 1 , false , !state);
g_linkdb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_placedb2.getRdb()->attemptMerge ( 1 , false , !state);
//g_revdb2.getRdb()->attemptMerge ( 1 , false , !state);
}
// called by main.cpp
void Rdb::attemptMerge ( int32_t niceness , bool forced , bool doLog ) {
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
base->attemptMerge(niceness,forced,doLog);
// stop if we got unlink/rename threads out from a merge
// in RdbBase.cpp because the merge can't go until this is 0
// lest we have 2000 collections all trying to merge tagdb
// at the same time!!!! this happened once...
if ( g_numThreads > 0 ) break;
}
}
// . return false and set g_errno on error
// . TODO: speedup with m_tree.addSortedKeys() already partially written
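// . adds the recs in "list" to the tree/buckets for this collnum
// . sets g_errno to ETRYAGAIN (and returns false) if the caller should retry
//   later, e.g. while the tree is being read, another add is in progress or
//   there is no room and a dump had to be kicked off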
bool Rdb::addList ( collnum_t collnum , RdbList *list,
int32_t niceness/*, bool isSorted*/ ) {
// pick it
if ( collnum < 0 || collnum > getNumBases() || ! getBase(collnum) ) {
g_errno = ENOCOLLREC;
return log("db: %s bad collnum of %i.",m_dbname,collnum);
}
// make sure list is reset
list->resetListPtr();
// if nothing then just return true
if ( list->isExhausted() ) return true;
// sanity check
if ( list->m_ks != m_ks ) { char *xx = NULL; *xx = 0; }
// . do not add data to indexdb if we're in urgent merge mode!
// . sender will wait and try again
// . this is killing us! we end up adding a bunch of recs to sectiondb
// over and over again, the same recs! since msg4 works like that...
//if ( m_bases[collnum]->m_mergeUrgent ) {
// g_errno = ETRYAGAIN;
// return false;
//}
// we now call getTimeGlobal() so we need to be in sync with host #0
if ( ! isClockInSync () ) {
// log("rdb: can not add data because clock not in sync with "
// "host #0. issuing try again reply.");
g_errno = ETRYAGAIN;
return false;
}
// if we are well into repair mode, level 2, do not add anything
// to spiderdb or titledb... that can mess up our titledb scan.
// we always rebuild tfndb, clusterdb, checksumdb and spiderdb
// but we often just repair titledb, indexdb and datedb because
// they are bigger. it may add to indexdb/datedb
if ( g_repair.isRepairActive() &&
// but only check for collection we are repairing/rebuilding
collnum == g_repair.m_collnum &&
//! g_repair.m_fullRebuild &&
//! g_conf.m_rebuildNoSplits &&
//! g_conf.m_removeBadPages &&
( m_rdbId == RDB_TITLEDB ||
//m_rdbId == RDB_SECTIONDB ||
m_rdbId == RDB_PLACEDB ||
m_rdbId == RDB_TFNDB ||
m_rdbId == RDB_INDEXDB ||
m_rdbId == RDB_POSDB ||
//m_rdbId == RDB_DATEDB ||
m_rdbId == RDB_CLUSTERDB ||
m_rdbId == RDB_LINKDB ||
//m_rdbId == RDB_CHECKSUMDB ||
m_rdbId == RDB_DOLEDB ||
m_rdbId == RDB_SPIDERDB ||
m_rdbId == RDB_REVDB ) ) {
// exception, spider status docs can be deleted from titledb
// if user turns off 'index spider replies' before doing
// the rebuild, when not rebuilding titledb.
if ( m_rdbId == RDB_TITLEDB &&
list->m_listSize == 12 )
goto exception;
// allow banning of sites still
//m_rdbId == RDB_TAGDB ) ) {
log("db: How did an add come in while in repair mode?"
" rdbId=%"INT32"",(int32_t)m_rdbId);
g_errno = EREPAIRING;
return false;
}
exception:
/*
if ( g_repair.isRepairActive() &&
g_repair.m_fullRebuild &&
collnum != g_repair.m_newCollnum &&
m_rdbId != RDB_TAGDB &&
m_rdbId != RDB_TURKDB ) {
log("db: How did an add come in while in full repair mode?"
" addCollnum=%"INT32" repairCollnum=%"INT32" db=%s",
(int32_t)collnum , (int32_t)g_repair.m_newCollnum ,
m_dbname );
g_errno = EREPAIRING;
return false;
}
*/
// if we are currently in a quickpoll, make sure we are not in
// RdbTree::getList(), because we could mess that loop up by adding
// or deleting a record into/from the tree now
if ( m_tree.m_gettingList ) {
g_errno = ETRYAGAIN;
return false;
}
// prevent double entries
if ( m_inAddList ) {
// i guess the msg1 handler makes it this far!
//log("db: msg1 add in an add.");
g_errno = ETRYAGAIN;
return false;
}
// lock it
m_inAddList = true;
//log("msg1: in addlist niceness=%"INT32"",niceness);
// . if we don't have enough room to store list, initiate a dump and
// return g_errno of ETRYAGAIN
// . otherwise, we're guaranteed to have room for this list
if ( ! hasRoom(list,niceness) ) {
// stop it
m_inAddList = false;
// if tree is empty, list will never fit!!!
if ( m_useTree && m_tree.getNumUsedNodes() <= 0 ) {
g_errno = ELISTTOOBIG;
return log("db: Tried to add a record that is "
"simply too big (%"INT32" bytes) to ever fit in "
"the memory "
"space for %s. Please increase the max "
"memory for %s in gb.conf.",
list->m_listSize,m_dbname,m_dbname);
}
// force initiate the dump now, but not if we are niceness 0
// because then we can't be interrupted with quickpoll!
if ( niceness != 0 ) dumpTree( 1/*niceness*/ );
// set g_errno after initiating the dump!
g_errno = ETRYAGAIN;
// return false since we didn't add the list
return false;
}
// . if we're adding sorted, dataless keys do it this fast way
// . this will also do positive/negative key annihilations for us
// . should return false and set g_errno on error
//if ( list->getFixedDataSize() == 0 && isSorted ) {
// return m_tree.addSortedKeys( (key_t *)list->getList() ,
// size / sizeof(key_t) );
// otherwise, add one record at a time
// unprotect tree from writes
if ( m_tree.m_useProtection ) m_tree.unprotect ( );
// set this for event interval records
m_nowGlobal = 0;//getTimeGlobal();
// shortcut this too
CollectionRec *cr = g_collectiondb.getRec(collnum);
m_sortByDateTablePtr = &cr->m_sortByDateTable;
loop:
//key_t key = list->getCurrentKey();
char key[MAX_KEY_BYTES];
list->getCurrentKey(key);
int32_t dataSize ;
char *data ;
// negative keys have no data
if ( ! KEYNEG(key) ) {
dataSize = list->getCurrentDataSize();
data = list->getCurrentData();
}
else {
dataSize = 0;
data = NULL;
}
/* DEBUG CODE
if ( m_rdbId == RDB_TITLEDB ) {
char *s = "adding";
if ( KEYNEG(key) ) s = "removing";
// get the titledb docid
int64_t d = g_titledb.getDocIdFromKey ( (key_t *)key );
logf(LOG_DEBUG,"tfndb: %s docid %"INT64" to titledb.",s,d);
}
*/
if ( ! addRecord ( collnum , key , data , dataSize, niceness ) ) {
// bitch
static int32_t s_last = 0;
int32_t now = time(NULL);
// . do not log this more than once per second to stop log spam
// . i think this can really lockup the cpu, too
if ( now - s_last != 0 )
log(LOG_INFO,"db: Had error adding data to %s: %s.",
m_dbname,mstrerror(g_errno));
s_last = now;
// force initiate the dump now if addRecord failed for no mem
if ( g_errno == ENOMEM ) {
// start dumping the tree to disk so we have room 4 add
if ( niceness != 0 ) dumpTree( 1/*niceness*/ );
// tell caller to try again later (1 second or so)
g_errno = ETRYAGAIN;
}
// reprotect tree from writes
if ( m_tree.m_useProtection ) m_tree.protect ( );
// stop it
m_inAddList = false;
// discontinue adding any more of the list
return false;
}
/* DEBUG CODE
// verify we added it right
if ( m_rdbId == RDB_TITLEDB ) { // && KEYPOS(key) ) {
// get the titledb docid
int64_t d = g_titledb.getDocIdFromKey ( (key_t *)key );
// check the tree for this docid
RdbTree *tt = g_titledb.m_rdb.getTree();
// make titledb keys
key_t startKey = g_titledb.makeFirstTitleRecKey ( d );
key_t endKey = g_titledb.makeLastTitleRecKey ( d );
int32_t n = tt->getNextNode ( collnum , startKey );
// sanity check -- make sure url is NULL terminated
//if ( ulen > 0 && st->m_url[st->m_ulen] ) { char*xx=NULL;*xx=0; }
// Tfndb::makeExtQuick masks the host hash with TFNDB_EXTMASK
uint32_t mask1 = (uint32_t)TFNDB_EXTMASK;
// but use the smallest of these
uint32_t mask2 = (uint32_t)TITLEDB_HOSTHASHMASK;
// pick the min
uint32_t min ;
if ( mask1 < mask2 ) min = mask1;
else min = mask2;
// if url provided, set "e"
//int32_t e; if ( ulen > 0 ) e = g_tfndb.makeExtQuick ( st->m_url ) & min;
// there should only be one match, one titlerec per docid!
char *sss = "did not find";
for ( ; n >= 0 ; n = tt->getNextNode ( n ) ) {
// break if collnum does not match. we exceeded our tree range.
if ( tt->getCollnum ( n ) != collnum ) break;
// get the key of this node
key_t k = *(key_t *)tt->getKey(n);
// if passed limit, break out, no match
if ( k > endKey ) break;
// get the extended hash (aka extHash, aka hostHash)
//int32_t e2 = g_titledb.getHostHash ( k ) & min;
// if a url was provided and not a docid, must match the exts
//if ( ulen > 0 && e != e2 ) continue;
// . if we matched a negative key, then ENOTFOUND
// . just break out here and enter the normal logic
// . it should load tfndb and find that it is not in tfndb
// because when you add a negative key to titledb in
// Rdb::addList, it adds a negative rec to tfndb immediately
if ( KEYNEG((char *)&k) ) continue;//break;
// we got it
sss = "found";
break;
}
if ( KEYPOS(key) )
logf(LOG_DEBUG,"tfndb: %s docid %"INT64" at node %"INT32"",sss,d,n);
}
*/
// on success, if it was a titledb delete, delete from tfndb too
/*
if ( m_rdbId == RDB_TITLEDB && KEYNEG(key) ) {
// make the tfndb record
int64_t docId = g_titledb.getDocIdFromKey ((key_t *) key );
int64_t uh48 = g_titledb.getUrlHash48 ((key_t *)key);
// . tfn=0 delete=true
// . use a tfn of 0 because RdbList::indexMerge_r() ignores
// the "tfn bits" when merging/comparing two tfndb keys
// (HACK)
key_t tk = g_tfndb.makeKey ( docId ,uh48,0, true );
// add this negative key to tfndb
Rdb *tdb = g_tfndb.getRdb();
// debug log
//logf(LOG_DEBUG,"tfndb: REMOVING tfndb docid %"INT64".",docId);
// if no room, bail. caller should dump tfndb and retry later.
if ( ! tdb->addRecord(collnum,(char *)&tk,NULL,0,niceness) ) {
// if it is OOM... dump it!
if ( g_errno == ENOMEM && niceness != 0 )
tdb->dumpTree(1);
// and tell the title add to try again!
g_errno = ETRYAGAIN;
// stop it
m_inAddList = false;
return false;
}
}
*/
QUICKPOLL((niceness));
// skip to next record, returns false on end of list
if ( list->skipCurrentRecord() ) goto loop;
// reprotect tree from writes
if ( m_tree.m_useProtection ) m_tree.protect ( );
// stop it
m_inAddList = false;
// if a dump is already in progress, we're done here
if ( m_dump.isDumping() ) return true;
// return true if not ready for dump yet
if ( ! needsDump () ) return true;
// bad?
//log("rdb: dumptree niceness=%"INT32"",niceness);
// if dump started ok, return true
if ( niceness != 0 ) if ( dumpTree( 1/*niceness*/ ) ) return true;
// technically, since we added the record, it is not an error
g_errno = 0;
// . otherwise, bitch and return false with g_errno set
// . usually this is because it is waiting for an unlink/rename
// operation to complete... so make it LOG_INFO
log(LOG_INFO,"db: Failed to dump data to disk for %s.",m_dbname);
return true;
}
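// . returns true if the in-memory data should be dumped to disk now
// . triggers when RdbMem or the tree/buckets are 90% full, or, for doledb
//   only, when the tree holds more than 50,000 negative keys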
bool Rdb::needsDump ( ) {
if ( m_mem.is90PercentFull () ) return true;
if ( m_useTree ) { if ( m_tree.is90PercentFull() ) return true; }
else if ( m_buckets.needsDump() ) return true;
// if adding to doledb and it has been > 1 day then force a dump
// so that all the negative keys in the tree annihilate with the
// keys on disk to make it easier to read a doledb list
if ( m_rdbId != RDB_DOLEDB ) return false;
// set this if not valid
//static int32_t s_lastDumpTryTime = -1;
//if ( s_lastDumpTryTime == -1 )
// s_lastDumpTryTime = getTimeLocal();
// try to dump doledb every 24 hrs
//int32_t now = getTimeLocal();
//if ( now - s_lastDumpTryTime >= 3600*24 ) return true;
// or dump doledb if a ton of negative recs...
if ( m_tree.getNumNegativeKeys() > 50000 ) return true;
// otherwise, no need to dump doledb just yet
return false;
}
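// . returns true if the tree/buckets and RdbMem have room to hold "list"
// . for doledb, a tree-only rdb, this may first try to reclaim memory from
//   deleted tree nodes via reclaimMemFromDeletedTreeNodes() below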
bool Rdb::hasRoom ( RdbList *list , int32_t niceness ) {
// how many nodes will tree need?
int32_t numNodes = list->getNumRecs( );
if ( !m_useTree && !m_buckets.hasRoom(numNodes)) return false;
// how much space will RdbMem, m_mem, need?
//int32_t overhead = sizeof(key_t);
int32_t overhead = m_ks;
if ( list->getFixedDataSize() == -1 ) overhead += 4;
// how much mem will the data use?
int64_t dataSpace = list->getListSize() - (numNodes * overhead);
// does tree have room for these nodes?
if ( m_useTree && m_tree.getNumAvailNodes() < numNodes ) return false;
// if we are doledb, we are a tree-only rdb, so try to reclaim
// memory from deleted nodes. works by condensing the used memory.
if ( m_rdbId == RDB_DOLEDB &&
// if there is no room left in m_mem (RdbMem class)...
( m_mem.m_ptr2 - m_mem.m_ptr1 < dataSpace||g_conf.m_forceIt) &&
//m_mem.m_ptr1 - m_mem.m_mem > 1024 ) {
// and last time we tried this, if any, it reclaimed 1MB+
(m_lastReclaim>1024*1024||m_lastReclaim==-1||g_conf.m_forceIt)){
// reclaim the memory now. returns -1 and sets g_errno on error
int32_t reclaimed = reclaimMemFromDeletedTreeNodes(niceness);
// reset force flag
g_conf.m_forceIt = false;
// ignore errors for now
g_errno = 0;
// how much did we free up?
if ( reclaimed >= 0 )
m_lastReclaim = reclaimed;
}
// does m_mem have room for "dataSpace"?
if ( (int64_t)m_mem.getAvailMem() < dataSpace ) return false;
// otherwise, we do have room
return true;
}
// . NOTE: low bit should be set , only antiKeys (deletes) have low bit clear
// . returns false and sets g_errno on error, true otherwise
// . if RdbMem, m_mem, has no mem, sets g_errno to ETRYAGAIN and returns false
// because dump should complete soon and free up some mem
// . this overwrites dups
bool Rdb::addRecord ( collnum_t collnum,
//key_t &key , char *data , int32_t dataSize ){
char *key , char *data , int32_t dataSize,
int32_t niceness){
if ( ! getBase(collnum) ) {
g_errno = EBADENGINEER;
log(LOG_LOGIC,"db: addRecord: collection #%i is gone.",
collnum);
return false;
}
// skip if tree not writable
if ( ! g_process.m_powerIsOn ) {
// log it every 3 seconds
static int32_t s_last = 0;
int32_t now = getTime();
if ( now - s_last > 3 ) {
s_last = now;
log("db: addRecord: power is off. try again.");
}
g_errno = ETRYAGAIN;
return false;
}
// we can also use this logic to avoid adding to the waiting tree
// because Process.cpp locks all the trees up at once and unlocks
// them all at once as well. so since SpiderRequests are added to
// spiderdb and then alter the waiting tree, this statement should
// protect us.
if ( m_useTree ) {
if(! m_tree.m_isWritable ) {
g_errno = ETRYAGAIN;
return false;
}
}
else {
if( ! m_buckets.isWritable() ) {
g_errno = ETRYAGAIN;
return false;
}
}
// bail if we're closing
if ( m_isClosing ) { g_errno = ECLOSING; return false; }
// . if we are syncing, we might have to record the key so the sync
// loop ignores this key since it is new
// . actually we should not add any new data while a sync is going on
// because the sync may incorrectly override it
//if(g_sync.m_isSyncing ) { //&& g_sync.m_base == m_bases[collnum] ) {
// g_errno = ETRYAGAIN;
// return false;
//}
// sanity check
if ( KEYNEG(key) ) {
if ( (dataSize > 0 && data) ) {
log("db: Got data for a negative key.");
char *xx=NULL;*xx=0;
}
}
// sanity check
else if ( m_fixedDataSize >= 0 && dataSize != m_fixedDataSize ) {
g_errno = EBADENGINEER;
log(LOG_LOGIC,"db: addRecord: DataSize is %"INT32" should "
"be %"INT32"", dataSize,m_fixedDataSize );
char *xx=NULL;*xx=0;
return false;
}
// save orig
char *orig = NULL;
// copy the data before adding if we don't already own it
if ( data ) {
// save orig
orig = data;
// sanity check
if ( m_fixedDataSize == 0 && dataSize > 0 ) {
g_errno = EBADENGINEER;
log(LOG_LOGIC,"db: addRecord: Data is present. "
"Should not be");
return false;
}
data = (char *) m_mem.dupData ( key, data, dataSize, collnum);
if ( ! data ) {
g_errno = ETRYAGAIN;
return log("db: Could not allocate %"INT32" bytes to add "
"data to %s. Retrying.",dataSize,m_dbname);
}
}
// sanity check
//else if ( m_fixedDataSize != 0 ) {
// g_errno = EBADENGINEER;
// log(LOG_LOGIC,"db: addRecord: Data is required for rdb rec.");
// char *xx=NULL;*xx=0;
//}
// sanity check
//if ( m_fixedDataSize >= 0 && dataSize != m_fixedDataSize ) {
// g_errno = EBADENGINEER;
// log(LOG_LOGIC,"db: addRecord: DataSize is %"INT32" should "
// "be %"INT32"", dataSize,m_fixedDataSize );
// char *xx=NULL;*xx=0;
// return false;
//}
// . TODO: save this tree-walking state for adding the node!!!
// . TODO: use somethin like getNode(key,&lastNode)
// then addNode (lastNode,key,data,dataSize)
// int32_t lastNode;
// . #1) if we're adding a positive key, replace negative counterpart
// in the tree, because we'll override the positive rec it was
// deleting
// . #2) if we're adding a negative key, replace positive counterpart
// in the tree, but we must keep negative rec in tree in case
// the positive counterpart was overriding one on disk (as in #1)
//key_t oppKey = key ;
char oppKey[MAX_KEY_BYTES];
int32_t n = -1;
// if we are TFNDB, get the node independent of the
// tfnnum bits so we can overwrite it even though the key is
// technically a different key!! the tfn bits are in the lower 10
// bits so we have to mask those out!
/*
if ( m_rdbId == RDB_TFNDB ) {
char tfnKey[MAX_KEY_BYTES];
KEYSET(tfnKey,key,m_ks);
// zero out lower bits for lookup in tree
tfnKey[0] = 0x00; // 00000000
tfnKey[1] &= 0xfc; // 11111100
// get key after that
n = m_tree.getNextNode ( collnum , tfnKey );
// assume none
char *ok = NULL;
// get it
if ( n >= 0 ) {
// get uh48 being added
int64_t uh48 = g_tfndb.getUrlHash48((key_t *)key);
// get that key
ok = m_tree.getKey( n );
// see if it matches our uh48, if not then
// do not delete it!
if ( g_tfndb.getUrlHash48((key_t *)ok) != uh48 ) n= -1;
}
// set oppkey for checking dup below
if ( n >= 0 )
KEYSET ( oppKey , ok , m_ks );
}
else if ( m_useTree ) {
*/
if ( m_useTree ) {
// make the opposite key of "key"
KEYSET(oppKey,key,m_ks);
KEYXOR(oppKey,0x01);
// look it up
n = m_tree.getNode ( collnum , oppKey );
}
if ( m_rdbId == RDB_DOLEDB && g_conf.m_logDebugSpider ) {
// must be 96 bits
if ( m_ks != 12 ) { char *xx=NULL;*xx=0; }
// set this
key_t doleKey = *(key_t *)key;
// remove from g_spiderLoop.m_lockTable too!
if ( KEYNEG(key) ) {
// log debug
logf(LOG_DEBUG,"spider: removed doledb key "
"for pri=%"INT32" time=%"UINT32" uh48=%"UINT64"",
(int32_t)g_doledb.getPriority(&doleKey),
(uint32_t)g_doledb.getSpiderTime(&doleKey),
g_doledb.getUrlHash48(&doleKey));
}
else {
// what collection?
//SpiderColl *sc = g_spiderCache.getSpiderColl(collnum)
// do not overflow!
// log debug
SpiderRequest *sreq = (SpiderRequest *)data;
logf(LOG_DEBUG,"spider: added doledb key "
"for pri=%"INT32" time=%"UINT32" "
"uh48=%"UINT64" "
//"docid=%"INT64" "
"u=%s",
(int32_t)g_doledb.getPriority(&doleKey),
(uint32_t)g_doledb.getSpiderTime(&doleKey),
g_doledb.getUrlHash48(&doleKey),
//sreq->m_probDocId,
sreq->m_url);
}
}
/*
if ( m_rdbId == RDB_DOLEDB ) {
// must be 96 bits
if ( m_ks != 12 ) { char *xx=NULL;*xx=0; }
// set this
key_t doleKey = *(key_t *)key;
// remove from g_spiderLoop.m_lockTable too!
if ( KEYNEG(key) ) {
// make it positive
doleKey.n0 |= 0x01;
// remove from locktable
g_spiderLoop.m_lockTable.removeKey ( &doleKey );
// get spidercoll
SpiderColl *sc=g_spiderCache.getSpiderColl ( collnum );
// remove from dole tables too - no this is done
// below where we call addSpiderReply()
//sc->removeFromDoleTables ( &doleKey );
// "sc" can be NULL at start up when loading
// the addsinprogress.dat file
if ( sc ) {
// remove the local lock on this
HashTableX *ht = &g_spiderLoop.m_lockTable;
// shortcut
int64_t uh48=g_doledb.getUrlHash48(&doleKey);
// check tree
int32_t slot = ht->getSlot ( &uh48 );
// nuke it
if ( slot >= 0 ) ht->removeSlot ( slot );
// get coll
if ( g_conf.m_logDebugSpider)//sc->m_isTestCol
// log debug
logf(LOG_DEBUG,"spider: rdb: "
"got negative doledb "
"key for uh48=%"UINT64" - removing "
"spidering lock",
g_doledb.getUrlHash48(&doleKey));
}
// make it negative again
doleKey.n0 &= 0xfffffffffffffffeLL;
}
*/
// uncomment this if we have too many "gaps"!
/*
else {
// get the SpiderColl, "sc"
SpiderColl *sc = g_spiderCache.m_spiderColls[collnum];
// jump start "sc" if it is waiting for the sleep
// sleep wrapper to jump start it...
if ( sc && sc->m_didRound ) {
// reset it
sc->m_didRound = false;
// start doledb scan from beginning
sc->m_nextDoledbKey.setMin();
// jump start another dole loop before
// Spider.cpp's doneSleepingWrapperSL() does
sc->doleUrls();
}
}
*/
/*
}
*/
// debug testing
//if ( m_rdbId == RDB_CATDB ) {
// // show key
// log("rdb: adding key=%s to tree n=%"INT32"",KEYSTR(key,12) ,n);
//}
//jumpdown:
// if it exists then annihilate it
if ( n >= 0 ) {
// CAUTION: we should not annihilate with oppKey if oppKey may
// be in the process of being dumped to disk! This would
// render our annihilation useless and make undeletable data
if ( m_dump.isDumping() &&
//oppKey >= m_dump.getFirstKeyInQueue() &&
m_dump.m_lastKeyInQueue &&
KEYCMP(oppKey,m_dump.getFirstKeyInQueue(),m_ks)>=0 &&
//oppKey <= m_dump.getLastKeyInQueue () ) goto addIt;
KEYCMP(oppKey,m_dump.getLastKeyInQueue (),m_ks)<=0 )
goto addIt;
// BEFORE we delete it, save it. this is a special hack
// so we can UNDO this deleteNode() should the titledb rec
// add fail.
//if ( m_rdbId == RDB_TFNDB ) {
// s_tfndbHadOppKey = true;
// s_tfndbOppKey = *(key_t *)oppKey;
//}
// . otherwise, we can REPLACE oppKey
// . we NO LONGER annihilate with him. why?
// . freeData should be true, the tree doesn't own the data
// so it shouldn't free it really
m_tree.deleteNode3 ( n , true ); // false =freeData?);
// mark as changed
//if ( ! m_needsSave ) {
// m_needsSave = true;
// // add the "gotData" line to sync file
// g_sync.addOp ( OP_OPEN , &m_dummyFile , 0 );
//}
}
// if we have no files on disk for this db, don't bother
// preserving a negative rec, it just wastes tree space
//if ( key.isNegativeKey () ) {
if ( KEYNEG(key) && m_useTree ) {
// . or if our rec size is 0 we don't need to keep???
// . is this going to be a problem?
// . TODO: how could this be problematic?
// . w/o this our IndexTable stuff doesn't work right
// . dup key overriding is allowed in an Rdb so you
// can't NOT add a negative rec because it
// collided with one positive key BECAUSE that
// positive key may have been overriding another
// positive or negative key on disk
// . well, i just reindexed some old pages, with
// the new code they re-add all terms to the index
// even if unchanged since last time in case the
// truncation limit has been increased. so when
// i banned the page and re-added again, the negative
// key annihilated with the 2nd positive key in
// the tree and left the original key on disk in
// tact resulting in a "docid not found" msg!
// so we really should add the negative now. thus
// i commented this out.
//if ( m_fixedDataSize == 0 ) return true;
// return if all data is in the tree
if ( getBase(collnum)->m_numFiles == 0 ) return true;
// . otherwise, assume we match a positive...
// . datedb special counting of events
// . check termid quickly
// . NO! now use the sortbydatetable
//if ( m_rdbId == RDB_DATEDB &&
// key[15] == m_gbcounteventsTermId[5] &&
// key[14] == m_gbcounteventsTermId[4] &&
// key[13] == m_gbcounteventsTermId[3] &&
// key[12] == m_gbcounteventsTermId[2] &&
// key[11] == m_gbcounteventsTermId[1] &&
// key[10] == m_gbcounteventsTermId[0] ) {
// // get coll rec
// CollectionRec *cr = g_collectiondb.m_recs[collnum];
// // count
// unsigned score = ((unsigned char *)key)[5];
// // complement
// score = 255 - score;
// // increment event count
// cr->m_numEventsOnHost -= score;
// // and all colls
// g_collectiondb.m_numEventsAllColls -= score;
//}
}
// if we did not find an oppKey and are tfndb, flag this
//if ( n<0 && m_rdbId == RDB_TFNDB ) s_tfndbHadOppKey = false;
addIt:
// mark as changed
//if ( ! m_needsSave ) {
// m_needsSave = true;
// // add the "gotData" line to sync file
// g_sync.addOp ( OP_OPEN , &m_dummyFile , 0 );
//}
// . if we are syncing, we might have to record the key so the sync
// loop ignores this key since it is new
// . actually we should not add any new data while a sync is going on
// because the sync may incorrectly override it
//if ( g_sync.m_isSyncing){ //&& g_sync.m_base == m_bases[collnum] ) {
// g_errno = ETRYAGAIN;
// return false;
//}
// . TODO: add using "lastNode" as a start node for the insertion point
// . should set g_errno if failed
// . caller should retry on g_errno of ETRYAGAIN or ENOMEM
int32_t tn;
if ( !m_useTree ) {
// debug indexdb
/*
if ( m_rdbId == RDB_INDEXDB ) {
int64_t termId = g_indexdb.getTermId ( (key_t *)key);
logf(LOG_DEBUG,"rdb: adding tid=%"UINT64" to indexb",
termId);
}
*/
if ( m_buckets.addNode ( collnum , key , data , dataSize )>=0){
// sanity test
//int64_t tid = g_datedb.getTermId((key128_t *)key);
//if ( tid == *(int64_t *)m_gbcounteventsTermId )
// log("ghey");
// . datedb special counting of events
// . check termid quickly
//if ( m_rdbId == RDB_DATEDB &&
// key[15] == m_gbcounteventsTermId[5] &&
// key[14] == m_gbcounteventsTermId[4] &&
// key[13] == m_gbcounteventsTermId[3] &&
// key[12] == m_gbcounteventsTermId[2] &&
// key[11] == m_gbcounteventsTermId[1] &&
// key[10] == m_gbcounteventsTermId[0] ) {
// // get coll rec
// CollectionRec *cr ;
// cr = g_collectiondb.m_recs[collnum];
// // count
// unsigned score = ((unsigned char *)key)[5];
// // complement
// score = 255 - score;
// // increment event count
// cr->m_numEventsOnHost += score;
// // and all colls
// g_collectiondb.m_numEventsAllColls += score;
//}
return true;
}
}
// . cancel any spider request that is a dup in the dupcache to save
// disk space
// . twins might have different dupcaches so they might have different
// dups, but it shouldn't be a big deal because they are dups!
if ( m_rdbId == RDB_SPIDERDB && ! KEYNEG(key) ) {
// . this will create it if spiders are on and it's NULL
// . even if spiders are off we need to create it so
//   that the request can add its ip to the waitingTree
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
// skip if not there
if ( ! sc ) return true;
SpiderRequest *sreq=(SpiderRequest *)(orig-4-sizeof(key128_t));
// is it really a request and not a SpiderReply?
char isReq = g_spiderdb.isSpiderRequest ( &sreq->m_key );
// skip if in dup cache. do NOT add to cache since
// addToWaitingTree() in Spider.cpp will do that when called
// from addSpiderRequest() below
if ( isReq && sc->isInDupCache ( sreq , false ) ) {
if ( g_conf.m_logDebugSpider )
log("spider: adding spider req %s is dup. "
"skipping.",sreq->m_url);
return true;
}
// if we are overflowing...
if ( isReq &&
! sreq->m_isAddUrl &&
! sreq->m_isPageReindex &&
! sreq->m_urlIsDocId &&
! sreq->m_forceDelete &&
sc->isFirstIpInOverflowList ( sreq->m_firstIp ) ) {
if ( g_conf.m_logDebugSpider )
log("spider: skipping for overflow url %s ",
sreq->m_url);
g_stats.m_totalOverflows++;
return true;
}
}
if ( m_useTree && (tn=m_tree.addNode (collnum,key,data,dataSize))>=0) {
// only spiderdb and doledb need the extra handling below
if ( m_rdbId != RDB_SPIDERDB && m_rdbId != RDB_DOLEDB )
return true;
// or if negative key
if ( KEYNEG(key) ) return true;
// . this will create it if spiders are on and it's NULL
// . even if spiders are off we need to create it so
//   that the request can add its ip to the waitingTree
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
// skip if not there
if ( ! sc ) return true;
// if doing doledb...
if ( m_rdbId == RDB_DOLEDB ) {
int32_t pri = g_doledb.getPriority((key_t *)key);
// skip over corruption
if ( pri < 0 || pri >= MAX_SPIDER_PRIORITIES )
return true;
// if the added positive key is before the cursor, update the cursor
if ( KEYCMP((char *)key,
(char *)&sc->m_nextKeys[pri],
sizeof(key_t)) < 0 ) {
KEYSET((char *)&sc->m_nextKeys[pri],
(char *)key,
sizeof(key_t) );
// debug log
if ( g_conf.m_logDebugSpider )
log("spider: cursor reset pri=%"INT32" to "
"%s",
pri,KEYSTR(key,12));
}
// that's it for doledb mods
return true;
}
// . ok, now add that reply to the cache
// . g_now is in milliseconds!
//int32_t nowGlobal = localToGlobalTimeSeconds ( g_now/1000 );
//int32_t nowGlobal = getTimeGlobal();
// assume this is the rec (4 byte dataSize,spiderdb key is
// now 16 bytes)
SpiderRequest *sreq=(SpiderRequest *)(orig-4-sizeof(key128_t));
// is it really a request and not a SpiderReply?
char isReq = g_spiderdb.isSpiderRequest ( &sreq->m_key );
// add the request
if ( isReq ) {
// log that. why isn't this undoling always
if ( g_conf.m_logDebugSpider )
logf(LOG_DEBUG,"spider: rdb: added spider "
"request to spiderdb rdb tree "
"addnode=%"INT32" "
"request for uh48=%"UINT64" prntdocid=%"UINT64" "
"firstIp=%s spiderdbkey=%s",
tn,
sreq->getUrlHash48(),
sreq->getParentDocId(),
iptoa(sreq->m_firstIp),
KEYSTR((char *)&sreq->m_key,
sizeof(key128_t)));
// false means to NOT call evaluateAllRequests()
// because we call it below. the reason we do this
// is because it does not always get called
// in addSpiderRequest(), like if its a dup and
// gets "nuked". (removed callEval arg since not
// really needed)
sc->addSpiderRequest ( sreq, g_now );
}
// otherwise it is a reply
else {
// shortcut - cast it to reply
SpiderReply *rr = (SpiderReply *)sreq;
// log that. why isn't this undoling always
if ( g_conf.m_logDebugSpider )
logf(LOG_DEBUG,"rdb: rdb: got spider reply"
" for uh48=%"UINT64"",rr->getUrlHash48());
// add the reply
sc->addSpiderReply(rr);
// if the reply is "fake", i.e. it represents an internal error of
// some sort, don't leave it in the tree; removing it makes the url
// get retried. however, gb does need some kind of reply so it knows
// when pagereindex docid-based spider requests are done: when
// replies were not added for page reindexes, rerunning the reindex
// failed because the title recs had already been deleted. so we add
// replies now and only remove them for the error codes below.
int32_t indexCode = rr->m_errCode;
if ( //indexCode == EINTERNALERROR ||
indexCode == EABANDONED ||
indexCode == EHITCRAWLLIMIT ||
indexCode == EHITPROCESSLIMIT ) {
log("rdb: not adding spiderreply to rdb "
"because "
"it was an internal error for uh48=%"UINT64" "
"errCode = %s",
rr->getUrlHash48(),
mstrerror(indexCode));
m_tree.deleteNode3(tn,false);
}
}
// clear errors from adding to SpiderCache
g_errno = 0;
// all done
return true;
}
// . rollback the add to tfndb if the titledb add failed
// . MDW: likewise, this is not needed
/*
if ( m_rdbId == RDB_TITLEDB ) {
// get the tree directly
RdbTree *tree = g_tfndb.getRdb()->getTree();
// remove the key we added
int32_t n = tree->deleteNode ( collnum, (char *)&uk , true ) ;
// sanity check
if ( n < 0 ) {
log("db: Did not find tfndb key to rollback.");
char *xx = NULL; *xx = 0;
}
// did we have an "oppKey"?
if ( s_tfndbHadOppKey ) {
// add it back
int32_t n = tree->addNode(collnum,(char *)&s_tfndbOppKey);
// see if this can ever fail, i do not see why it
// would since we deleted it above
if ( n < 0 ) {
log("db: Failed to re-add tfndb key.");
char *xx = NULL; *xx = 0;
}
}
}
*/
// enhance the error message
char *ss ="";
if ( m_tree.m_isSaving ) ss = " Tree is saving.";
if ( !m_useTree && m_buckets.isSaving() ) ss = " Buckets are saving.";
// return ETRYAGAIN if out of memory, this should tell
// addList to call the dump routine
//if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
// log the error
//g_errno = EBADENGINEER;
return log(LOG_INFO,"db: Had error adding data to %s: %s.%s",
m_dbname,mstrerror(g_errno),ss);
// if we flubbed then free the data, if any
//if ( doCopy && data ) mfree ( data , dataSize ,"Rdb");
//return false;
}
// . use the maps and tree to estimate the size of this list w/o hitting disk
// . used by Indexdb.cpp to get the size of a list for IDF weighting purposes
int64_t Rdb::getListSize ( collnum_t collnum,
//key_t startKey , key_t endKey , key_t *max ,
char *startKey , char *endKey , char *max ,
int64_t oldTruncationLimit ) {
// pick it
//collnum_t collnum = g_collectiondb.getCollnum ( coll );
if ( collnum < 0 || collnum >= getNumBases() || ! getBase(collnum) )
return log("db: %s bad collnum of %i",m_dbname,collnum);
return getBase(collnum)->getListSize(startKey,endKey,max,
oldTruncationLimit);
}
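// . rough estimate of the record count across the whole cluster
// . assumes records are spread evenly over the shards, so just multiply
//   our local total by the shard count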
int64_t Rdb::getNumGlobalRecs ( ) {
return getNumTotalRecs() * g_hostdb.m_numShards;//Groups;
}
// . return number of positive records - negative records
int64_t Rdb::getNumTotalRecs ( bool useCache ) {
// are we catdb or statsdb? then we have no associated collections
// because we are used globally, by all collections
if ( m_isCollectionLess )
return m_collectionlessBase->getNumTotalRecs();
// this gets slammed w/ too many collections so use a cache...
//if ( g_collectiondb.m_numRecsUsed > 10 ) {
int32_t now = 0;
if ( useCache ) {
now = getTimeLocal();
if ( now - m_cacheLastTime == 0 )
return m_cacheLastTotal;
}
// same as num recs
int32_t nb = getNumBases();
int64_t total = 0LL;
//return 0; // too many collections!!
for ( int32_t i = 0 ; i < nb ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
if ( ! base ) continue;
total += base->getNumTotalRecs();
}
// . add in the btree
// . TODO: count negative and positive recs in the b-tree
//total += m_tree.getNumPositiveKeys();
//total -= m_tree.getNumNegativeKeys();
if ( now ) {
m_cacheLastTime = now;
m_cacheLastTotal = total;
}
return total;
}
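// . like getNumTotalRecs() but for a single collection
// . returns 0 if the collection or its RdbBase is missing (e.g. swapped out)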
int64_t Rdb::getCollNumTotalRecs ( collnum_t collnum ) {
if ( collnum < 0 ) return 0;
CollectionRec *cr = g_collectiondb.m_recs[collnum];
if ( ! cr ) return 0;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
if ( ! base ) {
log("rdb: getcollnumtotalrecs: base swapped out");
return 0;
}
return base->getNumTotalRecs();
}
// . how much mem is alloced for all of our maps?
// . we have one map per file
int64_t Rdb::getMapMemAlloced () {
int64_t total = 0;
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
// skip null base if swapped out
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
total += base->getMapMemAlloced();
}
return total;
}
// sum of all parts of all big files
int32_t Rdb::getNumSmallFiles ( ) {
int32_t total = 0;
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
// skip null base if swapped out
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
total += base->getNumSmallFiles();
}
return total;
}
// sum of all parts of all big files
int32_t Rdb::getNumFiles ( ) {
int32_t total = 0;
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
total += base->getNumFiles();
}
return total;
}
int64_t Rdb::getDiskSpaceUsed ( ) {
int64_t total = 0;
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
total += base->getDiskSpaceUsed();
}
return total;
}
bool Rdb::isMerging ( ) {
// use this for speed
return (bool)m_numMergesOut;
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
if ( ! base ) continue;
if ( base->isMerging() ) return true;
}
return false;
}
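// . lookup table mapping an rdbId to its global Rdb instance
// . filled in lazily on the first call to getRdbFromId() below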
Rdb *s_table9 [ 50 ];
// maps an rdbId to an Rdb
Rdb *getRdbFromId ( uint8_t rdbId ) {
static bool s_init = false;
if ( ! s_init ) {
s_init = true;
memset ( s_table9 , 0 , sizeof(s_table9) );
s_table9 [ RDB_TAGDB ] = g_tagdb.getRdb();
s_table9 [ RDB_INDEXDB ] = g_indexdb.getRdb();
s_table9 [ RDB_POSDB ] = g_posdb.getRdb();
s_table9 [ RDB_TITLEDB ] = g_titledb.getRdb();
s_table9 [ RDB_SECTIONDB ] = g_sectiondb.getRdb();
s_table9 [ RDB_PLACEDB ] = g_placedb.getRdb();
s_table9 [ RDB_SYNCDB ] = g_syncdb.getRdb();
s_table9 [ RDB_SPIDERDB ] = g_spiderdb.getRdb();
s_table9 [ RDB_DOLEDB ] = g_doledb.getRdb();
//s_table9 [ RDB_TFNDB ] = g_tfndb.getRdb();
s_table9 [ RDB_CLUSTERDB ] = g_clusterdb.getRdb();
s_table9 [ RDB_CATDB ] = g_catdb.getRdb();
//s_table9 [ RDB_DATEDB ] = g_datedb.getRdb();
s_table9 [ RDB_LINKDB ] = g_linkdb.getRdb();
s_table9 [ RDB_CACHEDB ] = g_cachedb.getRdb();
s_table9 [ RDB_SERPDB ] = g_serpdb.getRdb();
s_table9 [ RDB_MONITORDB ] = g_monitordb.getRdb();
s_table9 [ RDB_STATSDB ] = g_statsdb.getRdb();
s_table9 [ RDB_REVDB ] = g_revdb.getRdb();
//s_table9 [ RDB_FAKEDB ] = NULL;
s_table9 [ RDB_PARMDB ] = NULL;
s_table9 [ RDB2_INDEXDB2 ] = g_indexdb2.getRdb();
s_table9 [ RDB2_POSDB2 ] = g_posdb2.getRdb();
s_table9 [ RDB2_TITLEDB2 ] = g_titledb2.getRdb();
s_table9 [ RDB2_SECTIONDB2 ] = g_sectiondb2.getRdb();
s_table9 [ RDB2_PLACEDB2 ] = g_placedb2.getRdb();
s_table9 [ RDB2_SPIDERDB2 ] = g_spiderdb2.getRdb();
//s_table9 [ RDB2_TFNDB2 ] = g_tfndb2.getRdb();
s_table9 [ RDB2_CLUSTERDB2 ] = g_clusterdb2.getRdb();
//s_table9 [ RDB2_DATEDB2 ] = g_datedb2.getRdb();
s_table9 [ RDB2_LINKDB2 ] = g_linkdb2.getRdb();
s_table9 [ RDB2_REVDB2 ] = g_revdb2.getRdb();
s_table9 [ RDB2_TAGDB2 ] = g_tagdb2.getRdb();
}
if ( rdbId >= RDB_END ) return NULL;
return s_table9 [ rdbId ];
}
// the opposite of the above
char getIdFromRdb ( Rdb *rdb ) {
if ( rdb == g_tagdb.getRdb () ) return RDB_TAGDB;
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_indexdb.getRdb () ) return RDB_INDEXDB;
if ( rdb == g_posdb.getRdb () ) return RDB_POSDB;
//if ( rdb == g_datedb.getRdb () ) return RDB_DATEDB;
if ( rdb == g_titledb.getRdb () ) return RDB_TITLEDB;
if ( rdb == g_sectiondb.getRdb () ) return RDB_SECTIONDB;
if ( rdb == g_placedb.getRdb () ) return RDB_PLACEDB;
//if ( rdb == g_checksumdb.getRdb() ) return RDB_CHECKSUMDB;
if ( rdb == g_spiderdb.getRdb () ) return RDB_SPIDERDB;
if ( rdb == g_doledb.getRdb () ) return RDB_DOLEDB;
//if ( rdb == g_tfndb.getRdb () ) return RDB_TFNDB;
if ( rdb == g_clusterdb.getRdb () ) return RDB_CLUSTERDB;
if ( rdb == g_statsdb.getRdb () ) return RDB_STATSDB;
if ( rdb == g_linkdb.getRdb () ) return RDB_LINKDB;
if ( rdb == g_cachedb.getRdb () ) return RDB_CACHEDB;
if ( rdb == g_serpdb.getRdb () ) return RDB_SERPDB;
if ( rdb == g_monitordb.getRdb () ) return RDB_MONITORDB;
if ( rdb == g_syncdb.getRdb () ) return RDB_SYNCDB;
if ( rdb == g_revdb.getRdb () ) return RDB_REVDB;
//if ( rdb == g_sitedb.getRdb () ) return RDB_SITEDB;
//if ( rdb == g_tagdb2.getRdb () ) return RDB2_SITEDB2;
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_indexdb2.getRdb () ) return RDB2_INDEXDB2;
if ( rdb == g_posdb2.getRdb () ) return RDB2_POSDB2;
//if ( rdb == g_datedb2.getRdb () ) return RDB2_DATEDB2;
if ( rdb == g_tagdb2.getRdb () ) return RDB2_TAGDB2;
if ( rdb == g_titledb2.getRdb () ) return RDB2_TITLEDB2;
if ( rdb == g_sectiondb2.getRdb () ) return RDB2_SECTIONDB2;
if ( rdb == g_placedb2.getRdb () ) return RDB2_PLACEDB2;
//if ( rdb == g_checksumdb2.getRdb() ) return RDB2_CHECKSUMDB2;
if ( rdb == g_spiderdb2.getRdb () ) return RDB2_SPIDERDB2;
//if ( rdb == g_tfndb2.getRdb () ) return RDB2_TFNDB2;
if ( rdb == g_clusterdb2.getRdb () ) return RDB2_CLUSTERDB2;
//if ( rdb == g_statsdb2.getRdb () ) return RDB2_STATSDB2;
if ( rdb == g_linkdb2.getRdb () ) return RDB2_LINKDB2;
if ( rdb == g_revdb2.getRdb () ) return RDB2_REVDB2;
// if ( rdb == g_userdb.getRdb () ) return 7;
log(LOG_LOGIC,"db: getIdFromRdb: no rdbId for %s.",rdb->m_dbname);
return 0;
}
char isSecondaryRdb ( uint8_t rdbId ) {
switch ( rdbId ) {
//case RDB2_SITEDB2 : return true;
case RDB2_CATDB2 : return true;
case RDB2_INDEXDB2 : return true;
case RDB2_POSDB2 : return true;
//case RDB2_DATEDB2 : return true;
case RDB2_TAGDB2 : return true;
case RDB2_TITLEDB2 : return true;
case RDB2_SECTIONDB2 : return true;
case RDB2_PLACEDB2 : return true;
//case RDB2_CHECKSUMDB2: return true;
case RDB2_SPIDERDB2 : return true;
case RDB2_TFNDB2 : return true;
case RDB2_CLUSTERDB2 : return true;
case RDB2_REVDB2 : return true;
//case RDB2_STATSDB2 : return true;
case RDB2_LINKDB2 : return true;
}
return false;
}
// use a quick table now...
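// . returns the key size in bytes for an rdbId using a lazily-built table
// . defaults to 12 bytes; spiderdb, tagdb, syncdb, sectiondb and placedb
//   use 16, posdb uses sizeof(key144_t) and linkdb uses sizeof(key224_t)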
char getKeySizeFromRdbId ( uint8_t rdbId ) {
static bool s_flag = true;
static char s_table1[50];
if ( s_flag ) {
// only stock the table once
s_flag = false;
// sanity check. do not breach s_table1[]!
if ( RDB_END >= 50 ) { char *xx=NULL;*xx=0; }
// . loop over all possible rdbIds
// . RDB_NONE is 0!
for ( int32_t i = 1 ; i < RDB_END ; i++ ) {
// assume 12
int32_t ks = 12;
// only these are 16 as of now
if ( //i == RDB_DATEDB ||
i == RDB_SPIDERDB ||
i == RDB_TAGDB ||
i == RDB_SYNCDB ||
i == RDB_SECTIONDB ||
i == RDB_PLACEDB ||
//i == RDB2_DATEDB2 ||
i == RDB2_SPIDERDB2 ||
i == RDB2_TAGDB2 ||
i == RDB2_SECTIONDB2 ||
i == RDB2_PLACEDB2 )
ks = 16;
if ( i == RDB_POSDB || i == RDB2_POSDB2 )
ks = sizeof(key144_t);
if ( i == RDB_LINKDB || i == RDB2_LINKDB2 )
ks = sizeof(key224_t);
// set the table
s_table1[i] = ks;
}
}
// sanity check
if ( s_table1[rdbId] == 0 ) {
log("rdb: bad lookup rdbid of %i",(int)rdbId);
char *xx=NULL;*xx=0;
}
return s_table1[rdbId];
}
// returns -1 if dataSize is variable
int32_t getDataSizeFromRdbId ( uint8_t rdbId ) {
static bool s_flag = true;
static int32_t s_table2[80];
if ( s_flag ) {
// only stock the table once
s_flag = false;
// sanity check
if ( RDB_END >= 80 ) { char *xx=NULL;*xx=0; }
// loop over all possible rdbIds
for ( int32_t i = 1 ; i < RDB_END ; i++ ) {
// assume none
int32_t ds = 0;
// only these are 16 as of now
if ( i == RDB_POSDB ||
i == RDB_INDEXDB ||
i == RDB_TFNDB ||
i == RDB_CLUSTERDB ||
i == RDB_DATEDB ||
//i == RDB_FAKEDB ||
i == RDB_LINKDB )
ds = 0;
else if ( i == RDB_TITLEDB ||
i == RDB_REVDB ||
i == RDB_SYNCDB ||
i == RDB_CACHEDB ||
i == RDB_SERPDB ||
i == RDB_MONITORDB ||
i == RDB_TAGDB ||
i == RDB_PARMDB ||
i == RDB_SPIDERDB ||
i == RDB_DOLEDB ||
i == RDB_CATDB ||
i == RDB_PLACEDB )
ds = -1;
else if ( i == RDB_STATSDB )
ds = sizeof(StatData);
else if ( i == RDB_SECTIONDB )
ds = sizeof(SectionVote);
else if ( i == RDB2_POSDB2 ||
i == RDB2_INDEXDB2 ||
i == RDB2_TFNDB2 ||
i == RDB2_CLUSTERDB2 ||
i == RDB2_LINKDB2 ||
i == RDB2_DATEDB2 )
ds = 0;
else if ( i == RDB2_TITLEDB2 ||
i == RDB2_REVDB2 ||
i == RDB2_TAGDB2 ||
i == RDB2_CATDB2 ||
i == RDB2_SPIDERDB2 ||
i == RDB2_PLACEDB2 )
ds = -1;
else if ( i == RDB2_SECTIONDB2 )
ds = sizeof(SectionVote);
else { char *xx=NULL;*xx=0; }
// get the rdb for this rdbId
//Rdb *rdb = getRdbFromId ( i );
// sanity check
//if ( ! rdb ) continue;//{ char *xx=NULL;*xx=0; }
// sanity!
//if ( rdb->m_ks == 0 ) { char *xx=NULL;*xx=0; }
// set the table
s_table2[i] = ds;//rdb->m_fixedDataSize;
}
}
return s_table2[rdbId];
}
// get the dbname
char *getDbnameFromId ( uint8_t rdbId ) {
Rdb *rdb = getRdbFromId ( rdbId );
if ( rdb ) return rdb->m_dbname;
log(LOG_LOGIC,"db: rdbId of %"INT32" is invalid.",(int32_t)rdbId);
return "INVALID";
}
// get the RdbBase class for an rdbId and collection name
RdbBase *getRdbBase ( uint8_t rdbId , char *coll ) {
Rdb *rdb = getRdbFromId ( rdbId );
if ( ! rdb ) {
log("db: Collection \"%s\" does not exist.",coll);
return NULL;
}
// catdb is a special case
collnum_t collnum ;
if ( rdb->m_isCollectionLess )
collnum = (collnum_t) 0;
else
collnum = g_collectiondb.getCollnum ( coll );
if(collnum == -1) return NULL;
//return rdb->m_bases [ collnum ];
return rdb->getBase(collnum);
}
// get the RdbBase class for an rdbId and collection name
RdbBase *getRdbBase ( uint8_t rdbId , collnum_t collnum ) {
Rdb *rdb = getRdbFromId ( rdbId );
if ( ! rdb ) {
log("db: Collection #%"INT32" does not exist.",(int32_t)collnum);
return NULL;
}
if ( rdb->m_isCollectionLess ) collnum = (collnum_t) 0;
return rdb->getBase(collnum);
}
// get group responsible for holding record with this key
/*
RdbCache *getCache ( uint8_t rdbId ) {
if ( rdbId == RDB_INDEXDB )
return g_indexdb.getRdb()->getCache();
if ( rdbId == RDB_DATEDB )
return g_datedb.getRdb()->getCache();
if ( rdbId == RDB_TITLEDB)
return g_titledb.getRdb()->getCache();
if ( rdbId == RDB_SECTIONDB)
return g_sectiondb.getRdb()->getCache();
if ( rdbId == RDB_PLACEDB)
return g_placedb.getRdb()->getCache();
//if ( rdbId == RDB_CHECKSUMDB)
// return g_checksumdb.getRdb()->getCache();
if ( rdbId == RDB_SPIDERDB )
return g_spiderdb.getRdb()->getCache();
if ( rdbId == RDB_DOLEDB )
return g_doledb.getRdb()->getCache();
if ( rdbId == RDB_TFNDB )
return g_tfndb.getRdb()->getCache();
if ( rdbId == RDB_CLUSTERDB )
return g_clusterdb.getRdb()->getCache();
if ( rdbId == RDB_STATSDB )
return g_statsdb.getRdb()->getCache();
if ( rdbId == RDB_LINKDB )
return g_linkdb.getRdb()->getCache();
return NULL;
}
*/
// calls addList above
bool Rdb::addList ( char *coll , RdbList *list, int32_t niceness ) {
// catdb has no collection per se
if ( m_isCollectionLess )
return addList ((collnum_t)0,list,niceness);
collnum_t collnum = g_collectiondb.getCollnum ( coll );
if ( collnum < (collnum_t) 0 ) {
g_errno = ENOCOLLREC;
return log("db: Could not add list because collection \"%s\" "
"does not exist.",coll);
}
return addList ( collnum , list, niceness );
}
//bool Rdb::addRecord ( char *coll , key_t &key, char *data, int32_t dataSize ) {
bool Rdb::addRecord ( char *coll , char *key, char *data, int32_t dataSize,
int32_t niceness) {
// catdb has no collection per se
if ( m_isCollectionLess )
return addRecord ((collnum_t)0,
key,data,dataSize,
niceness);
collnum_t collnum = g_collectiondb.getCollnum ( coll );
if ( collnum < (collnum_t) 0 ) {
g_errno = ENOCOLLREC;
return log("db: Could not add rec because collection \"%s\" "
"does not exist.",coll);
}
return addRecord ( collnum , key , data , dataSize,niceness );
}
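// . small accessors that forward to either the RdbTree or the RdbBuckets
//   depending on whether this rdb keeps its in-memory recs in a tree
//   (m_useTree) or in buckets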
int32_t Rdb::getNumUsedNodes ( ) {
if(m_useTree) return m_tree.getNumUsedNodes();
return m_buckets.getNumKeys();
}
int32_t Rdb::getMaxTreeMem() {
if(m_useTree) return m_tree.getMaxMem();
return m_buckets.getMaxMem();
}
int32_t Rdb::getNumNegativeKeys() {
if(m_useTree) return m_tree.getNumNegativeKeys();
return m_buckets.getNumNegativeKeys();
}
int32_t Rdb::getTreeMemOccupied() {
if(m_useTree) return m_tree.getMemOccupied();
return m_buckets.getMemOccupied();
}
int32_t Rdb::getTreeMemAlloced () {
if(m_useTree) return m_tree.getMemAlloced();
return m_buckets.getMemAlloced();
}
void Rdb::disableWrites () {
if(m_useTree) m_tree.disableWrites();
else m_buckets.disableWrites();
}
void Rdb::enableWrites () {
if(m_useTree) m_tree.enableWrites();
else m_buckets.enableWrites();
}
bool Rdb::isWritable ( ) {
if(m_useTree) return m_tree.m_isWritable;
return m_buckets.m_isWritable;
}
bool Rdb::needsSave() {
if(m_useTree) return m_tree.m_needsSave;
else return m_buckets.needsSave();
}
// if we are doledb, we are a tree-only rdb, so try to reclaim
// memory from deleted nodes. works by condensing the used memory.
// returns how much we reclaimed.
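// . three passes: (1) hash the data offset of every node still in the tree,
//   (2) memmove the surviving recs down over the holes left by deleted
//   nodes, recording each rec's new offset, (3) patch m_tree.m_data[] to
//   point at the new offsets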
int32_t Rdb::reclaimMemFromDeletedTreeNodes( int32_t niceness ) {
log("rdb: reclaiming tree mem for doledb");
// this only works for non-dumped RdbMem right now, i.e. doledb only
if ( m_rdbId != RDB_DOLEDB ) { char *xx=NULL;*xx=0; }
// start scanning the mem pool
char *p = m_mem.m_mem;
char *pend = m_mem.m_ptr1;
char *dst = p;
int32_t inUseOld = pend - p;
char *pstart = p;
int32_t marked = 0;
int32_t occupied = 0;
HashTableX ht;
if (!ht.set ( 4,
4,
m_tree.m_numUsedNodes*2,
NULL , 0 ,
false ,
niceness ,
"trectbl",
true )) // useMagic? yes..
return -1;
int32_t dups = 0;
// record the data offset of every occupied node in a hash table
int32_t nn = m_tree.m_minUnusedNode;
for ( int32_t i = 0 ; i < nn ; i++ ) {
//QUICKPOLL ( niceness );
// skip empty nodes in tree
if ( m_tree.m_parents[i] == -2 ) {marked++; continue; }
// get data ptr
char *data = m_tree.m_data[i];
// and key ptr, if negative skip it
//char *key = m_tree.getKey(i);
//if ( (key[0] & 0x01) == 0x00 ) { occupied++; continue; }
// sanity, ensure legit
if ( data < pstart ) { char *xx=NULL;*xx=0; }
// offset
int32_t doff = (int32_t)(data - pstart);
// a dup? sanity check
if ( ht.isInTable ( &doff ) ) {
int32_t *vp = (int32_t *) ht.getValue ( &doff );
log("rdb: reclaim got dup oldi=0x%"PTRFMT" "
"newi=%"INT32" dataoff=%"INT32"."
,(PTRTYPE)vp,i,doff);
//while ( 1 == 1 ) sleep(1);
dups++;
continue;
}
// indicate it is legit
int32_t val = i;
ht.addKey ( &doff , &val );
occupied++;
}
if ( occupied + dups != m_tree.getNumUsedNodes() )
log("rdb: reclaim mismatch1");
if ( ht.getNumSlotsUsed() + dups != m_tree.m_numUsedNodes )
log("rdb: reclaim mismatch2");
int32_t skipped = 0;
// the spider requests should be linear in there. so we can scan
// them. then put their offset into a map that maps it to the new
// offset after doing the memmove().
for ( ; p < pend ; ) {
//QUICKPOLL ( niceness );
SpiderRequest *sreq = (SpiderRequest *)p;
int32_t oldOffset = p - pstart;
int32_t recSize = sreq->getRecSize();
// negative key? this shouldn't happen
if ( (sreq->m_key.n0 & 0x01) == 0x00 ) {
log("rdb: reclaim got negative doldb key in scan");
p += sizeof(DOLEDBKEY);
skipped++;
continue;
}
// if not in hash table it was deleted from tree i guess
if ( ! ht.isInTable ( &oldOffset ) ) {
p += recSize;
skipped++;
continue;
}
//
//// re -add with the proper value now
//
// otherwise, copy it over if still in tree
gbmemcpy ( dst , p , recSize );
int32_t newOffset = dst - pstart;
// store in map, overwrite old value of 1
ht.addKey ( &oldOffset , &newOffset );
dst += recSize;
p += recSize;
}
//if ( skipped != marked ) { char *xx=NULL;*xx=0; }
// sanity -- this breaks us. i tried taking the quickpolls out to stop
// if(ht.getNumSlotsUsed()!=m_tree.m_numUsedNodes){
// log("rdb: %"INT32" != %"INT32
// ,ht.getNumSlotsUsed()
// ,m_tree.m_numUsedNodes
// );
// while(1==1)sleep(1);
// char *xx=NULL;*xx=0;
// }
int32_t inUseNew = dst - pstart;
// update mem class as well
m_mem.m_ptr1 = dst;
// how much did we reclaim
int32_t reclaimed = inUseOld - inUseNew;
if ( reclaimed < 0 ) { char *xx=NULL;*xx=0; }
//if ( reclaimed == 0 && marked ) { char *xx=NULL;*xx=0;}
// now update data ptrs in the tree, m_data[]
for ( int i = 0 ; i < nn ; i++ ) {
//QUICKPOLL ( niceness );
// skip empty nodes in tree
if ( m_tree.m_parents[i] == -2 ) continue;
// update the data otherwise
char *data = m_tree.m_data[i];
// sanity, ensure legit
if ( data < pstart ) { char *xx=NULL;*xx=0; }
int32_t offset = data - pstart;
int32_t *newOffsetPtr = (int32_t *)ht.getValue ( &offset );
if ( ! newOffsetPtr ) { char *xx=NULL;*xx=0; }
char *newData = pstart + *newOffsetPtr;
m_tree.m_data[i] = newData;
}
log("rdb: reclaimed %"INT32" bytes after scanning %"INT32" "
"undeleted nodes and %"INT32" deleted nodes for doledb"
,reclaimed,nn,marked);
// return # of bytes of mem we reclaimed
return reclaimed;
}