open-source-search-engine/RdbCache.cpp

1877 lines
58 KiB
C++
Raw Normal View History

2013-08-03 00:12:24 +04:00
#include "gb-include.h"
#undef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500
#include <unistd.h>
#include "Threads.h"
#include "RdbCache.h"
#include "Collectiondb.h"
#include "Loop.h"
#include "Msg17.h"
//#include "Dns.h" // g_dns
//#include "Msg36.h" // g_qtable
#include "Msg13.h"
//#include "Msg10.h" // g_deadWaitCache
#include "Dns.h"
#include "BigFile.h"
// Global write switch for all RdbCaches: while false, addRecord() returns
// without storing anything and getRecord() skips record promotion.
bool g_cacheWritesEnabled = true;
// Builds an empty, unallocated cache. Call init() before using it.
RdbCache::RdbCache () {
	// reset() frees m_bufs[0..m_numBufs) and m_ptrs (if non-NULL), so
	// give those fields sane values before it runs
	m_ptrs         = NULL;
	m_numPtrsMax   = 0;
	m_numBufs      = 0;
	m_totalBufSize = 0;
	reset();
	// these two flags are not touched by reset()
	m_corruptionDetected = false;
	m_needsSave          = false;
}
// Frees the record buffers and the hash table by delegating to reset().
RdbCache::~RdbCache ( ) {
	reset();
}
// Maximum size in bytes of one record buffer. init() carves the cache
// memory into buffers of at most this size, and addRecord() never lets a
// record straddle two buffers. The smaller values below are for testing.
#define BUFSIZE (128*1024*1024)
//#define BUFSIZE (100000)
//#define BUFSIZE (200000)
void RdbCache::reset ( ) {
//if ( m_numBufs > 0 )
// log("db: resetting record cache");
m_offset = 0;
m_tail = 0;
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < m_numBufs ; i++ )
2013-08-03 00:12:24 +04:00
// all bufs, but not necessarily last, are BUFSIZE bytes big
mfree ( m_bufs[i] , m_bufSizes[i] , "RdbCache" );
m_numBufs = 0;
m_totalBufSize= 0;
if ( m_ptrs ) mfree ( m_ptrs , m_numPtrsMax*sizeof(char *),"RdbCache");
m_ptrs = NULL;
m_numPtrsUsed = 0;
// can't reset this, breaks the load!
//m_numPtrsMax = 0;
m_memOccupied = 0;
m_memAlloced = 0;
m_numHits = 0;
m_numMisses = 0;
//m_wrapped = false;
m_adds = 0;
m_deletes = 0;
// assume no need to call convertCache()
m_convert = false;
m_isSaving = false;
}
2014-11-11 01:45:11 +03:00
bool RdbCache::init ( int32_t maxMem ,
int32_t fixedDataSize ,
2013-08-03 00:12:24 +04:00
bool supportLists ,
2014-11-11 01:45:11 +03:00
int32_t maxRecs ,
2013-08-03 00:12:24 +04:00
bool useHalfKeys ,
char *dbname ,
bool loadFromDisk ,
char cacheKeySize ,
char dataKeySize ,
2014-11-11 01:45:11 +03:00
int32_t numPtrsMax ) {
2013-08-03 00:12:24 +04:00
// reset all
reset();
// watch out
if ( maxMem < 0 ) return log(LOG_LOGIC,"db: cache for %s had "
"negative maxMem." , dbname);
if ( maxRecs < 0 ) return log(LOG_LOGIC,"db: cache for %s had "
"negative maxRecs.", dbname);
// don't use more mem than this
m_maxMem = maxMem;
m_maxColls = (1LL << (sizeof(collnum_t)*8));
2013-08-03 00:12:24 +04:00
RdbCache *robots = Msg13::getHttpCacheRobots();
RdbCache *others = Msg13::getHttpCacheOthers();
// do not use some cache if we are the tmp cluster
RdbCache *th = NULL;
if ( g_hostdb.m_useTmpCluster ) th = this;
//if ( th == &g_genericCache[SITEQUALITY_CACHEID] ) maxMem = 0;
if ( th == g_dns.getCache () ) maxMem = 0;
if ( th == g_dns.getCacheLocal () ) maxMem = 0;
if ( th == robots ) maxMem = 0;
if ( th == others ) maxMem = 0;
//if ( th == &g_forcedCache ) maxMem = 0;
//if ( th == &g_alreadyAddedCache ) maxMem = 0;
// this is the fixed dataSize of all records in a list, not the
// fixed dataSize of a list itself. Note that.
m_fixedDataSize = fixedDataSize;
m_supportLists = supportLists;
m_useHalfKeys = useHalfKeys;
m_useDisk = loadFromDisk;
m_dbname = dbname;
m_dks = dataKeySize;
m_cks = cacheKeySize;
// if maxMem is zero just return true
if ( m_maxMem <= 0 ) return true;
// assume no need to call convertCache()
m_convert = false;
// . double what they had so hash table is somewhat sparse
// . ::load() uses this so set it here
m_numPtrsMax = maxRecs * 2;
// it might have been provided though, too
if ( numPtrsMax > 0 ) m_numPtrsMax = numPtrsMax;
// try loading from disk before anything else
if ( m_useDisk ) {
if ( load ( m_dbname ) ) return true;
//log("RdbCache::init: cache load failed");
g_errno = 0;
}
// . make our hash table, zero it out
// . don't allow it more than 50% full for performance
m_threshold = maxRecs; // (maxRecs * 50 ) / 100;
if ( m_threshold == m_numPtrsMax ) m_threshold--;
char ttt[128];
sprintf(ttt,"cptrs-%s",m_dbname);
m_ptrs = (char **) mcalloc (sizeof(char *)*m_numPtrsMax , ttt );
if ( ! m_ptrs ) return log("RdbCache::init: %s", mstrerror(g_errno));
// debug testing -- remove later
2014-11-11 01:45:11 +03:00
//m_crcs=(int32_t *)mcalloc(4*m_numPtrsMax,"RdbCache");
2013-08-03 00:12:24 +04:00
//if (!m_crcs)return log("RdbCache::init: %s", mstrerror(g_errno));
// update OUR mem alloced
m_memAlloced = m_numPtrsMax * sizeof(char *);
// include this
m_memOccupied = 0; // m_memOccupied = m_memAlloced;
sprintf(ttt,"cbuf-%s",m_dbname);
// . make the 128MB buffers
// . if we do more than 128MB per buf then pthread_create() will fail
2014-11-11 01:45:11 +03:00
int32_t bufMem = m_maxMem - m_memAlloced;
if( bufMem <= 0 ) {
log("rdbcache: cache for %s does not have enough mem. fix "
"by increasing maxmem or number of recs, etc.",m_dbname);
char *xx=NULL;*xx=0;
}
if ( bufMem && m_fixedDataSize > 0 &&
bufMem / m_fixedDataSize < maxRecs / 2 ) {
log("cache: warning. "
"cache for %s can have %i ptrs but buf mem "
"can only hold %i objects"
,m_dbname
,(int)maxRecs
,(int)(bufMem/m_fixedDataSize));
}
2013-08-03 00:12:24 +04:00
m_totalBufSize = 0LL;
m_offset = 0LL;
while ( bufMem > 0 && m_numBufs < 32 ) {
2014-11-11 01:45:11 +03:00
int32_t size = bufMem;
2013-08-03 00:12:24 +04:00
if ( size > BUFSIZE ) size = BUFSIZE;
m_bufSizes [ m_numBufs ] = size;
m_bufs [ m_numBufs ] = (char *)mcalloc(size,ttt);
//m_bufEnds [ m_numBufs ] = NULL;
if ( ! m_bufs [ m_numBufs ] ) {
reset();
2014-11-11 01:45:11 +03:00
return log("db: Could not allocate %"INT32" bytes for "
2013-08-03 00:12:24 +04:00
"cache for %s.",size,dbname);
}
m_numBufs++;
bufMem -= size;
m_memAlloced += size;
m_totalBufSize += size;
}
// now fill ourselves up
if ( m_convert )
convertCache ( m_convertNumPtrsMax , m_convertMaxMem );
return true;
}
2014-11-11 01:45:11 +03:00
//bool RdbCache::isInCache ( collnum_t collnum, key_t cacheKey, int32_t maxAge ) {
// . returns true if a record for collnum/cacheKey is cached and, when
//   maxAge > 0, is no more than maxAge seconds old
// . maxAge of 0 always misses; a negative maxAge skips the age check
// . does not touch hit/miss counts and never promotes the record
bool RdbCache::isInCache ( collnum_t collnum, char *cacheKey, int32_t maxAge ) {
	// maxAge of 0 means don't check cache
	if ( maxAge == 0 ) return false;
	// bail if no cache
	if ( m_numPtrsMax <= 0 ) return false;
	// m_numPtrsMax survives reset(), so the table can be missing even
	// though the count is positive (disabled cache or failed init())
	if ( ! m_ptrs ) return false;
	// look up in hash table
	int32_t n = hash32 ( cacheKey , m_cks ) % m_numPtrsMax;
	// chain: probe linearly until we hit an empty slot or a record
	// whose collnum and key both match
	while ( m_ptrs[n] &&
		( *(collnum_t *)(m_ptrs[n]+0                ) != collnum ||
		  KEYCMP(m_ptrs[n]+sizeof(collnum_t),cacheKey,m_cks) != 0 ) )
		if ( ++n >= m_numPtrsMax ) n = 0;
	// return false if not found
	if ( ! m_ptrs[n] ) return false;
	// the timestamp follows the collnum_t and the key
	char *p = m_ptrs[n] + sizeof(collnum_t) + m_cks;
	int32_t timestamp = *(int32_t *)p;
	// return false if too old
	if ( maxAge > 0 && getTimeLocal() - timestamp > maxAge ) return false;
	// return true if found
	return true;
}
// . a quick hack for SpiderCache.cpp
2014-11-11 01:45:11 +03:00
// . if your record is always a 4 byte int32_t call this
2013-08-03 00:12:24 +04:00
// . returns -1 if not found, so don't store -1 in there then
2014-10-30 22:36:39 +03:00
int64_t RdbCache::getLongLong ( collnum_t collnum ,
2014-11-11 01:45:11 +03:00
uint32_t key , int32_t maxAge ,
2013-08-03 00:12:24 +04:00
bool promoteRecord ) {
char *rec;
2014-11-11 01:45:11 +03:00
int32_t recSize;
2013-08-03 00:12:24 +04:00
key_t k;
k.n0 = 0;
k.n1 = (uint64_t)key;
2013-08-03 00:12:24 +04:00
// sanity check
//if ( m_cks != 4 ) { char *xx = NULL; *xx = 0; }
// return -1 if not found
if ( ! getRecord ( collnum ,
//k ,
//(char *)&key ,
(char *)&k,
&rec ,
&recSize ,
false ,
maxAge , // in seconds, -1 means none
true , // incCounts?
NULL , // cacheTime ptr
promoteRecord ) )
return -1LL;
if ( recSize != 8 ) {
2014-11-11 01:45:11 +03:00
log(LOG_LOGIC,"db: cache: Bad engineer. RecSize = %"INT32".",
2013-08-03 00:12:24 +04:00
recSize);
return -1LL;
}
// otherwise, it was found and the right length, so return it
2014-10-30 22:36:39 +03:00
return *(int64_t *)rec;
2013-08-03 00:12:24 +04:00
}
2014-10-30 22:36:39 +03:00
// both key and returned value are int64_ts for this
int64_t RdbCache::getLongLong2 ( collnum_t collnum ,
2014-11-11 01:45:11 +03:00
uint64_t key , int32_t maxAge ,
2013-08-03 00:12:24 +04:00
bool promoteRecord ) {
char *rec;
2014-11-11 01:45:11 +03:00
int32_t recSize;
2013-08-03 00:12:24 +04:00
key_t k;
k.n0 = (uint64_t)key;
2013-08-03 00:12:24 +04:00
k.n1 = 0;
// sanity check
if ( m_cks != 8 ) { char *xx = NULL; *xx = 0; }
if ( m_dks != 0 ) { char *xx = NULL; *xx = 0; }
// return -1 if not found
if ( ! getRecord ( collnum ,
(char *)&k,
&rec ,
&recSize ,
false ,
maxAge , // in seconds, -1 means none
true , // incCounts?
NULL , // cacheTime ptr
promoteRecord ) )
return -1LL;
if ( recSize != 8 ) {
2014-11-11 01:45:11 +03:00
log(LOG_LOGIC,"db: cache: Bad engineer. RecSize = %"INT32".",
2013-08-03 00:12:24 +04:00
recSize);
return -1LL;
}
// otherwise, it was found and the right length, so return it
2014-10-30 22:36:39 +03:00
return *(int64_t *)rec;
2013-08-03 00:12:24 +04:00
}
2014-11-11 01:45:11 +03:00
// this puts a int32_t in there
2013-08-03 00:12:24 +04:00
void RdbCache::addLongLong2 ( collnum_t collnum ,
2014-10-30 22:36:39 +03:00
uint64_t key , int64_t value ,
2013-08-03 00:12:24 +04:00
char **retRecPtr ) {
key_t k;
k.n0 = (uint64_t)key;
2013-08-03 00:12:24 +04:00
k.n1 = 0;
// sanity check
if ( m_cks != 8 ) { char *xx = NULL; *xx = 0; }
if ( m_dks != 0 ) { char *xx = NULL; *xx = 0; }
addRecord ( collnum , (char *)&k , NULL , 0 , (char *)&value , 8 ,
0 , // timestamp=now
retRecPtr );
// clear error in case addRecord set it
g_errno = 0;
}
2014-11-11 01:45:11 +03:00
// this puts a int32_t in there
2013-08-03 00:12:24 +04:00
void RdbCache::addLongLong ( collnum_t collnum ,
2014-11-11 01:45:11 +03:00
uint32_t key , int64_t value ,
2013-08-03 00:12:24 +04:00
char **retRecPtr ) {
key_t k;
k.n0 = 0;
k.n1 = (uint64_t)key;
2013-08-03 00:12:24 +04:00
// sanity check
//if ( m_cks != 4 ) { char *xx = NULL; *xx = 0; }
// sanity check
2014-11-11 01:45:11 +03:00
if ( m_cks > (int32_t)sizeof(key_t) ) { char *xx = NULL; *xx = 0; }
//if ( m_dks != 0 ) { char *xx = NULL; *xx = 0; }
2013-08-03 00:12:24 +04:00
//addRecord ( collnum , k , NULL , 0 , (char *)&value , 8 ,
//addRecord ( collnum , (char *)&key , NULL , 0 , (char *)&value , 8 ,
addRecord ( collnum , (char *)&k , NULL , 0 , (char *)&value , 8 ,
0 , // timestamp=now
retRecPtr );
// clear error in case addRecord set it
g_errno = 0;
}
2014-11-11 01:45:11 +03:00
int32_t RdbCache::getLong ( collnum_t collnum ,
uint64_t key , int32_t maxAge ,
2013-08-03 00:12:24 +04:00
bool promoteRecord ) {
char *rec;
2014-11-11 01:45:11 +03:00
int32_t recSize;
2013-08-03 00:12:24 +04:00
key_t k;
// TODO: fix this!?! k.n0 = key, k.n1 = 0?
k.n0 = 0;
k.n1 = key;
// return -1 if not found
if ( ! getRecord ( collnum ,
(char *)&k,
&rec ,
&recSize ,
false , // do copy?
2013-08-03 00:12:24 +04:00
maxAge , // in seconds, -1 means none
true , // incCounts?
NULL , // cacheTime ptr
promoteRecord ) )
return -1;
if ( recSize != 4 ) {
2014-11-11 01:45:11 +03:00
log(LOG_LOGIC,"db: cache: Bad engineer. RecSize = %"INT32".",
2013-08-03 00:12:24 +04:00
recSize);
return -1;
}
// otherwise, it was found and the right length, so return it
2014-11-11 01:45:11 +03:00
return *(int32_t *)rec;
2013-08-03 00:12:24 +04:00
}
2014-11-11 01:45:11 +03:00
// this puts a int32_t in there
2013-08-03 00:12:24 +04:00
void RdbCache::addLong ( collnum_t collnum ,
2014-11-11 01:45:11 +03:00
uint64_t key , int32_t value ,
2013-08-03 00:12:24 +04:00
char **retRecPtr ) {
key_t k;
k.n0 = 0;
k.n1 = key;
// sanity check
2014-11-11 01:45:11 +03:00
if ( m_cks > (int32_t)sizeof(key_t) ) { char *xx = NULL; *xx = 0; }
2014-11-15 04:30:32 +03:00
addRecord ( collnum , (char *)&k , NULL , 0 , (char *)&value ,
2014-12-01 18:45:59 +03:00
// by long we really mean 32 bits!
4,//sizeof(char *), // 4 , now 8 for 64 bit archs
2013-08-03 00:12:24 +04:00
0 , // timestamp=now
retRecPtr );
// clear error in case addRecord set it
g_errno = 0;
}
// . same as the collnum_t flavor of getRecord() but takes the collection
//   by name
// . returns false (after logging) if the collection name is unknown
bool RdbCache::getRecord ( char     *coll       ,
			   char     *cacheKey   ,
			   char    **rec        ,
			   int32_t  *recSize    ,
			   bool      doCopy     ,
			   int32_t   maxAge     ,
			   bool      incCounts  ,
			   time_t   *cachedTime ,
			   bool      promoteRecord) {
	// translate the name into a collection number
	collnum_t collnum = g_collectiondb.getCollnum ( coll );
	if ( collnum >= (collnum_t) 0 )
		return getRecord ( collnum , cacheKey , rec , recSize ,
				   doCopy , maxAge , incCounts ,
				   cachedTime , promoteRecord );
	log("db: Could not get cache rec for collection \"%s\".",coll);
	return false;
}
// returns false if was not in the cache, true otherwise
bool RdbCache::setTimeStamp ( collnum_t collnum ,
char *cacheKey ,
2014-11-11 01:45:11 +03:00
int32_t newTimeStamp ) {
2013-08-03 00:12:24 +04:00
// return now if table empty
if ( m_numPtrsMax <= 0 ) return false;
// look up in hash table
2014-11-11 01:45:11 +03:00
int32_t n = hash32 ( cacheKey , m_cks ) % m_numPtrsMax;
2013-08-03 00:12:24 +04:00
// chain
while ( m_ptrs[n] &&
( *(collnum_t *)(m_ptrs[n]+0 ) != collnum ||
KEYCMP(m_ptrs[n]+sizeof(collnum_t),cacheKey,m_cks) != 0 ) )
if ( ++n >= m_numPtrsMax ) n = 0;
// return ptr to rec
char *p = m_ptrs[n];
// return false if not found
if ( ! p ) return false;
// skip over collnum and key
p += sizeof(collnum_t) + m_cks;
// set the timestamp
2014-11-11 01:45:11 +03:00
*(int32_t *)p = newTimeStamp;
2013-08-03 00:12:24 +04:00
return true;
}
// . returns true if found, false if not found in cache
// . sets *rec and *recSize iff found
bool RdbCache::getRecord ( collnum_t collnum ,
//key_t cacheKey ,
char *cacheKey ,
char **rec ,
2014-11-11 01:45:11 +03:00
int32_t *recSize ,
2013-08-03 00:12:24 +04:00
bool doCopy ,
2014-11-11 01:45:11 +03:00
int32_t maxAge ,
2013-08-03 00:12:24 +04:00
bool incCounts ,
time_t *cachedTime ,
bool promoteRecord ) {
// maxAge of 0 means don't check cache
if ( maxAge == 0 ) return false;
// bail if no cache
if ( m_numPtrsMax <= 0 ) return false;
// if init() called failed because of oom...
if ( ! m_ptrs )
return log("cache: getRecord: failed because oom");
2013-08-03 00:12:24 +04:00
// time it -- debug
2014-10-30 22:36:39 +03:00
int64_t t = 0LL ;
2013-08-03 00:12:24 +04:00
if ( g_conf.m_logTimingDb ) t = gettimeofdayInMillisecondsLocal();
// reset this
if ( cachedTime ) *cachedTime = 0;
// only do copy supported
//if ( ! doCopy )
// return log("RdbCache::getRecord: only doCopy supported");
// look up in hash table
2014-11-11 01:45:11 +03:00
//int32_t n =(cacheKey.n0 + (uint64_t)cacheKey.n1)%m_numPtrsMax;
int32_t n = hash32 ( cacheKey , m_cks ) % m_numPtrsMax;
//int32_t n = cacheKey.n0 % m_numPtrsMax;
2013-08-03 00:12:24 +04:00
// chain
while ( m_ptrs[n] &&
( *(collnum_t *)(m_ptrs[n]+0 ) != collnum ||
//*(key_t *)(m_ptrs[n]+sizeof(collnum_t)) != cacheKey ) )
KEYCMP(m_ptrs[n]+sizeof(collnum_t),cacheKey,m_cks) != 0 ) )
if ( ++n >= m_numPtrsMax ) n = 0;
//while ( m_ptrs[n] && *(key_t *)m_ptrs[n] != cacheKey )
// if ( ++n >= m_numPtrsMax ) n = 0;
// return false if not found
if ( ! m_ptrs[n] ) {
if ( incCounts ) m_numMisses++;
return false;
}
// return ptr to rec
char *p = m_ptrs[n];
// skip over collnum and key
//p += sizeof(collnum_t) + sizeof(key_t);
p += sizeof(collnum_t) + m_cks;
// skip over time stamp
2014-11-11 01:45:11 +03:00
int32_t timestamp = *(int32_t *)p;
2013-08-03 00:12:24 +04:00
if ( cachedTime ) *cachedTime = timestamp;
// return false if too old
if ( maxAge > 0 && getTimeLocal() - timestamp > maxAge ) {
// debug msg
// don't print for tagdb, however, spider prints it
// too much and i don't care about it
if ( m_dbname[0]!='s' || m_dbname[1]!='i' )
log(LOG_DEBUG,"db: Found rec in cache for %s, "
2014-11-11 01:45:11 +03:00
"but elapsed time of %"INT32" is greater "
"than %"INT32".",
m_dbname,
(int32_t)(getTimeLocal() - timestamp) ,
maxAge );
2013-08-03 00:12:24 +04:00
if ( incCounts ) m_numMisses++;
return false;
}
2014-11-15 04:30:32 +03:00
// skip timestamp
2013-08-03 00:12:24 +04:00
p += 4;
// store data size if our recs are var length or we cache lists of
// fixed length recs, and those lists need a dataSize
if ( m_fixedDataSize == -1 || m_supportLists ) {
2014-11-11 01:45:11 +03:00
*recSize = *(int32_t *)p; p += 4; }
2013-08-03 00:12:24 +04:00
else
*recSize = m_fixedDataSize;
// . debug testing -- remove later
// . get checksum
//char *s = m_ptrs[n];
//char *send = s + (p - s) + *recSize - 3;
2014-11-11 01:45:11 +03:00
//int32_t crc = 0;
//while ( s < send ) { crc += *(int32_t *)s; s += 4; }
2013-08-03 00:12:24 +04:00
//if ( crc != m_crcs[n] ) {
// log("BAD ENGINNEER. CRC MISMATCH.");
// char *pp = NULL;
// *pp = 1;
// sleep (10000);
//}
// set it for return
*rec = p;
// copy the data and set "list" with it iff "doCopy" is true
if ( doCopy && *recSize > 0 ) {
*rec = mdup ( p , *recSize , "RdbCache3" );
if ( ! *rec ) {
return log("db: Could not allocate space for "
2014-11-11 01:45:11 +03:00
"cached record for %s of %"INT32" bytes.",
2013-08-03 00:12:24 +04:00
m_dbname,*recSize);
}
}
RdbCache *robots = Msg13::getHttpCacheRobots();
RdbCache *others = Msg13::getHttpCacheOthers();
//
// now we only promote the record if it is near the delete head
// in order to avoid having massive duplicity. if it is with 10%
// of the delete head's space i guess.
// i do this for all caches now... what are the downsides? i forget.
//
bool check = false;
//if ( this == &g_genericCache[SITEQUALITY_CACHEID] ) check = true;
if ( this == g_dns.getCache () ) check = true;
if ( this == g_dns.getCacheLocal () ) check = true;
if ( this == robots ) check = true;
if ( this == others ) check = true;
//if ( this == &g_deadWaitCache ) check = true;
//if ( this == &g_forcedCache ) check = true;
//if ( this == &g_alreadyAddedCache ) check = true;
// this algo seems to have issues with really large recs
// because spaces.live.com list was 570k and the data was foobar
// so just don't do promotion on it ever...
//if ( this == &g_tagdb.m_listCache ) check = true;
// the exact count cache...
//if ( this == &g_qtable ) check = true;
if ( m_totalBufSize < 20000 ) check = false;
if ( check ) promoteRecord = false;
// sanity check, do not allow the site quality cache or dns cache to
// be > 128MB, that just does not make sense and it complicates things
if ( check && m_totalBufSize > BUFSIZE ) { char *xx = NULL; *xx = 0; }
// sanity check
if ( m_tail < 0 || m_tail > m_totalBufSize ) {
char *xx = NULL; *xx = 0; }
// get the window of promotion
2014-11-11 01:45:11 +03:00
int32_t tenPercent = (int32_t)(((float)m_totalBufSize) * .10);
2013-08-03 00:12:24 +04:00
char *start1 = m_bufs[0] + m_tail ;
char *end1 = start1 + tenPercent;
char *start2 = NULL;
char *end2 = NULL;
char *max = m_bufs[0] + m_totalBufSize;
if ( end1 > max ) {
start2 = m_bufs[0];
end2 = start2 + (end1 - max);
end1 = max;
}
// are we in 10% range? if so, promote to head of the ring buffer
// to avoid losing it in a delete operation
if ( check && *rec >= start1 && *rec <= end1 ) promoteRecord = true;
if ( check && *rec >= start2 && *rec <= end2 ) promoteRecord = true;
// debug
//if ( check )
// logf(LOG_DEBUG,
2014-11-11 01:45:11 +03:00
// "db: promote=%"UINT32" "
// "start1=%"UINT32" end1=%"UINT32" "
// "start2=%"UINT32" end2=%"UINT32" "
// "rec=%"UINT32" m_tail=%"UINT32" bufs[0]=%"UINT32" total=%"UINT32"",
// (int32_t)promoteRecord ,
// (int32_t)start1,(int32_t)end1,(int32_t)start2,(int32_t)end2,
// (int32_t)*rec,(int32_t)m_tail,(int32_t)m_bufs[0],m_totalBufSize);
2013-08-03 00:12:24 +04:00
// . now promote the record, same as adding (this always copies)
// . do this after mdup as there is a chance it will overwrite
// the original record with the copy of the same record
// . Process.cpp turns off g_cacheWritesEnabled while it saves them
if ( promoteRecord && ! m_isSaving && g_cacheWritesEnabled ) {
//char *ptr = m_ptrs[n];
//removeKey ( collnum , cacheKey , ptr );
//markDeletedRecord(ptr);
2014-11-11 01:45:11 +03:00
//int32_t n = hash32 ( cacheKey , m_cks ) % m_numPtrsMax;
2013-08-03 00:12:24 +04:00
//if ( this == &g_robotdb.m_rdbCache )
// logf(LOG_DEBUG, "db: cachebug: promoting record "
2014-11-11 01:45:11 +03:00
// "k.n0=0x%"XINT64" n=%"INT32"",((key_t *)cacheKey)->n0,
2013-08-03 00:12:24 +04:00
// *recSize);
char *retRec = NULL;
addRecord ( collnum , cacheKey , *rec , *recSize , timestamp ,
&retRec );
// update our rec, it might have been deleted then re-added
// and we have to be careful of that delimter clobbering
// memset() below
if ( ! doCopy ) *rec = retRec;
}
// keep track of cache stats if we should
if ( incCounts ) {
m_numHits++;
m_hitBytes += *recSize;
}
// debug msg time
if ( g_conf.m_logTimingDb )
2014-11-11 01:45:11 +03:00
log(LOG_TIMING,"db: cache: %s getRecord %"INT32" bytes took %"INT64" "
2013-08-03 00:12:24 +04:00
"ms.",m_dbname,*recSize,
gettimeofdayInMillisecondsLocal()-t);
// it was found, so return true
return true;
}
// . returns true if found,
// . returns false if not found or on error
// . sets errno on error
// . list's data may be empty if it's a cached not found
// . we use "endKey" so we know if the FULL list was needed or not
// . if "incCounts" is true and we hit we inc the hit count
// . if "incCounts" is true and we miss we inc the miss count
bool RdbCache::getList ( collnum_t collnum ,
//key_t cacheKey ,
//key_t startKey ,
char *cacheKey ,
char *startKey ,
RdbList *list ,
bool doCopy ,
2014-11-11 01:45:11 +03:00
int32_t maxAge ,
2013-08-03 00:12:24 +04:00
bool incCounts ) {
// reset the list
list->reset();
// maxAge of 0 means don't check cache
if ( maxAge == 0 ) return false;
// get pure record
2014-11-11 01:45:11 +03:00
int32_t recSize;
2013-08-03 00:12:24 +04:00
char *rec;
// return false right away if not found
if ( ! getRecord ( collnum ,
cacheKey ,
&rec ,
&recSize ,
doCopy ,
maxAge ,
incCounts ,
NULL ) ) return false;
// first 2 keys of bytes are the start and end keys
//key_t endKey = *(key_t *)rec;
//char *data = rec + sizeof(key_t);
2014-11-11 01:45:11 +03:00
//int32_t dataSize = recSize - sizeof(key_t);
2013-08-03 00:12:24 +04:00
char *endKey = rec;
char *data = rec + m_dks;
2014-11-11 01:45:11 +03:00
int32_t dataSize = recSize - m_dks;
2013-08-03 00:12:24 +04:00
// use NULL if empty
if ( dataSize == 0 ) data = NULL;
// how could this happen
if ( dataSize < 0 ) return log(LOG_LOGIC,"db: cache: getList: "
"Bad data size.");
// . set the list!
// . data is NULL if it's a cached not found (empty list)
list->set ( data ,
dataSize ,
rec , // alloc
recSize , // alloc size
startKey ,
endKey ,
m_fixedDataSize ,
doCopy , // ownData?
m_useHalfKeys ,
m_dks );
// sanity check
//bool ok = list->checkList_r ( false , true );
//if ( ! ok ) log("RDBCACHE::GETLIST had problem");
// break out
//if ( ! ok ) { char *xx = NULL; *xx = 0; }
return true;
}
// . same as the collnum_t flavor of addList() but takes the collection
//   by name
// . returns false (after logging) if the collection does not exist
bool RdbCache::addList ( char *coll , char *cacheKey , RdbList *list ) {
	// translate the name into a collection number
	collnum_t collnum = g_collectiondb.getCollnum ( coll );
	if ( collnum >= 0 )
		return addList ( collnum , cacheKey , list );
	log("query: Collection %s does not exist. Cache failed.",
	    coll);
	return false;
}
// . caches "list" under collnum/cacheKey, timestamped now
// . the stored record is the list's last key (m_dks bytes) followed by
//   the raw list data, which is the layout getList() expects
// . returns false if the list's key size does not match this cache's
//   data key size, otherwise whatever addRecord() returns
bool RdbCache::addList ( collnum_t collnum , char *cacheKey , RdbList *list ) {
	// . sanity check
	// . msg2 sometimes fails this check when it adds to the cache
	if ( list->m_ks != m_dks ) {
		return log("cache: key size %" INT32 " != %" INT32 "",
			   (int32_t)list->m_ks,(int32_t)m_dks);
	}
	// store endkey then list data in the record data slot
	char *k = list->getLastKey ();
	// just to make sure we never hand addRecord() a NULL ptr with a
	// non-zero size
	char    *data     = list->getList();
	int32_t  dataSize = list->getListSize();
	if ( ! data ) dataSize = 0;
	// . add as a record
	// . return false on error (and set errno), true otherwise
	return addRecord ( collnum  ,
			   cacheKey ,
			   k        , m_dks    , // rec1 = end key
			   data     , dataSize , // rec2 = list data
			   0        );           // timestamp=now
}
// . basically adding a list of only 1 record
// . used by dns/Dns.cpp to store ip's whose key is the hash of a hostname
// . "rec" may be a raw RdbList (format=key/recSize/rec) or may just be data
// . we do not copy "rec" so caller must malloc it
// . returns -1 on error and sets errno
// . returns node # in tree we added it to on success
bool RdbCache::addRecord ( collnum_t collnum ,
//key_t cacheKey ,
char *cacheKey ,
char *rec ,
2014-11-11 01:45:11 +03:00
int32_t recSize ,
int32_t timestamp ,
2013-08-03 00:12:24 +04:00
char **retRecPtr ) {
return addRecord (collnum, cacheKey, NULL, 0, rec, recSize, timestamp,
retRecPtr);
}
// . same as the collnum_t flavor of addRecord() but takes the collection
//   by name
// . returns false (after logging) if the collection name is unknown
bool RdbCache::addRecord ( char    *coll      ,
			   char    *cacheKey  ,
			   char    *rec       ,
			   int32_t  recSize   ,
			   int32_t  timestamp ) {
	// translate the name into a collection number
	collnum_t collnum = g_collectiondb.getCollnum ( coll );
	if ( collnum >= (collnum_t) 0 )
		return addRecord ( collnum , cacheKey , NULL , 0 ,
				   rec , recSize , timestamp );
	log("db: Could not cache rec for collection \"%s\".",coll);
	return false;
}
bool RdbCache::addRecord ( collnum_t collnum ,
//key_t cacheKey ,
char *cacheKey ,
char *rec1 ,
2014-11-11 01:45:11 +03:00
int32_t recSize1 ,
2013-08-03 00:12:24 +04:00
char *rec2 ,
2014-11-11 01:45:11 +03:00
int32_t recSize2 ,
int32_t timestamp ,
2013-08-03 00:12:24 +04:00
char **retRecPtr ) {
2014-10-30 22:36:39 +03:00
//int64_t startTime = gettimeofdayInMillisecondsLocal();
2013-08-03 00:12:24 +04:00
if ( collnum < (collnum_t)0) {char *xx=NULL;*xx=0; }
if ( collnum >= m_maxColls ) {char *xx=NULL;*xx=0; }
// full key not allowed because we use that in markDeletedRecord()
if ( KEYCMP(cacheKey,KEYMAX(),m_cks) == 0 ) { char *xx=NULL;*xx=0; }
2013-08-03 00:12:24 +04:00
// bail if cache empty
if ( m_totalBufSize <= 0 ) return true;
// debug msg
2014-10-30 22:36:39 +03:00
int64_t t = 0LL ;
2013-08-03 00:12:24 +04:00
if ( g_conf.m_logTimingDb ) t = gettimeofdayInMillisecondsLocal();
// need space for record data
2014-11-11 01:45:11 +03:00
int32_t need = recSize1 + recSize2;
2013-08-03 00:12:24 +04:00
// are we bad?
if (m_fixedDataSize>=0 && ! m_supportLists && need != m_fixedDataSize){
char *xx=NULL;*xx=0;
2014-11-11 01:45:11 +03:00
return log(LOG_LOGIC,"db: cache: addRecord: %"INT32" != %"INT32".",
2013-08-03 00:12:24 +04:00
need,m_fixedDataSize);
}
// don't allow 0 timestamps, those are special indicators
if ( timestamp == 0 ) timestamp = getTimeLocal();
//if ( timestamp == 0 && cacheKey.n0 == 0LL && cacheKey.n1 == 0 )
if ( timestamp == 0 && KEYCMP(cacheKey,KEYMIN(),m_cks)==0 )
return log(LOG_LOGIC,"db: cache: addRecord: Bad "
"key/timestamp.");
// bail if no writing ops allowed now
if ( ! g_cacheWritesEnabled ) return false;
if ( m_isSaving ) return false;
// collnum_t and cache key
//need += sizeof(collnum_t) + sizeof(key_t);
need += sizeof(collnum_t) + m_cks;
// timestamp
need += 4;
// . trailing 0 collnum_t, key and trailing time stamp
// . this DELIMETER tells us to go to the next buf
2014-11-18 05:13:36 +03:00
//need += sizeof(collnum_t) + sizeof(key_t) + 4 ; // timestamp
2013-08-03 00:12:24 +04:00
need += sizeof(collnum_t) + m_cks + 4 ;
// and size, if not fixed or we support lists
if ( m_fixedDataSize == -1 || m_supportLists ) need += 4;
// watch out
if ( need >= m_totalBufSize )
return log(LOG_INFO,
2014-11-11 01:45:11 +03:00
"db: Could not fit record of %"INT32" bytes into %s "
"cache. Max size is %"INT32".",need,m_dbname,
2013-08-03 00:12:24 +04:00
m_totalBufSize);
if ( need >= BUFSIZE )
return log(LOG_INFO,
2014-11-11 01:45:11 +03:00
"db: Could not fit record of %"INT32" bytes into %s "
2013-08-03 00:12:24 +04:00
"cache. Max size is %i.",need,m_dbname,BUFSIZE);
// if too many slots in hash table used free one up
while ( m_numPtrsUsed >= m_threshold )
if ( ! deleteRec() ) {
return false;
}
// . do NOT split across buffers, align on a boundary if we need to
// . "i1" is where we PLAN to store the record
2014-11-11 01:45:11 +03:00
int32_t i1 = m_offset;
int32_t bufNum1 = i1 / BUFSIZE;
2013-08-03 00:12:24 +04:00
// what buffer does the byte AFTER our last byte fall into?
2014-11-11 01:45:11 +03:00
int32_t i2 = m_offset + need;
int32_t bufNum2 = i2 / BUFSIZE;
2013-08-03 00:12:24 +04:00
// BUT if bufNum1 is the last buffer, it will most likely be SMALLER
// than "BUFSIZE" byts, so do a special check to see if "i2" falls
// outside of it!
if ( i2 >= m_totalBufSize ) bufNum2 = bufNum1 + 1;
// . "i1b" is offset of where we REALLY store the record
// . "i2b" is the offset of the byte after the last byte that we store
2014-11-11 01:45:11 +03:00
int32_t i1b = i1;
int32_t i2b = i2;
2013-08-03 00:12:24 +04:00
if ( bufNum1 != bufNum2 ) {
// advance to first byte of the next buffer if not enough room
// in bufNum1 to FULLY contain the record
i1b = bufNum2 * BUFSIZE;
i2b = i1b + need;
}
// . no, "i1c" is where we "really really" store it
// . and "i2c" is the offset of the byte after the last we store
2014-11-11 01:45:11 +03:00
int32_t i1c = i1b;
int32_t i2c = i2b;
2013-08-03 00:12:24 +04:00
if ( i2b >= m_totalBufSize ) {
// reset back to the very beginning...
i1c = 0;
i2c = i1c + need;
}
// . increase m_tail so it is NOT in the range: [i1,i2b)
// . NEVER do this if we are the first rec added though, because
// m_tail will equal i1 at that point...
while ( m_numPtrsUsed!=0 && m_tail>=i1 && m_tail<=i2 )
deleteRec();
while ( m_numPtrsUsed!=0 && m_tail>=i1b && m_tail<=i2b )
deleteRec();
while ( m_numPtrsUsed!=0 && m_tail>=i1c && m_tail<=i2c )
deleteRec();
// store rec at "start"
2014-11-11 01:45:11 +03:00
int32_t bufNumStart = i1c / BUFSIZE;
2013-08-03 00:12:24 +04:00
char *start = m_bufs[bufNumStart] + i1c % BUFSIZE;
// point to storage area
char *p = start;
// before we start writing over possible record data,
// if we are promoting a rec, "rec" may actually point
// somewhere into here, so be careful!
//if ( rec2 <= start && rec2+recSize2 > start ) { char*xx=NULL;*xx=0;}
//if ( start <= rec2 && start+32>= rec2 ) { char*xx=NULL;*xx=0;}
//if ( this == &g_robotdb.m_rdbCache )
2014-11-11 01:45:11 +03:00
// logf(LOG_DEBUG, "db: cachebug: adding rec k.n0=0x%"XINT64" rs=%"INT32" "
// "off=%"INT32" bufNum=%"INT32" ptr=0x%"XINT32" tail=%"INT32" numPtrs=%"INT32"",
2013-08-03 00:12:24 +04:00
// ((key_t *)cacheKey)->n0,recSize1+recSize2,
2014-11-11 01:45:11 +03:00
// i1c,bufNumStart,(int32_t)p,m_tail,m_numPtrsUsed);
2013-08-03 00:12:24 +04:00
// if we wiped out all recs then reset tail to m_offset
if ( m_numPtrsUsed == 0 ) {
//if ( this == &g_robotdb.m_rdbCache )
// log("db: cachebug: full tail reset. tail=0");
m_tail = 0;
}
// store collnum
*(collnum_t *)p = collnum; p += sizeof(collnum_t);
// store key
//*(key_t *)p = cacheKey; p += sizeof(key_t);
KEYSET(p,cacheKey,m_cks); p += m_cks;
// store timestamp
2014-11-11 01:45:11 +03:00
*(int32_t *)p = timestamp; p += 4;
2013-08-03 00:12:24 +04:00
// then dataSize if we need to
if ( m_fixedDataSize == -1 || m_supportLists ) {
2014-11-18 05:13:36 +03:00
*(int32_t *)p = recSize1+recSize2; p +=4; } //datasize
2013-08-03 00:12:24 +04:00
// sanity : check if the recSizes add up right
else if ( m_fixedDataSize != recSize1 + recSize2 ){
char *xx = NULL; *xx = 0; }
// save for returning
if ( retRecPtr ) *retRecPtr = p;
// sanity check
//if ( rec1 < p && rec1 + recSize1 > p ) { char*xx=NULL;*xx=0;}
//if ( rec2 < p && rec2 + recSize2 > p ) { char*xx=NULL;*xx=0;}
//if ( rec1 >= p && rec1 < p + need ) { char*xx=NULL;*xx=0;}
//if ( rec2 >= p && rec2 < p + need ) {
// log("cache: poop");}//char*xx=NULL;*xx=0;}
// then data
memcpy ( p , rec1 , recSize1 ); p += recSize1;
memcpy ( p , rec2 , recSize2 ); p += recSize2;
// . store 0 collnum, key AND timestamp at end of record --> delimeter
// . CAUTION: if doing a "promote" we can end up deleting the rec
// we are pointing to, then clobbering it with this memset!
//memset ( p , 0 , sizeof(collnum_t) + 16 /*key+timestamp*/);
memset ( p , 0 , sizeof(collnum_t) + m_cks + 4 /*key+timestamp*/);
// count the occupied memory, excluding the terminating 0 key
m_memOccupied += ( p - start );
// debug msg (MDW)
2014-11-11 01:45:11 +03:00
//log("cache: adding rec @ %"UINT32" size=%"INT32" tail=%"UINT32"",
2013-08-03 00:12:24 +04:00
// i1c,p-start,m_tail);
2014-11-11 01:45:11 +03:00
//log("cache: stored k.n1=%"UINT32" k.n0=%"UINT64" %"INT32" bytes @ %"UINT32" tail=%"UINT32"",
2013-08-03 00:12:24 +04:00
// ((key_t *)cacheKey)->n1,
// ((key_t *)cacheKey)->n0,p-start,i1c,m_tail);
//if ( m_cks == 4 )
2014-11-11 01:45:11 +03:00
// log("stored k=%"XINT32" %"INT32" bytes @ %"UINT32"",
// *(int32_t *)cacheKey,p-start,i);//(uint32_t)start);
2013-08-03 00:12:24 +04:00
// update offset, excluding the terminating 0 key
m_offset = i1c + ( p - start );
// . debug testing -- remove later
// . get the crc of the whole thing
//char *s = start;
//char *send = p - 3;
2014-11-11 01:45:11 +03:00
//int32_t crc = 0;
//while ( s < send ) { crc += *(int32_t *)s; s += 4; }
2013-08-03 00:12:24 +04:00
// . add to hash table
// . if we are already in there, preserve the
addKey ( collnum , cacheKey , start ); // , crc ); // debug test
// debug msg time
2014-11-11 01:45:11 +03:00
log(LOG_TIMING,"db: cache: %s addRecord %"INT32" bytes took %"INT64" "
"ms this=0x%"PTRFMT" key.n1=%"UINT32" n0=%"UINT64"",
2014-11-11 01:45:11 +03:00
m_dbname, (int32_t)(p - start) ,
2013-08-03 00:12:24 +04:00
gettimeofdayInMillisecondsLocal()-t,
(PTRTYPE)this,
2013-08-03 00:12:24 +04:00
((key_t *)(&cacheKey))->n1 ,
((key_t *)(&cacheKey))->n0 );
2014-11-11 01:45:11 +03:00
//log("%s addRecord %"INT32" bytes @ offset=%"INT32" k.n1=%"UINT32" n0=%"UINT64" "
// "TOOK %"INT64" ms" ,
2013-08-03 00:12:24 +04:00
// m_dbname , need , i ,
// cacheKey.n1 , cacheKey.n0 ,
m_adds++;
2014-10-30 22:36:39 +03:00
//int64_t now = gettimeofdayInMillisecondsLocal();
//int64_t took = now - startTime;
2013-08-03 00:12:24 +04:00
//if(took > 10)
2014-11-11 01:45:11 +03:00
// log(LOG_INFO, "admin: adding to RdbCache %s of %"INT32" bytes "
// "took %"INT64" ms.",m_dbname,recSize1+recSize2,took);
2013-08-03 00:12:24 +04:00
m_needsSave = true;
return true;
}
// delete the rec at m_tail from the hashtable
bool RdbCache::deleteRec ( ) {
// sanity.
if ( m_tail < 0 || m_tail >= m_totalBufSize ) {
2013-08-03 00:12:24 +04:00
char *xx = NULL; *xx = 0;}
// don't do anything if we're empty
// ...fix...we ned to make sure the head doesn't eat the tail, so
// don't ever skip this stuff
//if ( m_numPtrsUsed <= 0 ) return;
//if ( b == 36887 )
// log("hey");
// delete all recs in [a,b]
//if (b > m_totalBufSize) b = m_totalBufSize;
//while ( m_tail < b ) {
// get ptr from offset
2014-11-11 01:45:11 +03:00
int32_t bufNum = m_tail / BUFSIZE;
2013-08-03 00:12:24 +04:00
char *p = m_bufs[bufNum] + m_tail % BUFSIZE;
top:
// get ptr to where tail is currently
char *start = p;
// get collnum
collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);
// NSD: trying to find the error where removeKey() doesn't
// find the key even after going through all the records
// I think that the data here is corrupted or not pointed right
// . collnum can be 0 in case we have to go to next buffer
// . allow -1 collnum to exist, seems to happen in robots.txt cache
// sometimes, maybe for delete collnum... not sure, but the timestamp
// seems to be legit
if ( collnum >= m_maxColls || collnum < -1
2013-10-19 04:49:36 +04:00
// we now call ::reset(oldcollnum)
// when resetting a collection in
// Collectiondb::resetColl() which calls
// SpiderColl::clear() which calls
// lastDownloadTime.reset(oldcollnum)
// and then we nuke the collrec so it was
// triggering this. so check m_ptrs[i]==-1
//|| !g_collectiondb.m_recs[collnum]
) {
2013-08-03 00:12:24 +04:00
log (LOG_WARN,"db: cache: deleteRec: possible "
"corruption, start=%"PTRFMT" collNum=%"INT32" "
"maxCollNum=%"INT32" dbname=%s", (PTRTYPE)start,
2014-11-11 01:45:11 +03:00
(int32_t)collnum, g_collectiondb.m_numRecsUsed,
2013-08-03 00:12:24 +04:00
m_dbname);
char *xx=NULL;*xx=0;
2013-08-03 00:12:24 +04:00
// exception for gourav's bug (dbname=Users)
// i am tired of it craping out every 2-3 wks
//if ( m_dbname[0]=='U' ) return true;
2013-08-03 00:12:24 +04:00
// some records might have been deleted
m_needsSave = true;
// but its corrupt so don't save to disk
m_corruptionDetected = true;
//char *xx=NULL;*xx=0;
return false;
}
// get key
//key_t k = *(key_t *)p ; p += sizeof(key_t);
char *k = p ; p += m_cks;
// get time stamp
2014-11-11 01:45:11 +03:00
int32_t timestamp = *(int32_t *)p ; p += 4;
2013-08-03 00:12:24 +04:00
// a timestamp of 0 and 0 key, means go to next buffer
//if ( timestamp == 0 && k.n0 == 0LL && k.n1 == 0 ) {
if ( timestamp == 0 && KEYCMP(k,KEYMIN(),m_cks)==0 ) {
// if we wrap around back to first buffer then
// change the "wrapped" state to false. that means
2014-11-18 05:13:36 +03:00
// we are no longer directly in front of the write
2013-08-03 00:12:24 +04:00
// head, but behind him again.
if ( ++bufNum >= m_numBufs ) {
bufNum = 0;
//m_tail = 0;
//m_wrapped = false;
//return true; //continue;
}
// otherwise, point to the start of the next buffer
p = m_bufs[bufNum];
m_tail = bufNum * BUFSIZE;
// sanity
//if ( m_tail < 0 || m_tail > m_totalBufSize ) {
// char *xx = NULL; *xx = 0;}
//return true; // continue;
goto top;
}
// get data size
2014-11-11 01:45:11 +03:00
int32_t dataSize;
2013-08-03 00:12:24 +04:00
// get dataSize and data
if ( m_fixedDataSize == -1 || m_supportLists ) {
2014-11-11 01:45:11 +03:00
dataSize = *(int32_t *)p; p += 4; }
2013-08-03 00:12:24 +04:00
else
dataSize = m_fixedDataSize;
// sanity
if ( dataSize < 0 || dataSize > m_totalBufSize ){
char *xx = NULL; *xx = 0;
}
//if ( this == &g_robotdb.m_rdbCache )
2014-11-11 01:45:11 +03:00
// logf(LOG_DEBUG, "db: cachebug: removing k.n0=0x%"XINT64" "
// "tail=%"INT32" ds=%"INT32"", ((key_t *)k)->n0,m_tail,dataSize);
2013-08-03 00:12:24 +04:00
// debug msg (MDW)
2014-11-11 01:45:11 +03:00
//log("cache: deleting rec @ %"INT32" size=%"INT32"",m_tail,
2013-08-03 00:12:24 +04:00
// dataSize+2+12+4+4);
// skip over rest of rec
p += dataSize;
// remove this rec from the count, (4 bytes for ptr)
//m_memOccupied -= (p - start) + 4;
// otherwise, it's a simple advance
m_tail += (p - start);
// sanity. this must be failing due to a corrupt dataSize...
if ( m_tail < 0 ||
m_tail +(int32_t)sizeof(collnum_t)+m_cks+4>m_totalBufSize){
2013-08-03 00:12:24 +04:00
char *xx = NULL; *xx = 0;}
// delete key from hash table, iff is for THIS record
// but if it has not already been voided.
// we set key to KEYMAX() in markDeletedRecord()
2013-08-03 00:12:24 +04:00
if ( KEYCMP(k,KEYMAX(),m_cks) != 0 ){
removeKey ( collnum , k , start );
markDeletedRecord(start);
}
//else
// logf(LOG_DEBUG,"test: oops");
// count as a delete
m_deletes++;
// void this key in the buffer,
// so it doesn't try to delete it later from
// the hash table
// memset(start+sizeof(collnum_t), 0xff, m_cks);
// debug msg
2014-11-11 01:45:11 +03:00
//log("%s m_tail = %"INT32", #ptrs=%"INT32"",
2013-08-03 00:12:24 +04:00
// m_dbname,m_tail,m_numPtrsUsed);
//}
// debug msg
2014-11-11 01:45:11 +03:00
//log("%s m_tail = %"INT32", #ptrs=%"INT32"",m_dbname,m_tail,m_numPtrsUsed);
2013-08-03 00:12:24 +04:00
m_needsSave = true;
return true;
}
// mark a record in the buffer deleted to ensure that we reclaim the memory
// and attempt to delete the key only once.
void RdbCache::markDeletedRecord(char *ptr){
2014-11-11 01:45:11 +03:00
int32_t dataSize = sizeof(collnum_t)+m_cks+sizeof(int32_t);
2013-08-03 00:12:24 +04:00
// debug it
2014-11-11 01:45:11 +03:00
//logf(LOG_DEBUG,"cache: makeDeleteRecord ptr=0x%"XINT32" off=%"INT32"",
// (int32_t)ptr,ptr-m_bufs[0]);
2013-08-03 00:12:24 +04:00
// get dataSize and data
if ( m_fixedDataSize == -1 || m_supportLists ) {
dataSize += 4 + // size
2014-11-11 01:45:11 +03:00
*(int32_t*)(ptr+
2013-08-03 00:12:24 +04:00
sizeof(collnum_t)+ // collnum
m_cks+ // key
2014-11-11 01:45:11 +03:00
sizeof(int32_t)); // timestamp
2013-08-03 00:12:24 +04:00
}
else
dataSize += m_fixedDataSize;
// mark newly freed mem
m_memOccupied -= dataSize;
// relabel old record key as 0xffffffff... so key is not removed
// more than once.
memset(ptr+sizeof(collnum_t), 0xff, m_cks);
}
// . remove the hash table slot for (collnum,key), but only if that slot
//   points at "rec"
// . after clearing the slot, every entry that follows it up to the next
//   empty slot is re-added with addKey() so linear-probe chains stay intact
//   ("patch the hole")
// . if the key cannot be found by probing we fall back to scanning the whole
//   table, and core if it is not there either
void RdbCache::removeKey ( collnum_t collnum , char *key , char *rec ) {
	// home slot for this key
	int32_t n = hash32 ( key , m_cks ) % m_numPtrsMax;
	// follow the chain until we hit an empty slot or our collnum/key
	while ( m_ptrs[n] &&
		( *(collnum_t *)(m_ptrs[n]+0                ) != collnum ||
		  KEYCMP(m_ptrs[n]+sizeof(collnum_t),key,m_cks) != 0 ) )
		if ( ++n >= m_numPtrsMax ) n = 0;
	// . the chain ended without a match
	// . this happens sometimes, if m_tail wraps to 0 and m_tail revisits
	//   recs it already removed from the hash table
	if ( ! m_ptrs[n] ) {
		log(LOG_LOGIC,"db: cache: removeKey: Could not find key. "
		    "Trying to scan whole table.");
		// try scanning whole table
		int32_t i;
		for ( i = 0 ; i < m_numPtrsMax ; i++ ) {
			// skip if empty
			if ( ! m_ptrs[i] ) continue;
			// . skip if it belongs to another collection
			// . the chain probe above requires the collnum to
			//   match too; without this check the scan could stop
			//   on another collection's record holding the same
			//   key bytes and never reach the slot we want
			if ( *(collnum_t *)m_ptrs[i] != collnum ) continue;
			// skip if no match
			if (KEYCMP(m_ptrs[i]+sizeof(collnum_t),key,m_cks) != 0)
				continue;
			// got a match
			log(LOG_LOGIC,"db: cache: removeKey. Found key in "
			    "linear scan. Wierd.");
			n = i;
			break;
		}
		if ( i >= m_numPtrsMax ) {
			log(LOG_LOGIC,"db: cache: removeKey: BAD ENGINEER. "
			    "dbname=%s",m_dbname );
			char *xx = NULL;
			*xx = 1;
			return;
		}
	}
	// . if the slot does not point to this rec then it points to a newer
	//   copy of the rec, so do not delete it
	// . this is not expected to happen anymore, so just bail quietly
	if ( m_ptrs[n] != rec ) return;
	// clear it
	m_ptrs[n] = NULL;
	m_numPtrsUsed--;
	m_memOccupied -= sizeof(char *);
	// advance through list after us now
	if ( ++n >= m_numPtrsMax ) n = 0;
	// keep looping until we hit an empty slot
	while ( m_ptrs[n] ) {
		char *ptr = m_ptrs[n];
		// point to the key
		char *kptr = ptr + sizeof(collnum_t);
		// clear it
		m_ptrs[n] = NULL;
		// undo stats, addKey() below counts it again
		m_numPtrsUsed--;
		m_memOccupied -= sizeof(char *);
		// re-hash it back to possibly fill the "gap"
		addKey ( *(collnum_t *)ptr , kptr , ptr );
		if ( ++n >= m_numPtrsMax ) n = 0;
	}
}
// . point the hash table slot for (collnum,key) at the record "ptr"
// . uses linear probing starting at hash32(key) % m_numPtrsMax
// . if the key already has a slot, the record it pointed to is voided with
//   markDeletedRecord() and the slot is re-pointed without changing the
//   slot count
void RdbCache::addKey ( collnum_t collnum , char *key , char *ptr ) {
	// start probing at the key's home slot
	int32_t slot = hash32 ( key , m_cks ) % m_numPtrsMax;
	for ( ; ; ) {
		char *occupant = m_ptrs[slot];
		// an empty slot ends the chain
		if ( ! occupant ) break;
		// so does a slot already holding this collnum and key
		bool sameColl = ( *(collnum_t *)(occupant+0) == collnum );
		if ( sameColl &&
		     KEYCMP(occupant+sizeof(collnum_t),key,m_cks) == 0 )
			break;
		// otherwise try the next slot, wrapping at the end
		slot++;
		if ( slot >= m_numPtrsMax ) slot = 0;
	}
	char *previous = m_ptrs[slot];
	if ( previous ) {
		// we are replacing an existing record, so mark the old
		// record as deleted
		markDeletedRecord ( previous );
	}
	else {
		// a brand new slot, count it
		m_numPtrsUsed++;
		m_memOccupied += sizeof(char *);
	}
	// store the ptr
	m_ptrs[slot] = ptr;
}
2013-10-19 04:49:36 +04:00
/*
void RdbCache::clearAll ( ) {
//if ( m_numBufs > 0 )
// log("db: resetting record cache");
m_offset = 0;
m_tail = 0;
2014-11-11 01:45:11 +03:00
//for ( int32_t i = 0 ; i < m_numBufs ; i++ )
2013-10-19 04:49:36 +04:00
// // all bufs, but not necessarily last, are BUFSIZE bytes big
// mfree ( m_bufs[i] , m_bufSizes[i] , "RdbCache" );
//m_numBufs = 0;
//m_totalBufSize= 0;
//if(m_ptrs ) mfree ( m_ptrs , m_numPtrsMax*sizeof(char *),"RdbCache");
//m_ptrs = NULL;
m_numPtrsUsed = 0;
// can't reset this, breaks the load!
//m_numPtrsMax = 0;
m_memOccupied = 0;
//m_memAlloced = 0;
m_numHits = 0;
m_numMisses = 0;
//m_wrapped = false;
m_adds = 0;
m_deletes = 0;
// assume no need to call convertCache()
m_convert = false;
m_isSaving = false;
}
*/
//
// . MDW: took out clear() on suspicion of corruption... i think ninad's
// corruption detection would panic on collnum_t's of -1 anyway...
//
2013-08-03 00:12:24 +04:00
// . this just clears the contents of the cache
// . used when deleting a collection in Rdb::delColl() and used in
// Rdb::updateToRebuild() when updating/setting the rdb to a rebuilt rdb
/*
2013-08-03 00:12:24 +04:00
void RdbCache::clear ( collnum_t collnum ) {
// bail if no writing ops allowed now
if ( ! g_cacheWritesEnabled ) { char *xx=NULL;*xx=0; }
if ( m_isSaving ) { char *xx=NULL;*xx=0; }
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < m_numPtrsMax ; i++ ) {
2013-08-03 00:12:24 +04:00
// skip if empty bucket
if ( ! m_ptrs[i] ) continue;
// skip if wrong collection
if ( *(collnum_t *)m_ptrs[i] != collnum ) continue;
// change to the -1 collection, nobody should use that and
// it should get kicked out over time
*(collnum_t *)m_ptrs[i] = -1;
}
}
*/
2013-08-03 00:12:24 +04:00
bool RdbCache::load ( ) {
return load ( m_dbname );
}
// forward declarations of the thread entry point and the completion callback
// that RdbCache::save() hands to g_threads.call()
static void *saveWrapper ( void *state , ThreadEntry *te ) ;
static void threadDoneWrapper ( void *state , ThreadEntry *te ) ;
// . just like RdbTree::fastSave()
// . returns false if blocked and is saving
bool RdbCache::save ( bool useThreads ) {
if ( g_conf.m_readOnlyMode ) return true;
// if we do not need it, don't bother
if ( ! m_needsSave ) return true;
// if corruption was detected, don't bother
if ( m_corruptionDetected ) return true;
// return true if already in the middle of saving
if ( m_isSaving ) return false;
// log
2014-11-11 01:45:11 +03:00
log(LOG_INIT,"db: Saving %"INT32" bytes of cache to %s/%s.cache",
2013-08-03 00:12:24 +04:00
m_memAlloced,g_hostdb.m_dir,m_dbname);
// spawn the thread
if ( useThreads ) {
// lock cache while saving
m_isSaving = true;
// make a thread. returns true on success, in which case
// we return false to indicate we blocked.
if ( g_threads.call ( SAVETREE_THREAD ,
1 , // niceness
this , // state
threadDoneWrapper , // callback
saveWrapper ) )
return false;
// crap had an error spawning thread
if ( ! g_threads.m_disabled )
log("db: Error spawning cache write thread. "
"Not using threads.");
}
// do it directly with no thread
save_r();
// wrap it up
threadDone ();
return true;
}
void threadDoneWrapper ( void *state , ThreadEntry *te ) {
RdbCache *THIS = (RdbCache *)state;
THIS->threadDone ( );
}
// . called once a save attempt has finished, threaded or not
// . unlocks the cache, clears m_needsSave and logs m_saveError if it is set
void RdbCache::threadDone ( ) {
	// allow cache to change now
	m_isSaving = false;
	// and we are in sync with that data saved on disk
	m_needsSave = false;
	// nothing to report if the save went fine
	if ( ! m_saveError ) return;
	log("db: Had error saving cache to disk for %s: %s.",
	    m_dbname,mstrerror(m_saveError));
}
void *saveWrapper ( void *state , ThreadEntry *te ) {
RdbCache *THIS = (RdbCache *)state;
// assume no error
THIS->m_saveError = 0;
// do it
if ( THIS->save_r () ) return NULL;
// we got an error, save it
THIS->m_saveError = errno;
return NULL;
}
// returns false withe rrno set on error
bool RdbCache::save_r ( ) {
// append .cache to "dbname" to get cache filename
char filename [ 64 ];
if ( gbstrlen(m_dbname) > 50 )
return log("db: Dbname too long. Could not save cache.");
2013-08-03 00:12:24 +04:00
sprintf ( filename , "%s%s.cache" , g_hostdb.m_dir , m_dbname );
//File f;
//f.set ( g_hostdb.m_dir , filename );
// open the file
//if ( ! f.open ( O_RDWR | O_CREAT ) )
int fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
if ( fd < 0 )
return log("db: Had opening file to save cache to: %s.",
mstrerror(errno));
bool status = save2_r ( fd );
close ( fd );
return status;
}
bool RdbCache::save2_r ( int fd ) {
2013-08-03 00:12:24 +04:00
int n;
2014-11-11 01:45:11 +03:00
int32_t off = 0;
2013-08-03 00:12:24 +04:00
// general info
n = gbpwrite ( fd , &m_numPtrsMax , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_maxMem , 4 , off ); off += 4;
if ( n != 4 ) return false;
// mem stuff
n = gbpwrite ( fd , &m_memAlloced , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_memOccupied , 4 , off ); off += 4;
if ( n != 4 ) return false;
// save the buffer stuff
n = gbpwrite ( fd , &m_numBufs , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_totalBufSize, 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_offset , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_tail , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_wrapped , 1 , off ); off += 1;
if ( n!= 1 ) return false;
// write each buf
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < m_numBufs ; i++ ) {
2013-08-03 00:12:24 +04:00
// write end relative
2014-11-11 01:45:11 +03:00
//int32_t end = (m_bufEnds[i] - m_bufs[i]);
2013-08-03 00:12:24 +04:00
//if ( end < 0 ) end = -1;
//n = pwrite ( fd ,&end , 4 , off ); off += 4;
//if ( n != 4 ) return false;
// and buf size
2014-11-11 01:45:11 +03:00
int32_t bufSize = m_bufSizes[i];
2013-08-03 00:12:24 +04:00
n = gbpwrite ( fd , &bufSize , 4 , off ); off += 4;
if ( n != 4 ) return false;
// then write contents of buffer #i
n = gbpwrite ( fd, m_bufs[i] , bufSize , off ); off += bufSize;
if ( n != bufSize ) return false;
}
// save the hash table stuff
n = gbpwrite ( fd , &m_numPtrsUsed , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = gbpwrite ( fd , &m_threshold , 4 , off ); off += 4;
if ( n != 4 ) return false;
// save 100k at a time
2014-11-11 01:45:11 +03:00
int32_t i = 0;
2013-08-03 00:12:24 +04:00
while ( i < m_numPtrsMax )
if ( ! saveSome_r ( fd, &i , &off) ) return false;
//close ( fd ) ;
2013-08-03 00:12:24 +04:00
return true;
}
// size in bytes of the stack buffer RdbCache::saveSome_r() fills with hash
// table slot offsets before each write to disk
#define SAVEBUFSIZE (256*1024)
2014-11-11 01:45:11 +03:00
bool RdbCache::saveSome_r ( int fd , int32_t *iptr , int32_t *off ) {
2013-08-03 00:12:24 +04:00
char buf[SAVEBUFSIZE];
char *bufEnd = buf + SAVEBUFSIZE;
// point to buf
char *bp = buf;
int32_t used = 0;
2013-08-03 00:12:24 +04:00
// make hash table ptrs relative to offset
for ( ; *iptr < m_numPtrsMax && bp + 4 < bufEnd ; *iptr = *iptr + 1 ) {
// resume at i
char *p = m_ptrs[*iptr];
// if empty, write a -1 offset
if ( ! p ) {
2014-11-11 01:45:11 +03:00
//int32_t tt = -1;
2013-08-03 00:12:24 +04:00
// store that as it is
2014-11-11 01:45:11 +03:00
*(int32_t *)bp = -1; bp += 4;
2013-08-03 00:12:24 +04:00
//n = pwrite ( fd ,&tt , 4 , off ) ; off += 4;
//if ( n != 4 ) return false;
continue;
}
// otherwise convert ptr to offset... bitch
2014-11-11 01:45:11 +03:00
int32_t converted = -1;
for ( int32_t j = 0 ; j < m_numBufs ; j++ )
2013-08-03 00:12:24 +04:00
// is p pointing into the jth buffer?
if ( p >= m_bufs[j] && p < m_bufs[j] + m_bufSizes[j] ){
// if so, make it relative
converted = p - m_bufs[j] + BUFSIZE*j ;
break;
}
// bitch if not found
if ( converted == -1 )
return log(LOG_LOGIC,"db: cache: save: Bad "
"engineer");
// store that as it is
2014-11-11 01:45:11 +03:00
*(int32_t *)bp = converted; bp += 4;
used++;
2013-08-03 00:12:24 +04:00
//n = pwrite ( fd ,&converted , 4 , off ) ; off += 4;
//if ( n != 4 ) return false;
}
if ( used != m_numPtrsUsed ) {
log("cache: error saving cache. %"INT32" != %"INT32""
, used , m_numPtrsUsed );
//char *xx=NULL;*xx=0; }
return false;
}
2013-08-03 00:12:24 +04:00
// now write it all at once
2014-11-11 01:45:11 +03:00
int32_t size = bp - buf;
int32_t n = gbpwrite ( fd , buf , size , *off ); *off = *off + size;
2013-08-03 00:12:24 +04:00
if ( n != size ) return false;
return true;
}
bool RdbCache::load ( char *dbname ) {
// append .cache to "dbname" to get cache filename
char filename [ 64 ];
if ( gbstrlen(dbname) > 50 )
return log(LOG_LOGIC,"db: cache: load: dbname too long.");
2013-08-03 00:12:24 +04:00
sprintf ( filename , "%s.cache" , dbname );
// does the file exist?
File f;
f.set ( g_hostdb.m_dir , filename );
// having cache file not existing on disk is not so bad, it's a cache
2013-08-03 00:12:24 +04:00
if ( ! f.doesExist() )
return false;
// return log("db: Could not load cache from %s: does not exist.",
// f.getFilename());
2013-08-03 00:12:24 +04:00
// open the file
if ( ! f.open ( O_RDWR ) )
return log("db: Could not open cache save file for %s: %s.",
dbname,mstrerror(g_errno));
// log
log(LOG_INIT,"db: Loading cache from %s/%s.cache",
g_hostdb.m_dir,dbname);
// clear everything
reset();
int n;
2014-11-11 01:45:11 +03:00
int32_t off = 0;
2013-08-03 00:12:24 +04:00
// general info
2014-11-11 01:45:11 +03:00
int32_t numPtrsMax ;
int32_t maxMem ;
2013-08-03 00:12:24 +04:00
n = f.read ( &numPtrsMax , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &maxMem , 4 , off ); off += 4;
if ( n != 4 ) return false;
// . they need to match our current config to continue loading
// . attempt to convert if not, because it is painful to rebuild
// the site quality cache
if ( numPtrsMax != m_numPtrsMax ||
maxMem != m_maxMem ) {
log("db: Error while loading cache file %s. Does not match "
"current cache config. "
2014-11-11 01:45:11 +03:00
"current numPtrsMax=%"INT32" maxMem=%"INT32", "
"ondisk numPtrsMax=%"INT32" maxMem=%"INT32". "
2013-08-03 00:12:24 +04:00
"Attempting to convert.",
//"Ignoring file.",
f.getFilename() ,
m_numPtrsMax , m_maxMem ,
numPtrsMax , maxMem );
//log("RdbCache::load: not loading");
m_convert = true;
m_convertNumPtrsMax = numPtrsMax ;
m_convertMaxMem = maxMem ;
return false;
}
// mem stuff
n = f.read ( &m_memAlloced , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_memOccupied , 4 , off ); off += 4;
if ( n != 4 ) return false;
// load the buffer stuff
n = f.read ( &m_numBufs , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_totalBufSize, 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_offset , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_tail , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_wrapped , 1 , off ); off += 1;
if ( n != 1 ) return false;
// load each buf
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < m_numBufs ; i++ ) {
2013-08-03 00:12:24 +04:00
// load end relative
2014-11-11 01:45:11 +03:00
//int32_t end ; //= (m_bufEnds[i] - m_bufs[i]);
2013-08-03 00:12:24 +04:00
//n = f.read ( &end , 4 , off ); off += 4;
//if ( n != 4 ) return false;
// and buf size
2014-11-11 01:45:11 +03:00
int32_t bufSize ; //= m_bufSizes[i];
2013-08-03 00:12:24 +04:00
n = f.read ( &bufSize , 4 , off ); off += 4;
if ( n != 4 ) return false;
// alloc the buf
char ttt[64];
sprintf(ttt,"clb-%s",m_dbname);
m_bufs[i] = (char *) mcalloc ( bufSize , ttt );
if ( ! m_bufs[i] ) return false;
m_bufSizes[i] = bufSize;
//m_bufEnds [i] = m_bufs[i] + end;
//if ( end < 0 ) m_bufEnds[i] = NULL;
// then read contents of buffer #i
n = f.read ( m_bufs[i] , bufSize , off ); off += bufSize;
if ( n != bufSize ) return false;
}
// load the hash table stuff
n = f.read ( &m_numPtrsUsed , 4 , off ); off += 4;
if ( n != 4 ) return false;
n = f.read ( &m_threshold , 4 , off ); off += 4;
if ( n != 4 ) return false;
2014-11-18 05:13:36 +03:00
// load the OFFSETS into "fix"
int32_t total = sizeof(int32_t) * m_numPtrsMax ;
2014-11-18 05:13:36 +03:00
SafeBuf fix;
fix.reserve ( total );
//n = f.read ( m_ptrs , total , off ); off += total;
n = f.read ( fix.getBufStart() , total , off ); off += total;
2013-08-03 00:12:24 +04:00
if ( n != total ) return false;
fix.setLength ( total );
2014-11-18 05:13:36 +03:00
int32_t *poff = (int32_t *)fix.getBufStart();
// ptrs can be 8 bytes each, if we are 64-bit
m_ptrs = (char **) mcalloc (m_numPtrsMax * sizeof(char *),m_dbname);
if ( ! m_ptrs ) return false;
int32_t used = 0;
// convert offsets into pointers
2014-11-18 05:13:36 +03:00
for ( int32_t i = 0 ; i < m_numPtrsMax ; i++ , poff++ ) {
//uint32_t j = (SPTRTYPE) m_ptrs[i];
2013-08-03 00:12:24 +04:00
// is it a NULL?
2014-11-18 05:13:36 +03:00
//if ( j == -1 ) { m_ptrs[i] = NULL; continue; }
if ( *poff == -1 ) { m_ptrs[i] = NULL; continue; }
// sanity
if ( *poff >= m_numBufs * BUFSIZE ) { char *xx=NULL;*xx=0;}
2013-08-03 00:12:24 +04:00
// get buffer
2014-11-18 05:13:36 +03:00
int32_t bufNum = (*poff) / BUFSIZE;
char *p = m_bufs[bufNum] + (*poff) % BUFSIZE ;
2013-08-03 00:12:24 +04:00
// re-assign
m_ptrs[i] = p;
// count it
used++;
// see what is there
2013-08-03 00:12:24 +04:00
// debug msg
//key_t kk = *(key_t *)p;
2014-11-11 01:45:11 +03:00
//log("loaded k.n1=%"UINT32" k.n0=%"UINT64"",kk.n1,kk.n0);
2013-08-03 00:12:24 +04:00
//if ( m_fixedDataSize || m_supportLists )
2014-11-11 01:45:11 +03:00
// log("loaded k.n1=%"UINT32" k.n0=%"UINT64" size=%"INT32"",
// kk.n1,kk.n0, 20+*(int32_t *)(p+sizeof(key_t)+4));
2013-08-03 00:12:24 +04:00
//else
2014-11-11 01:45:11 +03:00
// log("loaded k.n1=%"UINT32" k.n0=%"UINT64"", kk.n1,kk.n0);
2013-08-03 00:12:24 +04:00
}
if ( used != m_numPtrsUsed ) {
log("cache: error loading cache. %"INT32" != %"INT32""
, used , m_numPtrsUsed );
return false;
}
2013-08-03 00:12:24 +04:00
m_needsSave = false;
return true;
}
// . remove every record of collection "collnum" whose key lies in
//   [startKey,endKey] from the cache
// . slots are placed by hash32() of the key, so keys of a range are
//   scattered over the table and the whole table is scanned
// . each removed record is also voided in its buffer with
//   markDeletedRecord(); otherwise deleteRec() would later call removeKey()
//   for a key that is no longer in the table
void RdbCache::removeKeyRange ( collnum_t  collnum  ,
				char      *startKey ,
				char      *endKey   ) {
	// nothing to do without a hash table
	if ( ! m_ptrs ) return;
	bool removedAny = false;
	int32_t i = 0;
	while ( i < m_numPtrsMax ) {
		char *rec = m_ptrs[i];
		// skip empty slots
		if ( ! rec ) { i++; continue; }
		// check collection number
		if ( *(collnum_t *)rec != collnum ) { i++; continue; }
		// check the range
		if ( KEYCMP ( rec+sizeof(collnum_t),startKey,m_cks ) < 0 ||
		     KEYCMP ( rec+sizeof(collnum_t),endKey  ,m_cks ) > 0 ) {
			i++;
			continue;
		}
		// remove the key
		m_ptrs[i] = NULL;
		m_numPtrsUsed--;
		m_memOccupied -= sizeof(char *);
		// void the record so its key is not removed a second time
		markDeletedRecord ( rec );
		removedAny = true;
		// re-hash everything up to the next empty slot so the
		// chains that ran through slot i still work
		int32_t rem = i;
		if ( ++rem >= m_numPtrsMax ) rem = 0;
		while ( m_ptrs[rem] ) {
			char *ptr = m_ptrs[rem];
			m_ptrs[rem] = NULL;
			m_numPtrsUsed--;
			m_memOccupied -= sizeof(char *);
			char k[MAX_KEY_BYTES];
			KEYSET(k,ptr+sizeof(collnum_t),m_cks);
			addKey ( *(collnum_t *)ptr ,
				 k                 ,
				 ptr               );
			if ( ++rem >= m_numPtrsMax ) rem = 0;
		}
		// do NOT advance i: the re-hash may have moved another
		// entry into slot i and it has to be checked as well
	}
	if ( removedAny ) m_needsSave = true;
}
2014-11-11 01:45:11 +03:00
bool RdbCache::convertCache ( int32_t numPtrsMax , int32_t maxMem ) {
2013-08-03 00:12:24 +04:00
// divide numPtrsMax by 2 to get maxRecs (see above)
2014-11-11 01:45:11 +03:00
int32_t maxRecs = numPtrsMax / 2;
2013-08-03 00:12:24 +04:00
// load the cache stored on disk into the "tmp" cache
RdbCache tmp;
if ( ! tmp.init ( maxMem ,
m_fixedDataSize ,
m_supportLists ,
maxRecs ,
m_useHalfKeys ,
m_dbname ,
true , // loadFromDisk
m_cks , // cacheKeySize
m_dks , // dataKeySize
numPtrsMax ))
return false;
// load it from disk
//if ( ! tmp.load() ) return false;
// copy its recs into our space
2014-11-11 01:45:11 +03:00
int32_t failed = 0;
int32_t success = 0;
2013-08-03 00:12:24 +04:00
char key[16];
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < tmp.m_numPtrsMax ; i++ ) {
2013-08-03 00:12:24 +04:00
// get ptr to slot in hash table
char *p = tmp.m_ptrs[i];
// skip if empty bucket
if ( ! p ) continue;
// otherwise, get collnum
collnum_t collnum = *(collnum_t *)p;
// get key
memcpy ( key , p + sizeof(collnum_t), m_cks );
// now get the record proper
bool found;
char *rec;
2014-11-11 01:45:11 +03:00
int32_t recSize;
int32_t timestamp;
2013-08-03 00:12:24 +04:00
found = tmp.getRecord ( collnum ,
key ,
&rec ,
&recSize ,
false , // do copy?
-1 , // maxAge
false , // inc counts?
2014-11-11 01:45:11 +03:00
// when it was cached
(time_t *)&timestamp ,
2013-08-03 00:12:24 +04:00
false );// promote rec?
// sanity check
if ( ! found ) {
log("db: key is in hash table but no rec.");
continue;
}
if ( ! timestamp ) {
log("db: has a timestamp of 0");
}
// now add it to our table
bool status;
status = addRecord ( collnum ,
key ,
rec ,
recSize ,
timestamp );
if ( ! status ) failed++;
else success++;
}
// log it
2014-11-11 01:45:11 +03:00
logf(LOG_INFO,"db: Successfully converted %"INT32" recs from cache on disk "
2013-08-03 00:12:24 +04:00
"for %s.", success,m_dbname);
if ( failed > 0 )
2014-11-11 01:45:11 +03:00
logf(LOG_INFO,"db: Failed to convert %"INT32" recs from cache on "
2013-08-03 00:12:24 +04:00
"disk for %s.", failed,m_dbname);
return true;
}
// goes through all the pointers and checks the integrity of the data they
// point to. Also checks if m_tail is pointing right or not
void RdbCache::verify(){
bool foundTail = false;
2014-11-11 01:45:11 +03:00
int32_t count = 0;
for ( int32_t i = 0; i < m_numPtrsMax; i++ ){
2013-08-03 00:12:24 +04:00
char *start = m_ptrs[i];
if ( !start ) continue;
if ( start == m_bufs[0] + m_tail )
foundTail = true;
char *p = start;
// get collnum
collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);
// collnum can be 0 in case we have to go to next buffer
if ( collnum != 0 && ( collnum >= m_maxColls || collnum < 0 ||
2013-08-03 00:12:24 +04:00
!g_collectiondb.m_recs[collnum] ) ) {
char *xx = NULL; *xx = 0;
}
// get key
//char *k = p ;
p += m_cks;
// get time stamp
2014-11-11 01:45:11 +03:00
//int32_t timestamp = *(int32_t *)p ;
2013-08-03 00:12:24 +04:00
p += 4;
2014-11-11 01:45:11 +03:00
//logf(LOG_DEBUG, "db: cachebug: removing key. tail=%"INT32" ",
2013-08-03 00:12:24 +04:00
// m_tail);
// get data size
2014-11-11 01:45:11 +03:00
int32_t dataSize;
2013-08-03 00:12:24 +04:00
// get dataSize and data
if ( m_fixedDataSize == -1 || m_supportLists ) {
2014-11-11 01:45:11 +03:00
dataSize = *(int32_t *)p; p += 4; }
2013-08-03 00:12:24 +04:00
else
dataSize = m_fixedDataSize;
// sanity
if ( dataSize < 0 || dataSize > m_totalBufSize ){
char *xx = NULL; *xx = 0;
}
// count it
count++;
}
if ( !foundTail && m_wrapped ){
char *xx = NULL; *xx = 0 ;
}
if ( count != m_numPtrsUsed ) {
char *xx = NULL; *xx = 0 ;
}
}