open-source-search-engine/RdbTree.cpp

3145 lines
102 KiB
C++
Raw Normal View History

2013-08-03 00:12:24 +04:00
// JAB: this is required for pwrite() in this module
#undef _XOPEN_SOURCE
#define _XOPEN_SOURCE 500
#include "gb-include.h"
#include "RdbTree.h"
#include "Loop.h"
#include "Threads.h"
#include "Datedb.h"
#include "Linkdb.h"
// . builds an empty tree that has no node arrays allocated
// . addNode() fails until set() allocates nodes, because m_numNodes is 0
RdbTree::RdbTree () {
	// no per-node arrays exist yet
	m_collnums      = NULL;
	m_keys          = NULL;
	m_data          = NULL;
	m_sizes         = NULL;
	m_left          = NULL;
	m_right         = NULL;
	m_parents       = NULL;
	m_depth         = NULL;
	// an empty tree has no head
	m_headNode      = -1;
	m_numNodes      = 0;
	m_numUsedNodes  = 0;
	m_memAlloced    = 0;
	m_memOccupied   = 0;
	m_nextNode      = 0;
	m_minUnusedNode = 0;
	// variable dataSize, depends on individual node
	m_fixedDataSize = -1;
	m_isProtected   = false;
	m_needsSave     = false;
	m_useProtection = false;
	m_pickRight     = false;
	m_gettingList   = 0;
	// . reset() and clear() name these members, so give them defined
	//   values until set() supplies the real ones
	m_dbname[0]     = '\0';
	m_allocName     = NULL;
	m_ownData       = false;
	m_overhead      = 0;
	m_ks            = 0;
	// before resetting... we have to set this so clear() won't breach
	// buffers (clear() skips the per-collection counts when this is -1)
	m_rdbId         = -1;
	reset();
}
// . tearing down the tree is just a reset(): that liberates the nodes and
//   frees every per-node array
RdbTree::~RdbTree ( ) {
	reset();
}
/*
#include <asm/page.h> // PAGE_SIZE
// return #of bytes scanned for timing purposes
long RdbTree::scanMem ( ) {
// ahh.. just scan the whole thing to keep it simple
char *p ;
char *pend ;
char c;
long size = 0; // count number of bytes scanned
// keys
p = (char *)m_keys ; pend = p + m_numNodes * m_ks;
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// data ptrs
p = (char *)m_data ; pend = p + m_numNodes * sizeof(char *);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// sizes
p = (char *)m_sizes ; pend = p + m_numNodes * sizeof(long);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// left
p = (char *)m_left ; pend = p + m_numNodes * sizeof(long);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// right
p = (char *)m_right ; pend = p + m_numNodes * sizeof(long);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// parents
p = (char *)m_parents ; pend = p + m_numNodes * sizeof(long);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
// depth
p = (char *)m_right ; pend = p + m_numNodes * sizeof(char);
size += pend - p;
if ( p ) while ( p < pend ) { c = *p; p += PAGE_SIZE; }
return size;
}
*/
// "memMax" includes records plus the overhead
bool RdbTree::set ( long fixedDataSize ,
long maxNumNodes ,
bool doBalancing ,
long memMax ,
bool ownData ,
char *allocName ,
bool dataInPtrs ,
char *dbname ,
char keySize ,
bool useProtection ,
bool allowDups ,
char rdbId ) {
2013-08-03 00:12:24 +04:00
reset();
m_fixedDataSize = fixedDataSize;
m_doBalancing = doBalancing;
m_maxMem = memMax;
m_ownData = ownData;
m_allocName = allocName;
m_dataInPtrs = dataInPtrs;
//m_dbname = dbname;
m_ks = keySize;
m_useProtection = useProtection;
m_allowDups = allowDups;
m_needsSave = false;
m_dbname[0] = '\0';
if ( dbname ) {
long dlen = strlen(dbname);
if ( dlen > 30 ) dlen = 30;
memcpy(m_dbname,dbname,dlen);
m_dbname[dlen] = '\0';
}
// a malloc tag, must be LESS THAN 16 bytes including the NULL
char *p = m_memTag;
memcpy ( p , "RdbTree" , 7 ); p += 7;
if ( dbname ) strncpy ( p , dbname , 8 ); p += 8;
*p++ = '\0';
// set rdbid
m_rdbId = rdbId; // -1;
// sanity
if ( rdbId < -1 ) { char *xx=NULL;*xx=0; }
if ( rdbId >= RDB_END ) { char *xx=NULL;*xx=0; }
2013-08-03 00:12:24 +04:00
// if its doledb, set it
//if ( dbname && strcmp(dbname,"doledb") == 0 ) m_rdbId = RDB_DOLEDB;
2013-08-03 00:12:24 +04:00
// adjust m_maxMem to virtual infinity if it was -1
if ( m_maxMem < 0 ) m_maxMem = 0x7fffffff;
// . compute each node's memory overhead
// . size of a key/left/right/parent
m_overhead = (m_ks + 4*3 );
// include collection number, currently an unsigned short
m_overhead += sizeof(collnum_t);
// if we're a non-zero data length include a dataptr (-1 means variabl)
if ( m_fixedDataSize != 0 ) m_overhead += 4;
// include dataSize if our dataSize is variable (-1)
if ( m_fixedDataSize == -1 ) m_overhead += 4;
// if we're balanced include 1 byte per node for the depth
if ( m_doBalancing ) m_overhead += 1;
if( maxNumNodes == -1) {
maxNumNodes = m_maxMem / m_overhead;
if(maxNumNodes > 10000000) maxNumNodes = 10000000;
}
// initiate protection
if ( m_useProtection ) protect();
// allocate the nodes
return growTree ( maxNumNodes , 0 );
2013-08-03 00:12:24 +04:00
}
void RdbTree::reset ( ) {
// . sanity check
// . SpiderCache.cpp uses a tree, but withou a dbname
if ( m_needsSave && m_dbname[0] &&
strcmp(m_dbname,"accessdb") &&
strcmp(m_dbname,"statsdb") ) {
//strcmp(m_dbname,"doledb") ) {
log("rdb: Resetting unsaved tree %s.",m_dbname);
// when DELETING a collection from pagecrawlbot.cpp
// it calls Collectiondb::deleteRec() which calls
2013-10-19 04:49:36 +04:00
// SpiderColl::reset() which calls m_waitingTree.reset()
// which was coring here! so take this out
//char *xx = NULL; *xx = 0;
2013-08-03 00:12:24 +04:00
}
// unprotect it all
if ( m_useProtection ) unprotect ( );
// make sure string is NULL temrinated. this gbstrlen() should
2014-01-23 11:52:23 +04:00
if ( m_numNodes > 0 &&
m_dbname[0] &&
gbstrlen(m_dbname) >= 0 &&
// don't be spammy we can have thousands of these, one per coll
strcmp(m_dbname,"waitingtree") )
2013-08-03 00:12:24 +04:00
log(LOG_INFO,"db: Resetting tree for %s.",m_dbname);
// liberate all the nodes
clear();
// do not require saving after a reset
m_needsSave = false;
// now free all the overhead structures of this tree
long n = m_numNodes;
// free array of collectio numbers (shorts for now)
if ( m_collnums) mfree ( m_collnums, sizeof(collnum_t) *n,m_allocName);
// free the array of keys
if ( m_keys ) mfree ( m_keys , m_ks * n , m_allocName );
// free the data ptrs
if ( m_data ) mfree ( m_data , sizeof(char *) * n , m_allocName );
// free the array of dataSizes
if ( m_sizes ) mfree ( m_sizes , n * 4 , m_allocName );
// free the sorted node #'s
if ( m_left ) mfree ( m_left , n * 4 ,m_allocName);
if ( m_right ) mfree ( m_right , n * 4 ,m_allocName);
if ( m_parents ) mfree ( m_parents , n * 4 ,m_allocName);
if ( m_depth ) mfree ( m_depth , n ,m_allocName);
m_collnums = NULL;
m_keys = NULL;
m_data = NULL;
m_sizes = NULL;
m_left = NULL;
m_right = NULL;
m_parents = NULL;
m_depth = NULL;
// tree description vars
m_headNode = -1;
m_numNodes = 0;
m_numUsedNodes = 0;
m_memAlloced = 0;
m_memOccupied = 0;
m_nextNode = 0;
m_minUnusedNode = 0;
m_fixedDataSize = -1; // variable dataSize, depends on individual node
// clear counts
m_numNegativeKeys = 0;
m_numPositiveKeys = 0;
//memset ( m_numNegKeysPerColl , 0 , 4*MAX_COLLS );
//memset ( m_numPosKeysPerColl , 0 , 4*MAX_COLLS );
2013-08-03 00:12:24 +04:00
m_isSaving = false;
m_isLoading = false;
m_isWritable = true;
}
// . removes every node that belongs to collection "collnum"
// . marks the tree as needing a save even if nothing ends up deleted
void RdbTree::delColl ( collnum_t collnum ) {
	m_needsSave = true;
	// cover the whole key range of this collection
	char *lowest  = KEYMIN();
	char *highest = KEYMAX();
	deleteNodes ( collnum , lowest , highest , true /*freeData*/ );
}
// . this just makes all the nodes available for occupation (liberates them)
// . it does not free this tree's control structures
// . node data is freed only if m_ownData is set
// . also zeroes the per-collection key counts for our rdbId, unless
//   m_rdbId is -1
// . returns # of occupied nodes we liberated
long RdbTree::clear ( ) {
	// emptying a tree that held something is a change to be saved
	if ( m_numUsedNodes > 0 ) m_needsSave = true;
	// the liberation count
	long count = 0;
	// data size of a node; re-read per node below if it is variable
	long dataSize = m_fixedDataSize;
	// only nodes below m_minUnusedNode have ever been filled
	for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
		// skip node if parents is -2 (unoccupied)
		if ( m_parents[i] == -2 ) continue;
		// we no longer count the overhead of this node as occupied
		m_memOccupied -= m_overhead;
		// make the ith node available for occupation
		m_parents[i] = -2;
		// keep count
		count++;
		// continue if we have no data to free
		if ( ! m_data ) continue;
		// read dataSize from m_sizes[i] if it's not fixed
		if ( m_fixedDataSize == -1 ) dataSize = m_sizes[i];
		// free the data being pointed to
		if ( m_ownData ) mfree ( m_data[i] , dataSize ,m_allocName);
		// adjust our reported memory usage
		m_memAlloced  -= dataSize;
		m_memOccupied -= dataSize;
	}
	// reset all these
	m_headNode      = -1;
	m_numUsedNodes  = 0;
	m_nextNode      = 0;
	m_minUnusedNode = 0;
	// clear counts
	m_numNegativeKeys = 0;
	m_numPositiveKeys = 0;
	// clear tree counts for all collections!
	long nc = g_collectiondb.m_numRecs;
	// BUT only if we are an Rdb::m_tree!!! otherwise, we overwrite
	// stuff in CollectionRec we shouldn't
	if ( m_rdbId == -1 ) nc = 0;
	for ( long i = 0 ; i < nc ; i++ ) {
		CollectionRec *cr = g_collectiondb.getRec(i);
		if ( ! cr ) continue;
		cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
		cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
	}
	return count;
}
// . used by cache
// . exact-match lookup: returns the node # holding (collnum,key)
// . returns -1 if there is no such node
long RdbTree::getNode ( collnum_t collnum , char *key ) {
	// walk down from the head; nodes order by collnum, then by key
	for ( long n = m_headNode ; n != -1 ; ) {
		if ( collnum < m_collnums[n] ) { n = m_left [n]; continue; }
		if ( collnum > m_collnums[n] ) { n = m_right[n]; continue; }
		// same collection, so the key decides
		long cmp = KEYCMP(key,0,m_keys,n,m_ks);
		if ( cmp == 0 ) return n;
		n = ( cmp < 0 ) ? m_left[n] : m_right[n];
	}
	// fell off the bottom of the tree
	return -1;
}
// . returns node # whose (collnum,key) is >= the one given
// . returns -1 if none
// . used by RdbTree::getList()
// . TODO: spiderdb stores records by time so our unbalanced tree really hurts
//         us for that.
// . TODO: keep a m_lastStartNode and start from that since it tends to only
//         increase startKey via Msg3. if the key at m_lastStartNode is <=
//         the provided key then we did well.
long RdbTree::getNextNode ( collnum_t collnum , char *key ) {
	// nothing to find in an empty tree
	if ( m_headNode < 0 ) return -1;
	// "last" trails "cur" so we know where we fell off the tree
	long cur  = m_headNode;
	long last = cur;
	while ( cur != -1 ) {
		last = cur;
		if ( collnum < m_collnums[cur] ) { cur = m_left [cur]; continue; }
		if ( collnum > m_collnums[cur] ) { cur = m_right[cur]; continue; }
		long cmp = KEYCMP(key,0,m_keys,cur,m_ks);
		// an exact match is its own answer
		if ( cmp == 0 ) return cur;
		cur = ( cmp < 0 ) ? m_left[cur] : m_right[cur];
	}
	// . no exact match; "last" is the node we would hang off of
	// . if it sorts after what we asked for, it is the answer
	bool lastIsBigger =
		m_collnums[last] > collnum ||
		( m_collnums[last] == collnum &&
		  KEYCMP(m_keys,last,key,0,m_ks) > 0 );
	if ( lastIsBigger ) return last;
	// otherwise the answer is whatever follows "last" in order
	return getNextNode ( last );
}
// . returns the first node at or after the minimum key of collection 0
// . returns -1 if getNextNode() finds none
long RdbTree::getFirstNode ( ) {
	char *minKey = KEYMIN();
	return getNextNode ( 0 , minKey );
}
// . like getFirstNode() but the search starts at the minimum key of
//   collection "collnum"
// . NOTE: getNextNode() can return a node from a later collection
long RdbTree::getFirstNode2 ( collnum_t collnum ) {
	char *minKey = KEYMIN();
	return getNextNode ( collnum , minKey );
}
// . returns the last node at or before the maximum key of collection 0x7fff
// . NOTE(review): assumes no collnum exceeds 0x7fff -- confirm against the
//   collnum_t definition
long RdbTree::getLastNode ( ) {
	char *maxKey = KEYMAX();
	return getPrevNode ( (collnum_t)0x7fff , maxKey );
}
// . get the node whose (collnum,key) is <= the one given
// . returns -1 if none
long RdbTree::getPrevNode ( collnum_t collnum , char *key ) {
	// nothing to find in an empty tree
	if ( m_headNode < 0 ) return -1;
	// "last" trails "cur" so we know where we fell off the tree
	long cur  = m_headNode;
	long last = cur;
	while ( cur != -1 ) {
		last = cur;
		if ( collnum < m_collnums[cur] ) { cur = m_left [cur]; continue; }
		if ( collnum > m_collnums[cur] ) { cur = m_right[cur]; continue; }
		long cmp = KEYCMP(key,0,m_keys,cur,m_ks);
		// an exact match is its own answer
		if ( cmp == 0 ) return cur;
		cur = ( cmp < 0 ) ? m_left[cur] : m_right[cur];
	}
	// . no exact match; "last" is the node we would hang off of
	// . if it sorts before what we asked for, it is the answer
	bool lastIsSmaller =
		m_collnums[last] < collnum ||
		( m_collnums[last] == collnum &&
		  KEYCMP(m_keys,last,key,0,m_ks) < 0 );
	if ( lastIsSmaller ) return last;
	// otherwise the answer is whatever precedes "last" in order
	return getPrevNode ( last );
}
// . returns the data ptr stored with (collnum,key)
// . returns NULL if there is no such node, or if this tree keeps no data
//   array at all (m_data can be NULL when m_fixedDataSize is 0)
char *RdbTree::getData ( collnum_t collnum , char *key ) {
	long n = getNode ( collnum , key );
	if ( n < 0 ) return NULL;
	// do not index a data array that was never allocated
	if ( ! m_data ) return NULL;
	return m_data[n];
}
// . "i" is the previous node number
// . we could eliminate m_parents[] array if we limited tree depth!
// . 24 cycles to get the first kid
// . averages around 50 cycles per call probably
// . 8 cycles are spent entering/exiting this subroutine (inline it? TODO)
long RdbTree::getNextNode ( long i ) {
	// with a right kid, the next node is the left-most node under it
	long kid = m_right[i];
	if ( kid >= 0 ) {
		while ( m_left[kid] >= 0 ) kid = m_left[kid];
		// it's a leaf or has one right kid
		return kid;
	}
	// otherwise the next node, if any, is one of our ancestors
	long p = m_parents[i];
	// no parent means nothing comes after us
	if ( p < 0 ) return -1;
	// if we're the left kid of the parent, then the parent is the
	// next biggest node (checked first cuz comparing keys takes longer)
	if ( m_left[p] == i ) return p;
	// climb until an ancestor does not sort before node i
	for ( ; p >= 0 ; p = m_parents[p] ) {
		if ( m_collnums[p] > m_collnums[i] ) break;
		if ( m_collnums[p] == m_collnums[i] &&
		     KEYCMP(m_keys,p,m_keys,i,m_ks) >= 0 ) break;
	}
	// p will be -1 if none are left
	return p;
}
// . "i" is the next node number
long RdbTree::getPrevNode ( long i ) {
	// cruise the kids if we have a left one
	if ( m_left[i] >= 0 ) {
		// go to the left kid
		i = m_left [ i ];
		// now go right as much as we can
		while ( m_right [ i ] >= 0 ) i = m_right [ i ];
		// return that node (it's a leaf or has one left kid)
		return i;
	}
	// otherwise the previous node, if any, is one of our ancestors
	long p = m_parents[i];
	// . if parent is negative we're done: nothing comes before us
	// . without this check m_right[p] below would index with -1, just
	//   like getNextNode(long) guards its m_left[p]
	if ( p < 0 ) return -1;
	// if we're the right kid of the parent, then the parent is the
	// next least node
	if ( m_right[p] == i ) return p;
	// keep getting the parent until it has a key that is not bigger
	// than ours. checking "right kid" first above is better cuz
	// comparing keys takes longer.
	while ( p >= 0 && (m_collnums[p] >  m_collnums[i] ||
			  ( m_collnums[p] == m_collnums[i] &&
			    KEYCMP(m_keys,p,m_keys,i,m_ks) > 0 )) )
		p = m_parents[p];
	// p will be -1 if none are left
	return p;
}
// . get the node with the lowest key: the left-most node under the head
// . returns -1 if the tree is empty
long RdbTree::getLowestNode ( ) {
	long i = m_headNode;
	// an empty tree has a head of -1; do not index m_left[] with it
	if ( i < 0 ) return -1;
	while ( m_left[i] != -1 ) i = m_left [ i ];
	return i;
}
// . returns -1 and sets g_errno on error: ETRYAGAIN if the tree is being
//   saved or is not writable, ENOMEM if every allocated node is in use
// . returns node # we added it to on success
// . this will replace any current node with the same collnum/key unless
//   m_allowDups is set, in which case the new node goes to the right of it
// . on replace the old data is freed if m_ownData is set
// . negative dataSizes should be interpreted as 0
// . probably about 120 cycles per add means we can add 2 million per sec
// . NOTE: does not check to see if it will exceed m_maxMem
long RdbTree::addNode ( collnum_t collnum ,
			char *key , char *data , long dataSize ) {
	// cannot add if saving, tell them to try again later
	if ( m_isSaving ) { g_errno = ETRYAGAIN; return -1; }
	// nor if not writable
	if ( ! m_isWritable ) { g_errno = ETRYAGAIN; return -1; }
	// if there's no more available nodes, error out
	if ( m_numUsedNodes >= m_numNodes) { g_errno = ENOMEM; return -1; }
	// we need to be saved now
	m_needsSave = true;
	// sanity check - no empty positive keys for doledb
	if ( m_rdbId == RDB_DOLEDB && dataSize == 0 && (key[0]&0x01) == 0x01){
		char *xx=NULL;*xx=0; }
	// for posdb
	if ( m_ks == 18 &&(key[0] & 0x06) ) {char *xx=NULL;*xx=0; }
	// . we do not check m_memOccupied + dataSize against m_maxMem here
	// . we now use RdbMem class so this isn't necessary
	// set up vars
	long iparent ;
	long rightGuy;
	// this is -1 iff there are no nodes used in the tree
	long i = m_headNode;
	// . disable mem protection
	// . "undo" remembers if we have to turn it back on before returning
	char undo = 0;
	if ( m_useProtection ) {
		if ( m_isProtected ) undo = 1;
		unprotect ( );
	}
	// . find the parent of node i and call it "iparent"
	// . if a node exists with our key then replace it
	while ( i != -1 ) {
		iparent = i;
		if ( collnum < m_collnums[i] ) { i = m_left [i]; continue;}
		if ( collnum > m_collnums[i] ) { i = m_right[i]; continue;}
		if      ( KEYCMP(key,0,m_keys,i,m_ks)<0) i = m_left [i];
		else if ( KEYCMP(key,0,m_keys,i,m_ks)>0) i = m_right[i];
		else {
			if ( ! m_allowDups ) goto replaceIt;
			// otherwise, always go right on equal
			i = m_right[i];
		}
	}
	// . this overhead is key/left/right/parent
	// . we inc it by the data and sizes array if we need to below
	m_memOccupied += m_overhead;
	// point i to the next available node
	i = m_nextNode;
	// if we're the first node we become the head node and our parent is -1
	if ( m_numUsedNodes == 0 ) {
		m_headNode = i;
		iparent    = -1;
		// ensure these are right
		m_numNegativeKeys = 0;
		m_numPositiveKeys = 0;
		// . we only use these stats for Rdb::m_trees for a
		//   PER COLLECTION count, since there can be multiple
		//   collections using the same Rdb::m_tree!
		// . when fixing a tree m_recs[collnum] can be NULL, so
		//   check it
		if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
			g_collectiondb.m_recs[collnum]->
				m_numNegKeysInTree[(unsigned char)m_rdbId] =0;
			g_collectiondb.m_recs[collnum]->
				m_numPosKeysInTree[(unsigned char)m_rdbId] =0;
		}
	}
	// stick ourselves in the next available node, "m_nextNode"
	KEYSET ( &m_keys[i*m_ks] , key , m_ks );
	m_parents [ i ] = iparent;
	// save collection number now, too
	m_collnums [ i ] = collnum;
	// set the data and size only if we need to
	if ( m_fixedDataSize != 0 ) {
		m_data [ i ] = data;
		// ack used and occupied mem
		if ( m_fixedDataSize >= 0 ) {
			m_memAlloced  += m_fixedDataSize;
			m_memOccupied += m_fixedDataSize;
		}
		else {
			m_memAlloced  += dataSize ;
			m_memOccupied += dataSize ;
		}
		// we may have a variable size of data as well
		if ( m_fixedDataSize == -1 ) m_sizes [ i ] = dataSize;
	}
	// make our parent, if any, point to us
	if ( iparent >= 0 ) {
		if ( collnum < m_collnums[iparent] )
			m_left [iparent] = i;
		else if (collnum==m_collnums[iparent]&&
			 KEYCMP(key,0,m_keys,iparent,m_ks)<0 )
			m_left [iparent] = i;
		else
			m_right[iparent] = i;
	}
	// . the right kid of an empty node is used as a linked list of
	//   empty nodes formed by deleting nodes
	// . we keep the linked list so we can re-used these vacated nodes
	rightGuy = m_right [ i ];
	// our kids are -1 (none)
	m_left  [ i ] = -1;
	m_right [ i ] = -1;
	// . if we weren't recycling a node then advance to next
	// . m_minUnusedNode is the lowest node number that was never filled
	//   at any one time in the past
	// . you might call it the brand new housing district
	if ( m_nextNode == m_minUnusedNode ) {m_nextNode++; m_minUnusedNode++;}
	// . otherwise, we're in a linked list of vacated used houses
	// . we have a linked list in the right kid
	else {
		// point m_nextNode to the next available used house, if any
		if ( rightGuy >= 0 ) m_nextNode = rightGuy;
		// this is an error, try to fix the tree and core if we
		// can not
		else {
			log("db: Encountered corruption in tree while "
			    "trying to add a record. You should "
			    "replace your memory sticks.");
			if ( ! fixTree ( ) ) {
				char *p = NULL;
				*p = 1;
			}
		}
	}
	// we have one more used node
	m_numUsedNodes++;
	// update sign counts
	if ( KEYNEG(key) ) {
		m_numNegativeKeys++;
		// . we only use these stats for Rdb::m_trees for a
		//   PER COLLECTION count, since there can be multiple
		//   collections using the same Rdb::m_tree!
		// . when fixing a tree m_recs[collnum] can be NULL
		if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
			CollectionRec *cr ;
			cr = g_collectiondb.m_recs[collnum];
			if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]++;
		}
	}
	else {
		m_numPositiveKeys++;
		// when fixing a tree m_recs[collnum] can be NULL
		if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
			CollectionRec *cr ;
			cr = g_collectiondb.m_recs[collnum];
			if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]++;
		}
	}
	// only balanced trees track depths
	if ( m_doBalancing ) {
		// our depth is now 1 since we're a leaf node
		// (we include ourself)
		m_depth [ i ] = 1;
		// . reset depths starting at i's parent and ascending the tree
		// . will balance if child depths differ by 2 or more
		setDepths ( iparent );
	}
	// re-enable mem protection
	if ( m_useProtection && undo ) protect ( );
	// return the node number of the node we occupied
	return i;

	// come here to replace node i with the new data/dataSize
 replaceIt:
	// if we don't support any data then we're done
	if ( m_fixedDataSize == 0 ) {
		if ( m_useProtection && undo ) protect();
		return i;
	}
	// get dataSize
	long oldDataSize = m_fixedDataSize;
	// if datasize was 0 cuz it was a negative key, fix that for
	// calculating m_memOccupied
	if ( m_fixedDataSize >= 0 ) dataSize    = m_fixedDataSize;
	if ( m_fixedDataSize <  0 ) oldDataSize = m_sizes[i];
	// free i's data
	if ( m_data[i] && m_ownData )
		mfree ( m_data[i] , oldDataSize ,m_allocName);
	// decrease mem occupied and increase by new size
	m_memOccupied -= oldDataSize;
	m_memOccupied += dataSize;
	m_memAlloced  -= oldDataSize;
	m_memAlloced  += dataSize;
	// otherwise set the data
	m_data [ i ] = data;
	// set the size if we need to as well
	if ( m_fixedDataSize < 0 ) m_sizes [ i ] = dataSize;
	// re-enable mem protection if we should
	if ( m_useProtection && undo ) protect();
	return i;
}
// . deletes the node that exactly matches (collnum,key)
// . returns the node # that was deleted, or -1 if no node matched
long RdbTree::deleteNode ( collnum_t collnum , char *key , bool freeData ) {
	long n = getNode ( collnum , key );
	// only delete if the lookup found something
	if ( n != -1 ) deleteNode ( n , freeData );
	return n;
}
// . delete all nodes of "collnum" with keys in [startKey,endKey]
// . leaves the tree protected afterwards if m_useProtection is set
void RdbTree::deleteNodes ( collnum_t collnum ,
			    char *startKey , char *endKey , bool freeData ) {
	// allow writes to the tree while we work
	if ( m_useProtection ) unprotect ( );
	// . always search again from startKey rather than stepping from the
	//   deleted node: rotation in setDepths() will cause him to be
	//   replaced with one of his kids, unless he's a leaf node
	for ( long n = getNextNode ( collnum , startKey ) ;
	      n >= 0 ;
	      n = getNextNode ( collnum , startKey ) ) {
		// stop once we run into another collection
		if ( m_collnums[n] != collnum ) break;
		// or once we are past the end of the range
		if ( KEYCMP(m_keys,n,endKey,0,m_ks) > 0 ) break;
		deleteNode ( n , freeData );
	}
	// protect it all from writes again
	if ( m_useProtection ) protect ( );
}
// . deletes node i from the tree
// . if i is a leaf, i's parent just disowns him
// . otherwise i is replaced by node j, the in-order neighbor found under
//   i's right or left kid (we alternate sides using m_pickRight)
// . the vacated node i is pushed onto the free list kept in m_right[] and
//   headed by m_nextNode, and its parent is set to -2 to mark it unused
// . cores if the tree is not writable; a double delete is logged and ignored
// . NOTE: "freeData" is currently ignored: the node's data is freed
//   whenever m_ownData is set
void RdbTree::deleteNode ( long i , bool freeData ) {
	// sanity check
	if ( ! m_isWritable ) {
		log("db: Can not delete record from tree because "
		    "not writable.");
		char *xx = NULL; *xx = 0;
	}
	// . no deleting if we're saving
	// . NOTE(review): this only logs, the delete still goes ahead
	if ( m_isSaving ) log("db: Can not delete record from tree because "
			      "saving tree to disk now.");
	// watch out for double deletes
	if ( m_parents[i] == -2 ) {
		log(LOG_LOGIC,"db: Caught double delete.");
		return;
	}
	// we need to be saved now
	m_needsSave = true;
	// we have one less occupied node
	m_memOccupied -= m_overhead;
	// . release the node's data and stop accounting for it
	// . m_data can be a NULL array if m_fixedDataSize is fixed to 0
	if ( /*freeData &&*/ m_data ) {
		long dataSize = m_fixedDataSize;
		if ( dataSize == -1 ) dataSize = m_sizes[i];
		if ( m_ownData ) mfree ( m_data [i] , dataSize ,m_allocName);
		m_memAlloced  -= dataSize;
		m_memOccupied -= dataSize;
	}
	// . disable mem protection
	// . "undo" remembers if we have to turn it back on before returning
	char undo = 0;
	if ( m_useProtection ) {
		if ( m_isProtected ) undo = 1;
		unprotect ( );
	}
	// parent of i
	long iparent ;
	long jparent ;
	// j will be the node that replace node #i
	long j = i;
	// . now find a node to replace node #i
	// . get a node whose key is just to the right or left of i's key
	// . get i's right kid
	// . then get that kid's LEFT MOST leaf-node descendant
	// . this little routine is stolen from getNextNode(i)
	// . try to pick a kid from the right the same % of time as from left
	if ( ( m_pickRight     && m_right[j] >= 0 ) ||
	     ( m_left[j]   < 0 && m_right[j] >= 0 )  ) {
		// try to pick a left kid next time
		m_pickRight = 0;
		// go to the right kid
		j = m_right [ j ];
		// now go left as much as we can
		while ( m_left [ j ] >= 0 ) j = m_left [ j ];
		// use node j (it's a leaf or has a right kid)
		goto gotReplacement;
	}
	// . now get the previous node if i has no right kid
	// . this little routine is stolen from getPrevNode(i)
	if ( m_left[j] >= 0 ) {
		// try to pick a right kid next time
		m_pickRight = 1;
		// go to the left kid
		j = m_left [ j ];
		// now go right as much as we can
		while ( m_right [ j ] >= 0 ) j = m_right [ j ];
		// use node j (it's a leaf or has a left kid)
		goto gotReplacement;
	}
	// . come here if i did not have any kids (i's a leaf node)
	// . get i's parent
	iparent = m_parents[i];
	// make i's parent, if any, disown him
	if ( iparent >= 0 ) {
		if   ( m_left[iparent] == i ) m_left [iparent] = -1;
		else                          m_right[iparent] = -1;
	}
	// node i now goes to the top of the list of vacated, available homes
	m_right[i] = m_nextNode;
	// m_nextNode now points to i
	m_nextNode = i;
	// his parent is -2 (god) cuz he's dead and available
	m_parents[i] = -2;
	// one less node in the tree
	m_numUsedNodes--;
	// update sign counts
	if ( KEYNEG(m_keys,i,m_ks) ) {
		m_numNegativeKeys--;
		if ( m_rdbId >= 0 ) {
			CollectionRec *cr;
			cr = g_collectiondb.m_recs[m_collnums[i]];
			if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]--;
		}
	}
	else {
		m_numPositiveKeys--;
		if ( m_rdbId >= 0 ) {
			CollectionRec *cr;
			cr = g_collectiondb.m_recs[m_collnums[i]];
			if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]--;
		}
	}
	// . reset the depths starting at iparent and going up until unchanged
	// . will balance at pivot nodes that need it
	if ( m_doBalancing ) setDepths ( iparent );
	// protect it all from writes again
	if ( m_useProtection && undo ) protect ( );
	// return if there are still people
	if ( m_numUsedNodes > 0 ) return;
	// otherwise tree must be empty
	m_headNode = -1;
	// this will nullify our linked list of vacated, used homes
	m_nextNode      = 0;
	m_minUnusedNode = 0;
	// ensure these are right
	m_numNegativeKeys = 0;
	m_numPositiveKeys = 0;
	// same for the counts of the collection node i belonged to
	if ( m_rdbId >= 0 ) {
		CollectionRec *cr ;
		cr = g_collectiondb.m_recs[m_collnums[i]];
		if(cr){
			cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
			cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
		}
	}
	return;

	// . now replace node #i with node #j
	// . i should not equal j at this point
 gotReplacement:
	// . j's parent should take j's one kid
	// . that child should likewise point to j's parent
	// . j should only have <= 1 kid now because of our algorithm above
	// . if j's parent is i then j keeps his kid
	jparent = m_parents[j];
	if ( jparent != i ) {
		// parent: if j is my left kid, then i take j's right kid
		// otherwise, if j is my right kid, then i take j's left kid
		if ( m_left [ jparent ] == j ) {
			m_left  [ jparent ] = m_right [ j ];
			if (m_right[j]>=0) m_parents [ m_right[j] ] = jparent;
		}
		else {
			m_right [ jparent ] = m_left   [ j ];
			if (m_left [j]>=0) m_parents [ m_left[j] ] = jparent;
		}
	}
	// . j inherits i's children (providing i's child is not j)
	// . those children's parent should likewise point to j
	if ( m_left [i] != j ) {
		m_left [j] = m_left [i];
		if ( m_left[j] >= 0 ) m_parents[m_left [j]] = j;
	}
	if ( m_right[i] != j ) {
		m_right[j] = m_right[i];
		if ( m_right[j] >= 0 ) m_parents[m_right[j]] = j;
	}
	// j becomes the kid of i's parent, if any
	iparent = m_parents[i];
	if ( iparent >= 0 ) {
		if   ( m_left[iparent] == i ) m_left [iparent] = j;
		else                          m_right[iparent] = j;
	}
	// iparent may be -1
	m_parents[j] = iparent;
	// if i was the head node now j becomes the head node
	if ( m_headNode == i ) m_headNode = j;
	// . i joins the linked list of available used homes
	// . put it at the head of the list
	// . "m_nextNode" is the head node of the linked list
	m_right[i] = m_nextNode;
	m_nextNode = i;
	// . i's parent should be -2 so we know it's unused in case we're
	//   stepping through the nodes linearly for dumping in RdbDump
	// . used in getListUnordered()
	m_parents[i] = -2;
	// we have one less used node
	m_numUsedNodes--;
	// update sign counts
	if ( KEYNEG(m_keys,i,m_ks) ) {
		m_numNegativeKeys--;
		if ( m_rdbId >= 0 ) {
			CollectionRec *cr ;
			cr = g_collectiondb.m_recs[m_collnums[i]];
			if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]--;
		}
	}
	else {
		m_numPositiveKeys--;
		if ( m_rdbId >= 0 ) {
			CollectionRec *cr ;
			cr = g_collectiondb.m_recs[m_collnums[i]];
			if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]--;
		}
	}
	// return if we don't have to balance
	if ( ! m_doBalancing ) {
		// protect it all from writes again
		if ( m_useProtection && undo ) protect ( );
		return;
	}
	// our depth becomes that of the node we replaced, unless moving j
	// up to i decreases the total depth, in which case setDepths() fixes
	m_depth [ j ] = m_depth [ i ];
	// . recalculate depths starting at old parent of j
	// . stops at the first node to have the correct depth
	// . will balance at pivot nodes that need it
	if ( jparent != i ) setDepths ( jparent );
	else                setDepths ( j );
	// TODO: register growTree with g_mem to free on demand
	// protect it all from writes again
	if ( m_useProtection && undo ) protect ( );
}
// . delete "numKeys" keys from collection "collnum"
// . "keys" holds the keys packed back to back, m_ks bytes apiece
// . returns whatever deleteList() returns
bool RdbTree::deleteKeys ( collnum_t collnum , char *keys , long numKeys ) {
	// total bytes spanned by the packed key array
	long numBytes = m_ks * numKeys;
	// wrap the raw key buffer in a temporary list; every pointer
	// argument handed to set() is just the start of that buffer
	RdbList keyList;
	keyList.set ( keys     ,
		      numBytes ,
		      keys     ,
		      numBytes ,
		      keys     ,
		      keys     ,
		      0        , // fixedDataSize
		      false    ,
		      false    ,
		      m_ks     );
	// let deleteList() do the work, with its doBalancing argument on
	return deleteList ( collnum , &keyList , true );
}
// . delete every key of "list" from collection "collnum"
// . returns false if deleteNode() returned < 0 for at least one key,
//   true otherwise (including when the tree or the list is empty)
// . "doBalancing" can only turn balancing OFF for the duration of the
//   call; the previous setting is restored before returning
// . TODO: speed up since keys are usually ordered (use getNextNode())
bool RdbTree::deleteList ( collnum_t collnum ,
			   RdbList *list , bool doBalancing ) {
	// the list must use our key size, core otherwise
	if ( list->m_ks != m_ks ) { char *xx = NULL; *xx = 0; }
	// an empty tree has nothing to delete
	if ( m_numUsedNodes <= 0 ) return true;
	// rewind so the getCurrent*() calls start at the first record
	list->resetListPtr();
	// an empty list has nothing to delete
	if ( list->isEmpty() ) return true;
	// scratch space for the key of the current list record
	char curKey[MAX_KEY_BYTES];
	// flips to false as soon as one delete fails
	bool foundAll = true;
	// remember the balancing mode, then possibly switch it off
	const bool savedBalancing = m_doBalancing;
	if ( savedBalancing ) m_doBalancing = doBalancing;
	// writes are coming, lift the memory protection
	if ( m_useProtection ) unprotect ( );
	// one delete per list record
	do {
		list->getCurrentKey ( curKey );
		if ( deleteNode ( collnum , curKey , true /*freeData?*/ ) < 0 )
			foundAll = false;
	} while ( list->skipCurrentRecord() );
	// put the balancing mode back
	m_doBalancing = savedBalancing;
	// and re-enable the memory protection
	if ( m_useProtection ) protect ( );
	return foundAll;
}
// . delete the keys of "list" from collection "collnum", assuming the
//   list is sorted so tree and list can be walked in lock step
// . keys of the list that are not in the tree are silently skipped
// . "doBalancing" can only turn balancing OFF for the duration of the
//   call; the previous setting is restored before returning
// . fixes over the previous version:
//   1. the in-order successor is fetched BEFORE deleteNode() is called.
//      deleteNode() puts the slot on the free list (parent = -2 and
//      m_right reused as the free-list link) so asking for the
//      successor of an already deleted slot walked garbage links.
//   2. nodes are ordered by (collnum,key), so a node belonging to a
//      lower collnum is always "before" the key, whatever its key is
//   3. the walk starts at the first node >= the first key; a missing
//      predecessor no longer aborts the whole delete
void RdbTree::deleteOrderedList ( collnum_t collnum ,
				  RdbList *list , bool doBalancing ) {
	// return if no non-empty nodes in the tree
	if ( m_numUsedNodes <= 0 ) return ;
	// reset before calling list->getCurrent*() functions
	list->resetListPtr();
	// bail if list is empty
	if ( list->isEmpty() ) return;
	// the list key we are currently trying to delete
	char key [ MAX_KEY_BYTES ];
	list->getCurrentKey ( key );
	// first node whose (collnum,key) is >= the first key to delete
	long node = getNextNode ( collnum , key );
	// preserve state of balance
	bool balanced = m_doBalancing;
	// possibly turn off balancing (only turn on/off if it's already on)
	if ( m_doBalancing ) m_doBalancing = doBalancing;
	// disable mem protection
	if ( m_useProtection ) unprotect ( );
	// walk until the tree or the list runs out
	while ( node >= 0 ) {
		collnum_t nodeColl = m_collnums [ node ];
		// stepped into a later collection, nothing more to match
		if ( nodeColl > collnum ) break;
		// order the node against "key"; a lower collnum sorts first
		int cmp = -1;
		if ( nodeColl == collnum )
			cmp = KEYCMP(m_keys,node,key,0,m_ks);
		// node is still before "key", advance the node only
		if ( cmp < 0 ) {
			node = getNextNode ( node );
			continue;
		}
		// exact match: grab the successor while the links of
		// "node" are still valid, then trim it from the tree
		if ( cmp == 0 ) {
			long next = getNextNode ( node );
			deleteNode ( node , true /*freeData?*/ );
			node = next;
		}
		// . either we deleted "key" or we passed it, meaning it
		//   was not in the tree
		// . point to next key in list, stop if list exhausted
		if ( ! list->skipCurrentRecord() ) break;
		list->getCurrentKey ( key );
	}
	// possibly restore balancing
	m_doBalancing = balanced;
	// re-enable mem protection
	if ( m_useProtection ) protect ( );
}
// . this fixes the tree
// returns false if could not fix tree and sets g_errno, otherwise true
bool RdbTree::fixTree ( ) {
// on error, fix the linked list
//log("RdbTree::fixTree: tree was corrupted on disk?");
log("db: Trying to fix tree.");
log("db: %li occupied nodes and %li empty "
"of top %li nodes.",
m_numUsedNodes , m_minUnusedNode - m_numUsedNodes ,
m_minUnusedNode );
// loop through our nodes
long n = m_minUnusedNode;
long count = 0;
// "clear" the tree as far as addNode() is concerned
m_headNode = -1;
m_numUsedNodes = 0;
m_memAlloced = 0;
m_memOccupied = 0;
m_nextNode = 0;
m_minUnusedNode = 0;
//CollectionRec *recs = g_collectiondb.m_recs;
long max = g_collectiondb.m_numRecs;
log("db: Valid collection numbers range from 0 to %li.",max);
// now re-add the old nods to the tree, they should not be overwritten
// by addNode()
for ( long i = 0 ; i < n ; i++ ) {
// speed update
if ( (i % 100000) == 0 )
log("db: Fixing node #%li of %li.",i,n);
// skip if empty
if ( m_parents[i] <= -2 ) continue;
collnum_t cn = m_collnums[i];
// verify collnum
if ( cn < 0 ) continue;
if ( cn >= max ) continue;
// now add just to set m_right/m_left/m_parent
if ( m_fixedDataSize == 0 )
addNode(cn,&m_keys[i*m_ks], NULL, 0 );
else if ( m_fixedDataSize == -1 )
addNode(cn,&m_keys[i*m_ks],m_data[i],m_sizes[i] );
else
addNode(cn,&m_keys[i*m_ks],m_data[i],
m_fixedDataSize);
// count em
count++;
}
log("db: Fix tree removed %li nodes.",n - count);
// esure it is still good
if ( ! checkTree ( false , true ) )
return log("db: Fix tree failed.");
log("db: Fix tree succeeded.");
return true;
}
// returns false if tree had problem, true otherwise
bool RdbTree::checkTree ( bool printMsgs , bool doChainTest ) {
// no writing to tree while we are checking, since we
// do quickpolls, just make sure
bool saved = m_isWritable;
m_isWritable = false;
// check it
bool status = checkTree2 ( printMsgs , doChainTest );
// put back
m_isWritable = saved;
return status;
}
bool RdbTree::checkTree2 ( bool printMsgs , bool doChainTest ) {
long hkp = 0;
char useHalfKeys = false;
if ( !strcmp(m_dbname,"indexdb") ) useHalfKeys = true;
if ( !strcmp(m_dbname,"datedb" ) ) useHalfKeys = true;
if ( !strcmp(m_dbname,"tfndb" ) ) useHalfKeys = true;
if ( !strcmp(m_dbname,"linkdb" ) ) useHalfKeys = true;
// now check parent kid correlations
for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
// this thing blocks for 1.5 secs for indexdb
// so do some quick polls!
QUICKPOLL(MAX_NICENESS);
// skip node if parents is -2 (unoccupied)
if ( m_parents[i] == -2 ) continue;
// all half key bits must be off in here
if ( useHalfKeys && (m_keys[i*m_ks] & 0x02) ) {
hkp++;
// turn it off
m_keys[i*m_ks] &= 0xfd;
}
// for posdb
if ( m_ks == 18 &&(m_keys[i*m_ks] & 0x06) ) {
char *xx=NULL;*xx=0; }
// if no left/right kid it MUST be -1
if ( m_left[i] < -1 )
return log(
"db: Tree left kid < -1.");
if ( m_left[i] >= m_numNodes )
return log(
"db: Tree left kid is %li >= %li.",
m_left[i],m_numNodes);
if ( m_right[i] < -1 )
return log(
"db: Tree right kid < -1.");
if ( m_right[i] >= m_numNodes )
return log(
"db: Tree left kid is %li >= %li.",
m_right[i],m_numNodes);
// check left kid
if ( m_left[i] >= 0 && m_parents[m_left[i]] != i )
return log(
"db: Tree left kid and parent disagree.");
// then right kid
if ( m_right[i] >= 0 && m_parents[m_right[i]] != i )
return log(
"db: Tree right kid and parent disagree.");
/*
// check order
if ( m_left[i] >= 0 ) {
char *key = &m_keys[i*m_ks];
char *left = &m_keys[m_left[i]*m_ks];
if ( KEYCMP(key,left,m_ks)<0) {char *xx=NULL;*xx=0;}
}
if ( m_right[i] >= 0 ) {
char *key = &m_keys[i*m_ks];
char *right = &m_keys[m_right[i]*m_ks];
if ( KEYCMP(key,right,m_ks)>0) {char *xx=NULL;*xx=0;}
}
*/
//g_loop.quickPoll(1, __PRETTY_FUNCTION__, __LINE__);
}
if ( hkp > 0 )
return log("db: Had %li half key bits on for %s.",hkp,m_dbname);
// now return if we aren't doing active balancing
if ( ! m_depth ) return true;
// debug -- just always return now
if ( printMsgs )logf(LOG_DEBUG,"***m_headNode=%li, m_numUsedNodes=%li",
m_headNode,m_numUsedNodes);
//CollectionRec *recs = g_collectiondb.m_recs;
long max = g_collectiondb.m_numRecs;
// verify that parent links correspond to kids
for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
// this thing blocks for 1.5 secs for indexdb
// so do some quick polls!
QUICKPOLL(MAX_NICENESS);
// verify collnum
collnum_t cn = m_collnums[i];
if ( cn < 0 )
return log("db: Got bad collnum in tree, %i.",cn);
if ( cn > max )
return log("db: Got too big collnum in tree. %i.",cn);
// we do not want to delete these nodes from the tree yet
// in case the collection was accidentally removed.
//if ( ! recs[cn] )
// return log("db: Got bad collnum tree. %li.",cn);
long P = m_parents [i];
if ( P == -2 ) continue; // deleted node
if ( P == -1 && i != m_headNode )
return log("db: Tree node %li has "
"no parent.",i);
// check kids
if ( P>=0 && m_left[P] != i && m_right[P] != i )
return log("db: Tree kids of node # %li "
"disowned him.",i);
//g_loop.quickPoll(1, __PRETTY_FUNCTION__, __LINE__);
// speedy tests continue
if ( ! doChainTest ) continue;
// ensure i goes back to head node
long j = i;
while ( j >= 0 ) {
if ( j == m_headNode ) break;
j = m_parents[j];
}
if ( j != m_headNode )
return log(
"db: Node # %li does not lead back to "
"head node.",i);
if ( printMsgs ) {
char *k = &m_keys[i*m_ks];
logf(LOG_DEBUG,"***node=%li left=%li rght=%li "
"prnt=%li, depth=%li c=%li key=%s",
i,m_left[i],m_right[i],m_parents[i],
(long)m_depth[i],(long)m_collnums[i],
KEYSTR(k,m_ks));
// assume linkdb
//key192_t *kp = (key192_t *)k;
//unsigned char hc = g_linkdb.getLinkerHopCount_uk(kp);
//if ( hc ) { char *xx=NULL;*xx=0; }
}
//ensure depth
long newDepth = computeDepth ( i );
if ( m_depth[i] != newDepth )
return log("db: Tree node # %li's depth "
"should be %li.",i,newDepth);
}
if ( printMsgs ) logf(LOG_DEBUG,"---------------");
// no problems found
return true;
}
// . grow tree to "n" nodes
// . this will now actually grow from a current size to a new one
bool RdbTree::growTree ( long nn , long niceness ) {
2013-08-03 00:12:24 +04:00
// if we're that size, bail
if ( m_numNodes == nn ) return true;
// old number of nodes
long on = m_numNodes;
// some quick type info
long k = m_ks;
long d = sizeof(char *);
//key_t *kp = NULL;
char *kp = NULL;
long *lp = NULL;
long *rp = NULL;
long *pp = NULL;
char **dp = NULL;
long *sp = NULL;
char *tp = NULL;
collnum_t *cp = NULL;
// unprotect it all
if ( m_useProtection ) unprotect ( );
// do the reallocs
long cs = sizeof(collnum_t);
cp =(collnum_t *)mrealloc (m_collnums, on*cs,nn*cs,m_allocName);
if ( ! cp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
kp = (char *) mrealloc ( m_keys , on*k , nn*k , m_allocName );
if ( ! kp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
lp = (long *) mrealloc ( m_left , on*4 , nn*4 , m_allocName );
if ( ! lp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
rp = (long *) mrealloc ( m_right , on*4 , nn*4 , m_allocName );
if ( ! rp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
pp = (long *) mrealloc ( m_parents , on*4 , nn*4 , m_allocName );
if ( ! pp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
// deal with data, sizes and depth arrays on a basis of need
if ( m_fixedDataSize != 0 ) {
dp =(char **)mrealloc (m_data , on*d,nn*d,m_allocName);
if ( ! dp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( m_fixedDataSize == -1 ) {
sp =(long *)mrealloc (m_sizes , on*4,nn*4,m_allocName);
if ( ! sp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( m_doBalancing ) {
tp =(char *)mrealloc (m_depth , on ,nn ,m_allocName);
if ( ! tp ) goto error;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
// re-assign
m_collnums= cp;
m_keys = kp;
m_left = lp;
m_right = rp;
m_parents = pp;
m_data = dp;
m_sizes = sp;
m_depth = tp;
// adjust memory usage
m_memAlloced -= m_overhead * on;
m_memAlloced += m_overhead * nn;
// bitch an exit if too much
if ( m_memAlloced > m_maxMem )
return log("db: Trying to grow tree for %s to %li, but max is "
"%li. Consider changing gb.conf.",
m_dbname,m_memAlloced , m_maxMem );
// base mem is mem that cannot be freed
m_baseMem = m_overhead * nn ;
// and the new # of nodes we have
m_numNodes = nn;
// protect it from writes
if ( m_useProtection ) protect ( );
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
return true;
error:
char *kk ;
long *x ;
char *s ;
char **p ;
collnum_t *ss;
// . realloc back down if we need to
// . downsizing should NEVER fail!
if ( cp ) {
ss = (collnum_t *)mrealloc ( cp , nn*cs , on*cs , m_allocName);
if ( ! ss ) { char *xx = NULL; *xx = 0; }
m_collnums = ss;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( kp ) {
kk = (char *)mrealloc ( kp, nn*k, on*k, m_allocName );
if ( ! kk ) { char *xx = NULL; *xx = 0; }
m_keys = kk;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( lp ) {
x = (long *)mrealloc ( lp , nn*4 , on*4 , m_allocName );
if ( ! x ) { char *xx = NULL; *xx = 0; }
m_left = x;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( rp ) {
x = (long *)mrealloc ( rp , nn*4 , on*4 , m_allocName );
if ( ! x ) { char *xx = NULL; *xx = 0; }
m_right = x;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( pp ) {
x = (long *)mrealloc ( pp , nn*4 , on*4 , m_allocName );
if ( ! x ) { char *xx = NULL; *xx = 0; }
m_parents = x;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( dp && m_fixedDataSize != 0 ) {
p = (char **)mrealloc ( dp , nn*d , on*d , m_allocName );
if ( ! p ) { char *xx = NULL; *xx = 0; }
m_data = p;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( sp && m_fixedDataSize == -1 ) {
x = (long *)mrealloc ( sp , nn*4 , on*4 , m_allocName );
if ( ! x ) { char *xx = NULL; *xx = 0; }
m_sizes = x;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
if ( tp && m_doBalancing ) {
s = (char *)mrealloc ( tp , nn , on , m_allocName );
if ( ! s ) { char *xx = NULL; *xx = 0; }
m_depth = s;
QUICKPOLL(niceness);
2013-08-03 00:12:24 +04:00
}
return log("db: Failed to grow tree for %s from %li to %li bytes: %s.",
m_dbname,on,nn,mstrerror(g_errno));
}
void RdbTree::protect ( int prot ) {
// old number of nodes
long on = m_numNodes;
gbmprotect ( m_collnums , on*sizeof(collnum_t) , prot );
gbmprotect ( m_keys , on*m_ks, prot );
gbmprotect ( m_left , on*4 , prot );
gbmprotect ( m_right , on*4 , prot );
gbmprotect ( m_parents , on*4 , prot );
if ( m_data ) gbmprotect ( m_data , on*sizeof(char *) , prot );
if ( m_sizes ) gbmprotect ( m_sizes , on*4 , prot );
if ( m_depth ) gbmprotect ( m_depth , on , prot );
}
// . mprotect() the 8KB-aligned blocks lying entirely inside [p,p+size)
// . the partial blocks at either end are left alone since they may be
//   shared with memory that does not belong to the buffer
// . does nothing if "p" is NULL or no whole block fits
// . a failing mprotect() is only logged
// . fix: the start used to be rounded up by adding a full 8KB, so a
//   buffer that was already aligned lost its first block of protection;
//   it is now rounded up by adding 8KB-1
void RdbTree::gbmprotect ( void *p , long size , int prot ) {
	if ( ! p || size <= 0 ) return;
	// granularity used for both the start address and the length
	const unsigned long ALIGN = 8*1024;
	char *start = (char *)p;
	// round the start up to the next boundary (no-op if on one already)
	char *np = (char *)( ((unsigned long)start + (ALIGN-1)) &
			     ~(ALIGN-1) );
	// bytes left after skipping the unaligned head
	size -= (np - start);
	if ( size <= 0 ) return;
	// round the length down to whole blocks
	long nsize = size & (~(8*1024-1));
	if ( nsize <= 0 ) return;
	if ( mprotect ( np , nsize , prot ) == -1 )
		log("db: mprotect (size=%li): %s.",nsize,mstrerror(errno));
}
// . add up the bytes the records of "collnum" in [startKey,endKey]
//   contribute: key size, plus the data size if the tree holds data,
//   plus 4 bytes per record when the data size is variable
// . stops at the first node past endKey, at the first node of another
//   collection, or once the running total has reached minRecSizes
long RdbTree::getMemOccupiedForList2 ( collnum_t collnum     ,
				       char     *startKey    ,
				       char     *endKey      ,
				       long      minRecSizes ,
				       long      niceness    ) {
	// running total in bytes
	long total = 0;
	for ( long node = getNextNode ( collnum , startKey ) ;
	      node >= 0 ;
	      node = getNextNode ( node ) ) {
		// breathe now... crap what if niceness 0 add to this tree?
		// can that happen?
		QUICKPOLL(niceness);
		// the three stop conditions
		if ( KEYCMP(m_keys,node,endKey,0,m_ks) > 0 ) break;
		if ( m_collnums[node] != collnum ) break;
		if ( total >= minRecSizes ) break;
		// data bytes, fixed or per node, if the tree has data
		if ( m_data )
			total += ( m_fixedDataSize >= 0 ) ?
				m_fixedDataSize : m_sizes[node];
		// the key itself
		total += m_ks;
		// 4 more bytes per record for variable sized data
		if ( m_fixedDataSize < 0 ) total += 4;
	}
	return total;
}
// . bytes derived from the tree's counters for holding all used nodes
//   in a list
// . fixed data size: (key size + data size) per used node
// . variable data size: getMemOccupied() with m_overhead swapped for
//   the key size per used node, then 4 bytes taken off per used node
long RdbTree::getMemOccupiedForList ( ) {
	// fixed size records are a straight multiplication
	if ( m_fixedDataSize >= 0 ) {
		long fixedMem = 0;
		fixedMem += m_numUsedNodes * m_ks;
		fixedMem += m_numUsedNodes * m_fixedDataSize;
		return fixedMem;
	}
	// start from the total mem used by occupied nodes
	long listMem = getMemOccupied();
	// take the per-node tree overhead back out
	listMem -= m_overhead * m_numUsedNodes;
	// the key is part of that overhead but does go into the list
	listMem += m_ks * m_numUsedNodes;
	// and subtract 4 bytes per used node
	listMem -= 4 * m_numUsedNodes;
	return listMem;
}
// . copy the records of "collnum" with keys in [startKey,endKey] into
//   "list", stopping once the list size hits or exceeds minRecSizes
// . a negative minRecSizes means unbounded, 0 returns an empty list
// . *numPosRecs / *numNegRecs (if given) receive the number of positive
//   and negative keys copied
// . if we stopped because of minRecSizes the list's endKey is pulled in
//   to the last key copied
// . returns false on error (list does not own its data, or it could
//   not be grown / appended to), true otherwise
// . RdbDump should use this
bool RdbTree::getList ( collnum_t collnum ,
			char *startKey, char *endKey, long minRecSizes ,
			RdbList *list , long *numPosRecs , long *numNegRecs ,
			bool useHalfKeys ,
			long niceness ) {
	// running counts, handed back through the out params at the end
	long negCount = 0;
	long posCount = 0;
	if ( numNegRecs ) *numNegRecs = 0;
	if ( numPosRecs ) *numPosRecs = 0;
	// . prepare the list
	// . m_ks must be set first so set ( startKey , endKey ) works
	list->reset();
	list->m_ks = m_ks;
	list->set ( startKey , endKey );
	list->setFixedDataSize ( m_fixedDataSize );
	list->setUseHalfKeys ( useHalfKeys );
	// the list has to own its data
	if ( ! list->getOwnData() ) {
		g_errno = EBADENGINEER;
		return log(LOG_LOGIC,"db: rdbtree: getList: List does not "
			   "own data");
	}
	// nothing wanted, or nothing in the tree
	if ( minRecSizes == 0 || m_numUsedNodes == 0 ) return true;
	// . first node >= startKey
	// . done if there is none, if it is already beyond endKey or if
	//   it belongs to a later collection
	long node = getNextNode ( collnum , startKey );
	if ( node < 0 ||
	     KEYCMP ( m_keys,node,endKey,0,m_ks) > 0 ||
	     m_collnums [ node ] > collnum )
		return true;
	// last node copied into the list, -1 if none
	long lastNode = -1;
	// . how much to pre-grow the list by
	// . start with the space the whole tree would need in a list,
	//   deletes included
	long growth = getMemOccupiedForList ( );
	// . with a fixed data size and a finite minRecSizes there is no
	//   point in reserving more than minRecSizes plus one record
	// . "ng > minRecSizes" guards against callers passing huge
	//   minRecSizes values
	if ( m_fixedDataSize >= 0 && minRecSizes >= 0 ) {
		long ng = minRecSizes + m_fixedDataSize + m_ks ;
		if ( ng < growth && ng > minRecSizes ) growth = ng;
	}
	// a negative minRecSizes means no limit
	if ( minRecSizes < 0 ) minRecSizes = 0x7fffffff;
	// . for variable sized data or large requests walk the range to
	//   get the real number, reserving the whole tree's worth ran
	//   callers out of memory
	// . not done for small requests since the walk can be slow
	if ( m_fixedDataSize < 0 || minRecSizes >= 256*1024 )
		growth = getMemOccupiedForList2 ( collnum, startKey, endKey,
						  minRecSizes , niceness );
	// grow the list now
	if ( ! list->growList ( growth ) )
		return log("db: Failed to grow list to %li bytes for storing "
			   "records from tree: %s.",growth,mstrerror(g_errno));
	// . flag that a list is being read out of the tree
	// . the QUICKPOLL below may run a niceness 0 add/delete on this
	//   very tree; that once removed a record that had already been
	//   copied into the list, and the deleteList() done after dumping
	//   the list then could not find it
	// . it is a count rather than a flag because we may be re-entered
	//   through a quickpoll
	m_gettingList++;
	// copy nodes until we hit or just exceed minRecSizes or run out
	while ( node >= 0 && list->getListSize() < minRecSizes ) {
		// breathe when getting big lists for dumping
		QUICKPOLL(niceness);
		// stop before exceeding endKey
		if ( KEYCMP (m_keys,node,endKey,0,m_ks) > 0 ) break;
		// or if we hit a different collection number
		if ( m_collnums [ node ] != collnum ) break;
		// the record to append: key only, unless the tree has data
		char *key      = &m_keys[node*m_ks];
		long  dataSize = 0;
		char *data     = NULL;
		if ( m_fixedDataSize != 0 ) {
			// per node size if variable, else the fixed size
			dataSize = ( m_fixedDataSize == -1 ) ?
				m_sizes[node] : m_fixedDataSize;
			// negative keys (low bit off) carry no data in a
			// list, ignore whatever the node has
			if ( (key[0] & 0x01) == 0x00 ) dataSize = 0;
			data = m_data[node];
		}
		if ( ! list->addRecord ( key , dataSize , data ) ) {
			m_gettingList--;
			return log("db: Failed to add record "
				   "to tree list for %s: %s. "
				   "Fix the growList algo.",
				   m_dbname,mstrerror(g_errno));
		}
		// count negative and positive recs
		if ( KEYNEG(m_keys,node,m_ks) ) negCount++;
		else                            posCount++;
		// remember it for setting the last key
		lastNode = node;
		// advance to next node
		node = getNextNode ( node );
	}
	// done reading out of the tree
	m_gettingList--;
	// set counts to pass back
	if ( numNegRecs ) *numNegRecs = negCount;
	if ( numPosRecs ) *numPosRecs = posCount;
	// record the last key inserted into the list
	if ( lastNode >= 0 )
		list->setLastKey ( &m_keys[lastNode*m_ks] );
	// . if we stopped on minRecSizes there may be more records before
	//   endKey than we copied, so the list's endKey must shrink
	// . use the last key we read as the new endKey
	if ( list->getListSize() >= minRecSizes && lastNode >= 0 ) {
		char newEndKey[MAX_KEY_BYTES];
		KEYSET(newEndKey,&m_keys[lastNode*m_ks],m_ks);
		// . endKeys aren't allowed to be negative, so bump a
		//   negative key by 1
		// . per the original notes Rdb::addRecord() keeps a key and
		//   its positive counterpart from both being in the tree,
		//   and if it were there it would be annihilated anyway
		if ( KEYNEG(newEndKey,0,m_ks) ) KEYADD(newEndKey,1,m_ks);
		// if we're using half keys set his half key bit
		if ( useHalfKeys ) KEYOR(newEndKey,0x02);
		// tell list his new endKey now
		list->set ( startKey , newEndKey );
	}
	// reset list ptr to point to first record
	list->resetListPtr();
	// success
	return true;
}
// . return false on error (out of memory in list)
// . don't order by keys, order by node #
// . used for saving a tree to disk temporarily so it can be re-loaded
// w/o totally unbalancing the tree
// . "*lastNode" is last node # in the list
// . we set *lastNode to -1 if that's all folks
/*
bool RdbTree::getListUnordered ( long startNode , long minRecSizes ,
RdbList *list , long *nextNode ) {
// assume no nodes from startNode onward in this tree
*nextNode = -1;
// reset the list
list->reset();
list->setFixedDataSize ( m_fixedDataSize );
// return true if no non-empty nodes in the tree
if ( m_numUsedNodes == 0 ) return true;
// . grow list to minRecSizes or size of tree, whichever is smallest
// . how much space would whole tree take if we stored it in a list?
// . this includes records that are deletes
long growth = getMemOccupiedForList ( );
// don't grow more than we need to
if ( minRecSizes < growth ) growth = minRecSizes;
// grow the list now
if ( ! list->growList ( growth ) )
return log("db: Failed to grow list to %li bytes for storing "
"records from tree: %s.",growth,mstrerror(g_errno));
// mdw fixed, this. it was node = 0 so we couldn't dump all of tree!!!
long node = startNode ;
long dataSize;
char *data;
while ( node < m_minUnusedNode ) {
// continue if this node is empty
if ( m_parents [ node ] == -2 ) { node++; continue; }
// get the data/dataSize
if ( m_fixedDataSize == -1 ) dataSize = m_sizes[node];
else dataSize = m_fixedDataSize;
if ( m_fixedDataSize == 0 ) data = NULL;
else data = m_data[node];
// don't exceed the specified buf size
minRecSizes -= (m_ks + dataSize);
if ( m_fixedDataSize == -1 ) minRecSizes -= 4;
if ( minRecSizes < 0 ) break;
// . add to the list
// . return false on error
if ( ! list->addRecord ( m_keys[node], dataSize , data ) )
return log("db: Failed to add record "
"to tree list for %s: %s.",
m_dbname,mstrerror(g_errno));
// goto next node
node++;
}
// . record the next node to be added into the list
// . iff there are more nodes available
// . otherwise, leave it set to -1 so the caller knows that's it
if ( node < m_minUnusedNode ) *nextNode = node;
return true;
}
*/
// . estimate the size of the list holding the keys of "collnum" in
//   [startKey,endKey]
// . returns (number of nodes) * m_ks, so it only accounts for keys
// . the node count is first estimated from getOrderOfKey(); the more
//   balanced the tree the better that estimate
// . if the estimate is <= 200 nodes the nodes are counted exactly
// . minKey/maxKey, if given, are set to the keys getOrderOfKey() landed
//   on, which may be < or > startKey/endKey
// . fixes over the previous version, all in the exact count:
//   - node #0 is a valid node but was excluded by a "n > 0" loop test
//   - m_collnums[] was indexed after the node could have become -1
//   - the walk starts at the first node >= startKey, so a range whose
//     startKey is below every key in the tree is no longer counted as 0
long RdbTree::getListSize ( collnum_t collnum ,
			    char *startKey , char *endKey ,
			    char *minKey , char *maxKey ) {
	// make these as benign as possible
	if ( minKey ) KEYSET ( minKey , endKey   , m_ks );
	if ( maxKey ) KEYSET ( maxKey , startKey , m_ks );
	// get order of a key as close to startKey as possible
	long order1 = getOrderOfKey ( collnum , startKey , minKey );
	// get order of a key as close to endKey as possible
	long order2 = getOrderOfKey ( collnum , endKey   , maxKey );
	// how many recs?
	long size = order2 - order1;
	// if enough, return the estimated list size
	if ( size > 200 ) return size * m_ks;
	// nothing to count in an empty tree
	if ( m_numUsedNodes <= 0 ) return 0;
	// otherwise, count exactly
	size = 0;
	for ( long n = getNextNode ( collnum , startKey ) ;
	      n >= 0 &&
	      m_collnums[n] == collnum &&
	      KEYCMP(m_keys,n,endKey,0,m_ks) <= 0 ;
	      n = getNextNode ( n ) )
		size++;
	// exact number of nodes, converted to bytes of keys
	return size * m_ks;
}
// . estimate the rank of (collnum,key) among the used nodes
// . descends from the head node as if the tree were a perfect binary
//   tree of depth getTreeDepth(), moving the rank by a halving step on
//   every turn, then scales the result to m_numUsedNodes
// . "retKey", if given, receives the key of the last node visited,
//   which is as close to "key" as the descent got
// . returns # of NODES, 0 for an empty tree
// . fix: the step and the scaling used to be computed with int shifts,
//   which overflowed (or were undefined) once the depth reached 31.
//   they are now 64-bit and the depth is clamped to [1,31], which
//   keeps order * m_numUsedNodes within 64 bits
long RdbTree::getOrderOfKey ( collnum_t collnum , char *key , char *retKey ) {
	if ( m_numUsedNodes <= 0 ) return 0;
	long i = m_headNode;
	// actual depth if balanced, an estimate otherwise
	long d = getTreeDepth() ;
	// keep the shifts below well defined
	if ( d <  1 ) d =  1;
	if ( d > 31 ) d = 31;
	// rank of the root of a perfect tree of depth d
	long long step  = 1LL << (d-1);
	long long order = step;
	while ( i != -1 ) {
		if ( retKey ) KEYSET ( retKey , &m_keys[i*m_ks] , m_ks );
		step /= 2;
		// (collnum,key) sorts before node i, go left
		if ( collnum < m_collnums[i] ||
		     (collnum==m_collnums[i] &&KEYCMP(key,0,m_keys,i,m_ks)<0)){
			i = m_left [i];
			if ( i >= 0 ) order -= step;
			continue;
		}
		// (collnum,key) sorts after node i, go right
		if ( collnum > m_collnums[i] ||
		     (collnum==m_collnums[i] &&KEYCMP(key,0,m_keys,i,m_ks)>0)){
			i = m_right[i];
			if ( i >= 0 ) order += step;
			continue;
		}
		// exact match
		break;
	}
	// normalize order since tree probably has less then 2^d nodes
	long long normOrder =
		order *
		(long long) m_numUsedNodes /
		((1LL << d) - 1) ;
	return (long) normOrder;
}
// . returns the depth of the tree
// . for a balancing tree this is the depth stored on the head node
// . otherwise it is an estimate: the position of the highest set bit
//   (among the low 32 bits) of m_numUsedNodes, plus one
long RdbTree::getTreeDepth ( ) {
	// balanced trees track their depth on the head node
	if ( m_doBalancing ) return m_depth [ m_headNode ];
	// scan from the top bit down; the first set bit decides the answer
	long used = m_numUsedNodes;
	for ( long bit = 31 ; bit >= 0 ; bit-- ) {
		if ( (used >> bit) & 0x01 ) return bit + 1;
	}
	// no bits set at all
	return 1;
}
// . recompute depths of nodes starting at i and ascending the tree
// . call rotateRight/Left() when depth of children differs by 2
// . "i" is the node # to start from; a negative "i" makes this a no-op
// . stops ascending as soon as a node's depth comes out unchanged
void RdbTree::setDepths ( long i ) {
// inc the depth of all parents if it changes for them
while ( i >= 0 ) {
// . compute the new depth for node i
// . get depth of left kid
// . left/rightDepth is depth of subtree on left/right
// . a missing kid counts as depth 0
long leftDepth = 0;
long rightDepth = 0;
if ( m_left [i] >= 0 ) leftDepth = m_depth [ m_left [i] ] ;
if ( m_right[i] >= 0 ) rightDepth = m_depth [ m_right[i] ] ;
// . get the new depth for node i
// . add 1 cuz we include ourself in our m_depth
long newDepth ;
if ( leftDepth > rightDepth ) newDepth = leftDepth + 1;
else newDepth = rightDepth + 1;
// remember the old depth so we can tell below whether it changed
long oldDepth = m_depth[i] ;
// set our new depth
m_depth[i] = newDepth;
// diff can be -2, -1, 0, +1 or +2
long diff = leftDepth - rightDepth;
// . if it's -1, 0 or 1 then we don't need to balance
// . if rightside is deeper rotate left, i is the pivot
// . if leftside is deeper rotate right, i is the pivot
// . these should set the m_depth[*] for all nodes needing it
// . i becomes the node that took the pivot's place
if ( diff == -2 ) i = rotateLeft ( i );
else if ( diff == 2 ) i = rotateRight ( i );
// . return if our depth was ultimately unchanged
// . i may have changed if we rotated, but same logic applies
if ( m_depth[i] == oldDepth ) break;
// debug msg
//fprintf (stderr,"changed node %li's depth from %li to %li\n",
//i,oldDepth,newDepth);
// get his parent to continue the ascension
i = m_parents [ i ];
}
// debug msg
//printTree();
}
/*
// W , X and B are SUBTREES.
// B's subtree was 1 less in depth than W or X, then a new node was added to
// W or X triggering the imbalance.
// However, if B gets deleted W and X can be the same size.
//
// Right rotation if W subtree depth is >= X subtree depth:
//
// A N
// / \ / \
// / \ / \
// N B ---> W A
// / \ / \
// W X X B
//
// Right rotation if W subtree depth is < X subtree depth:
// A X
// / \ / \
// / \ / \
// N B ---> N A
// / \ / \ / \
// W X W Q T B
// / \
// Q T
*/
// . we come here when A's left subtree is deeper than its right subtree by 2
// . this rotation operation causes left to lose 1 depth and right to gain one
// . the type of rotation depends on which subtree is deeper, W or X
// . after an insert, W or X must be deeper than the other by exactly one
// . if they were equal depth then how did adding a node inc the depth?
//   (after a delete they can be equal; rotate() handles that case too)
// . if their depths differ by 2 then N would have been rotated first!
// . the parameter "i" is the node # for A in the illustration above
// . return the node # that replaced A so setDepths() can continue
// . TODO: check our depth modifications below
long RdbTree::rotateRight ( long i ) {
//fprintf(stderr,"rotateRight: pivot = %li\n",i);
// the generic rotate() with the child arrays in their natural roles
return rotate ( i , m_left , m_right );
}
// . mirror image of rotateRight(): same rotate() code, but with the roles of
//   m_left and m_right swapped
// . "i" is the pivot node #; returns the node # that replaced it
long RdbTree::rotateLeft ( long i ) {
//fprintf(stderr,"rotateLeft: pivot = %li\n",i);
return rotate ( i , m_right , m_left );
}
// . generic rotation shared by rotateRight() and rotateLeft()
// . "i" is the pivot node # (A in the illustration above)
// . "left" and "right" are the child-link arrays; rotateRight() passes
//   (m_left,m_right) and rotateLeft() passes them swapped, so the names
//   below read as for a right rotation
// . relinks parents/kids, updates m_headNode if A was the head, and patches
//   m_depth[] of the nodes that moved
// . returns the node # that now sits where A was (N or X)
// . NOTE: left[A] is indexed without a check, so the caller must only
//   call this when A has a kid on the "left" side
long RdbTree::rotate ( long i , long *left , long *right ) {
// i's left kid's right kid takes his place
long A = i;
long N = left [ A ];
long W = left [ N ];
long X = right [ N ];
// Q and T are X's kids, only needed for the "X is deeper" double rotation
long Q = -1;
long T = -1;
if ( X >= 0 ) {
Q = left [ X ];
T = right [ X ];
}
// let AP be A's parent
long AP = m_parents [ A ];
// whose the bigger subtree, W or X? (depth includes W or X itself)
long Wdepth = 0;
long Xdepth = 0;
if ( W >= 0 ) Wdepth = m_depth[W];
if ( X >= 0 ) Xdepth = m_depth[X];
// debug msg
//fprintf(stderr,"A=%li AP=%li N=%li W=%li X=%li Q=%li T=%li "
//"Wdepth=%li Xdepth=%li\n",A,AP,N,W,X,Q,T,Wdepth,Xdepth);
// goto Xdeeper if X is deeper
if ( Wdepth < Xdepth ) goto Xdeeper;
// ----- single rotation: N takes A's place -----
// N's parent becomes A's parent
m_parents [ N ] = AP;
// A's parent becomes N
m_parents [ A ] = N;
// X's parent becomes A
if ( X >= 0 ) m_parents [ X ] = A;
// A's parents kid becomes N
if ( AP >= 0 ) {
if ( left [ AP ] == A ) left [ AP ] = N;
else right [ AP ] = N;
}
// if A had no parent, it was the headNode
else {
//fprintf(stderr,"changing head node from %li to %li\n",
//m_headNode,N);
m_headNode = N;
}
// N's right kid becomes A
right [ N ] = A;
// A's left kid becomes X
left [ A ] = X;
// . compute A's depth from it's X and B kids
// . it should be one less if Xdepth smaller than Wdepth
// . might set m_depth[A] to computeDepth(A) if we have problems
if ( Xdepth < Wdepth ) m_depth [ A ] -= 2;
else m_depth [ A ] -= 1;
// N gains a depth iff W and X were of equal depth
if ( Wdepth == Xdepth ) m_depth [ N ] += 1;
// now we're done, return the new pivot that replaced A
return N;
// ----- double rotation: X takes A's place -----
// come here if X is deeper
Xdeeper:
// X's parent becomes A's parent
m_parents [ X ] = AP;
// A's parent becomes X
m_parents [ A ] = X;
// N's parent becomes X
m_parents [ N ] = X;
// Q's parent becomes N
if ( Q >= 0 ) m_parents [ Q ] = N;
// T's parent becomes A
if ( T >= 0 ) m_parents [ T ] = A;
// A's parent's kid becomes X
if ( AP >= 0 ) {
if ( left [ AP ] == A ) left [ AP ] = X;
else right [ AP ] = X;
}
// if A had no parent, it was the headNode
else {
//fprintf(stderr,"changing head node2 from %li to %li\n",
//m_headNode,X);
m_headNode = X;
}
// A's left kid becomes T
left [ A ] = T;
// N's right kid becomes Q
right [ N ] = Q;
// X's left kid becomes N
left [ X ] = N;
// X's right kid becomes A
right [ X ] = A;
// X's depth increases by 1 since it gained 1 level of 2 new kids
m_depth [ X ] += 1;
// N's depth decreases by 1
m_depth [ N ] -= 1;
// A's depth decreases by 2
m_depth [ A ] -= 2;
// now we're done, return the new pivot that replaced A
return X;
}
// . depth of the subtree rooted at node "i", derived from the stored
//   depths of its kids
// . counts "i" itself, so the smallest possible answer is 1
long RdbTree::computeDepth ( long i ) {
	long kidL = m_left [i];
	long kidR = m_right[i];
	// a missing kid contributes a depth of 0
	long depthL = ( kidL >= 0 ) ? m_depth [ kidL ] : 0;
	long depthR = ( kidR >= 0 ) ? m_depth [ kidR ] : 0;
	// the deeper side wins, plus one for node i itself
	long deeper = ( depthL > depthR ) ? depthL : depthR;
	return deeper + 1;
}
// . a quick way to add a list of sorted keys (no data)...
// . will take care of positive/negative key annihilations
// . returns false and sets g_errno on error
/*
bool RdbTree::addSortedKeys ( key_t *keys , long numKeys ) {
// do we have enough room?
if ( m_numUsedNodes + numKeys >= m_numNodes) {
g_errno = ENOMEM; return false; }
// add one key at a time
long x = 0;
// some vars
key_t k;
long iparent ;
long rightGuy;
long i;
loop:
// bail if x is exhausted
if ( x >= numKeys ) return true;
// get the xth key
k = keys[x];
// point x to next key
x++;
// this is -1 iff there are no nodes used in the tree
i = m_headNode;
// . find the parent of node i and call it "iparent"
// . if a node exists with our key then replace it
while ( i != -1 ) {
iparent = i;
if ( key < m_keys[i] ) i = m_left [i];
else if ( key > m_keys[i] ) i = m_right[i];
else goto replaceIt;
}
// . this overhead is key/left/right/parent
// . we inc it by the data and sizes array if we need to below
m_memOccupied += m_overhead;
// point i to the next available node
i = m_nextNode;
// if we're the first node we become the head node and our parent is -1
if ( m_numUsedNodes == 0 ) {
m_headNode = i;
iparent = -1;
}
// stick ourselves in the next available node, "m_nextNode"
m_keys [ i ] = key;
m_parents [ i ] = iparent;
// add the key
// set the data and size only if we need to
if ( m_fixedDataSize != 0 ) {
// ack used and occupied mem
m_memAlloced += dataSize ;
m_memOccupied += dataSize ;
}
// make our parent, if any, point to us
if ( iparent >= 0 ) {
if ( key < m_keys[iparent] ) m_left [iparent] = i;
else m_right[iparent] = i;
}
// . the right kid of an empty node is used as a linked list of
// empty nodes formed by deleting nodes
// . we keep the linked list so we can re-used these vacated nodes
rightGuy = m_right [ i ];
// our kids are -1 (none)
m_left [ i ] = -1;
m_right [ i ] = -1;
// . if we weren't recycling a node then advance to next
// . m_minUnusedNode is the lowest node number that was never filled
// at any one time in the past
// . you might call it the brand new housing district
if ( m_nextNode == m_minUnusedNode ) {m_nextNode++; m_minUnusedNode++;}
// . otherwise, we're in a linked list of vacated used houses
// . we have a linked list in the right kid
// . make sure the new head doesn't have a left
else {
// point m_nextNode to the next available used house, if any
if ( rightGuy >= 0 ) m_nextNode = rightGuy;
// otherwise point it to the next brand new house
else m_nextNode = m_minUnusedNode;
}
// we have one more used node
m_numUsedNodes++;
// if we don't have to balance return i now
if ( ! m_doBalancing ) return i;
// our depth is now 1 since we're a leaf node (we include ourself)
m_depth [ i ] = 1;
// . reset depths starting at i's parent and ascending the tree
// . will balance if child depths differ by 2 or more
setDepths ( iparent );
// return the node number of the node we occupied
return i;
}
*/
// . how balanced is this tree?
// . ratio of (# nodes with a right kid) to (# nodes with a left kid),
//   times 100, inverted when necessary so the result never exceeds 100
// . returns a percent from 0 to 100
long RdbTree::getBalancePercent() {
	// . count nodes w/ left kids and nodes w/ right kids
	// . start both at 1 so neither can be zero when we divide
	// . 64-bit so that the "* 100" below cannot overflow a 32-bit long
	//   on trees with tens of millions of nodes
	long long numRight = 1;
	long long numLeft  = 1;
	for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
		// skip nuked nodes
		if ( m_parents[i] == -2 ) continue;
		if ( m_left [i] >= 0 ) numLeft++;
		if ( m_right[i] >= 0 ) numRight++;
	}
	// . the ratio
	// . always put the smaller count on top
	long long p;
	if ( numLeft < numRight ) p = (numLeft  * 100) / numRight;
	else                      p = (numRight * 100) / numLeft;
	// return the percent. from 0 to 100%.
	return (long) p;
}
// max number of tree nodes handled per fastSaveBlock_r()/fastLoadBlock() call
#define BLOCK_SIZE 10000
// thread entry point and thread-done callback that fastSave() hands to
// g_threads.call(); "state" is the RdbTree being saved
static void *saveWrapper ( void *state , ThreadEntry *t ) ;
static void threadDoneWrapper ( void *state , ThreadEntry *t ) ;
// . saves the tree to "<dir>/<dbname>-saved.dat", optionally in a thread
// . returns false if blocked (thread launched, "callback" is called with
//   "state" when it finishes) or if a save is already in progress
// . returns true otherwise: nothing to do, read-only mode, or the save
//   was done synchronously
// . sets g_errno on error
bool RdbTree::fastSave ( char *dir ,
			 char *dbname ,
			 bool useThread ,
			 void *state ,
			 void (* callback) (void *state) ) {
	// nothing to do in read-only mode or when the tree is clean
	if ( g_conf.m_readOnlyMode || ! m_needsSave ) return true;
	// a save is already in flight
	if ( m_isSaving ) return false;
	// note it
	logf(LOG_INFO,"db: Saving %s/%s-saved.dat",dir,dbname);
	// remember where we are saving to
	strcpy ( m_dir , dir );
	// sanity check: the caller's dbname must match ours
	if ( dbname && strcmp(dbname,m_dbname) ) {
		log("db: tree dbname mismatch.");
		char *xx=NULL;*xx=0;
	}
	// save parms for the completion callback
	m_state = state;
	m_callback = callback;
	// assume no error
	m_saveErrno = 0;
	// no adding to the tree now
	m_isSaving = true;
	// try to hand the work to a thread if asked to
	if ( useThread ) {
		bool launched = g_threads.call ( SAVETREE_THREAD , // threadType
						 1 , // niceness
						 this , // top 4 bytes must be cback
						 threadDoneWrapper ,
						 saveWrapper );
		// threadDoneWrapper() finishes the job later
		if ( launched ) return false;
		// complain unless threads were turned off on purpose
		if ( ! g_threads.m_disabled )
			log("db: Thread creation failed. Blocking while saving tree. "
			    "Hurts performance.");
	}
	// do the save right here; result is reported through m_saveErrno
	fastSave_r ();
	// store save error into g_errno
	g_errno = m_saveErrno;
	// resume adding to the tree
	m_isSaving = false;
	// we do not need to be saved now?
	m_needsSave = false;
	// we did not block
	return true;
}
void *saveWrapper ( void *state , ThreadEntry *t ) {
// get this class
RdbTree *THIS = (RdbTree *)state;
// this returns false and sets g_errno on error
THIS->fastSave_r();
// now exit the thread, bogus return
return NULL;
}
// we come here after thread exits
void threadDoneWrapper ( void *state , ThreadEntry *t ) {
// get this class
RdbTree *THIS = (RdbTree *)state;
// store save error into g_errno
g_errno = THIS->m_saveErrno;
// . resume adding to the tree
// . this will also allow other threads to be queued
// . if we did this at the end of the thread we could end up with
// an overflow of queued SAVETHREADs
THIS->m_isSaving = false;
// we do not need to be saved now?
THIS->m_needsSave = false;
// g_errno should be preserved from the thread so if fastSave_r()
// had an error it will be set
if ( g_errno )
log("db: Had error saving tree to disk for %s: %s.",
THIS->m_dbname,mstrerror(g_errno));
else
// log it
log("db: Done saving %s/%s-saved.dat",
THIS->m_dir,THIS->m_dbname);
// . call callback
if ( THIS->m_callback ) THIS->m_callback ( THIS->m_state );
}
// . writes the tree to "<m_dir>/<m_dbname>-saving.dat" and, once that
//   succeeded, renames it to "<m_dir>/<m_dbname>-saved.dat"
// . returns false on error and records the error code in m_saveErrno
// . NO USING g_errno IN A DAMN THREAD!!!!!!!!!!!!!!!!!!!!!!!!!
//   (this may run inside a thread, hence raw open/pwrite/close and errno)
bool RdbTree::fastSave_r() {
	if ( g_conf.m_readOnlyMode ) return true;
	// cannot use the BigFile class, since we may be in a thread and it
	// messes with g_errno
	char s[1024];
	int len = snprintf ( s , sizeof(s) , "%s/%s-saving.dat",
			     m_dir , m_dbname );
	// refuse to write to a truncated path
	if ( len < 0 || len >= (int)sizeof(s) ) {
		m_saveErrno = ENAMETOOLONG;
		return log("db: Could not open %s for writing: %s.",
			   s,mstrerror(m_saveErrno));
	}
	int fd = ::open ( s ,
			  O_RDWR | O_CREAT | O_TRUNC , S_IRUSR | S_IWUSR |
			  S_IRGRP | S_IWGRP | S_IROTH);
	if ( fd < 0 ) {
		m_saveErrno = errno;
		return log("db: Could not open %s for writing: %s.",
			   s,mstrerror(m_saveErrno));
	}
	// clear our own errno
	errno = 0;
	// . save the header
	// . every write uses an explicit offset, starting at byte 0
	long long offset = 0;
	long long br = 0;
	br += pwrite ( fd , &m_numNodes , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_fixedDataSize , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_numUsedNodes , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_headNode , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_nextNode , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_minUnusedNode , 4 , offset ); offset += 4;
	br += pwrite ( fd , &m_doBalancing , sizeof(m_doBalancing) , offset);
	offset += sizeof(m_doBalancing);
	br += pwrite ( fd , &m_ownData , sizeof(m_ownData) , offset);
	offset += sizeof(m_ownData);
	// bitch on error
	if ( br != offset ) {
		// . grab errno before close() gets a chance to change it
		// . a short write can leave errno at 0; still report failure
		int err = errno ? errno : EIO;
		close ( fd );
		m_saveErrno = err;
		return log("db: Failed to save tree1 for %s: %s.",
			   m_dbname,mstrerror(err));
	}
	// position to store into m_keys, ...
	long start = 0;
	// save tree in block units
	while ( start < m_minUnusedNode ) {
		// . returns number of bytes written for the block of
		//   nodes starting at node #start
		// . returns -1 and sets errno on error
		long bytesWritten = fastSaveBlock_r ( fd , start , offset ) ;
		// returns -1 on error
		if ( bytesWritten < 0 ) {
			// same as above: capture first, never record success
			int err = errno ? errno : EIO;
			close ( fd );
			m_saveErrno = err;
			return log("db: Failed to save tree2 for %s: %s.",
				   m_dbname,mstrerror(err));
		}
		// point to next block to save to
		start += BLOCK_SIZE;
		// and advance the file offset
		offset += bytesWritten;
	}
	// remember total bytes written
	m_bytesWritten = offset;
	// close it up
	close ( fd );
	// . now rename it into place
	// . "-saved.dat" is shorter than "-saving.dat" so this cannot
	//   truncate if the first path fit
	char s2[1024];
	snprintf ( s2 , sizeof(s2) , "%s/%s-saved.dat", m_dir , m_dbname );
	if ( ::rename ( s , s2 ) != 0 ) {
		// without the rename the new save is never picked up, so
		// this must not be reported as success
		m_saveErrno = errno ? errno : EIO;
		return log("db: Could not rename %s to %s: %s.",
			   s,s2,mstrerror(m_saveErrno));
	}
	// info
	//log(0,"RdbTree::fastSave: saved %li nodes", m_numUsedNodes );
	return true;
}
// return bytes written
long RdbTree::fastSaveBlock_r ( int fd , long start , long long offset ) {
// save offset
long long oldOffset = offset;
// . just save each one right out, even if empty
// because the empty's have a linked list in m_right[]
// . set # n
long n = BLOCK_SIZE;
// don't over do it
if ( start + n > m_minUnusedNode ) n = m_minUnusedNode - start;
// debug msg
//log("writing block at %lli, %li nodes",
// f->m_currentOffset, n);
errno = 0;
long long br = 0;
// write the block
br += pwrite ( fd,&m_collnums[start], n * sizeof(collnum_t) , offset );
offset += n * sizeof(collnum_t);
br += pwrite ( fd , &m_keys [start*m_ks] , n * m_ks , offset );
offset += n * m_ks;
br += pwrite(fd, &m_left [start] , n * 4 , offset ); offset += n * 4;
br += pwrite(fd, &m_right [start] , n * 4 , offset ); offset += n * 4;
br += pwrite(fd, &m_parents[start] , n * 4 , offset ); offset += n * 4;
if ( m_doBalancing ) {
br += pwrite ( fd , &m_depth[start] , n , offset ); offset += n ; }
if ( m_fixedDataSize == -1 ) {
br += pwrite ( fd , &m_sizes[start] , n*4, offset ); offset += n*4; }
// if the data is actually stored in the data ptrs, just save those
if ( m_dataInPtrs ) {
br +=pwrite(fd,&m_data[start],n * 4 , offset ); offset +=n*4;}
// bitch on error
if ( br != offset - oldOffset )
return log("db: Failed to save tree3 for %s (%lli!=%lli): %s.",
m_dbname,
br,offset,
mstrerror(errno)) - 1;
// if no data to write then return bytes written this call
if ( m_fixedDataSize == 0 || m_dataInPtrs ) return offset - oldOffset ;
// debug count
//long count = 0;
// define ending node for all loops
long end = start + n ;
// now we have to dump out all the records
for ( long i = start ; i < end ; i++ ) {
// skip if empty
if ( m_parents[i] == -2 ) continue;
// write variable sized nodes
if ( m_fixedDataSize == -1 ) {
if ( m_sizes[i] <= 0 ) continue;
pwrite ( fd , m_data[i] , m_sizes[i] , offset );
offset += m_sizes[i];
continue;
}
// write fixed sized nodes
pwrite ( fd , m_data[i] , m_fixedDataSize , offset );
offset += m_fixedDataSize;
}
// debug
//log("wrote %li bytes of raw rec data", count);
// . don't close cuz needs to stay open for the rename
// from *-saving.dat to *-saved.dat
// . close it
//f->close();
// return bytes written
return offset - oldOffset;
}
#include "Spider.h"
// . loads a tree previously written by fastSave_r() from file "f"
// . caller should call f->set() himself
// . we'll open it here
// . record data, if any, is allocated from "stack"
// . returns false and sets g_errno on error (sometimes g_errno not set)
// . if the loaded tree fails checkTree() the result of fixTree() is returned
bool RdbTree::fastLoad ( BigFile *f , RdbMem *stack ) {
	// msg
	log(LOG_INIT,"db: Loading %s.",f->getFilename());
	// open it up
	if ( ! f->open ( O_RDONLY ) ) return log("db: open failed");
	long fsize = f->getFileSize();
	// init offset
	long long offset = 0;
	// header is six 4-byte fields plus the two flags
	long header = 4*6 + sizeof(m_doBalancing) + sizeof(m_ownData);
	// file size must be a min of "header"
	if ( fsize < header ) { f->close(); g_errno=EBADFILE; return false; }
	// note it
	m_isLoading = true;
	// the header fields, in file order
	long n = 0 , fixedDataSize = 0 , numUsedNodes = 0 ;
	bool doBalancing = false , ownData = false ;
	long headNode = 0 , nextNode = 0 , minUnusedNode = 0 ;
	// every read uses an explicit offset, starting at byte 0
	f->read ( &n , 4 , offset ); offset += 4;
	f->read ( &fixedDataSize , 4 , offset ); offset += 4;
	f->read ( &numUsedNodes , 4 , offset ); offset += 4;
	f->read ( &headNode , 4 , offset ); offset += 4;
	f->read ( &nextNode , 4 , offset ); offset += 4;
	f->read ( &minUnusedNode , 4 , offset ); offset += 4;
	f->read ( &doBalancing , sizeof(m_doBalancing) , offset ) ;
	offset += sizeof(m_doBalancing);
	f->read ( &ownData , sizeof(m_ownData ) , offset ) ;
	offset += sizeof(m_ownData);
	// return false on read error
	if ( g_errno ) { f->close(); m_isLoading = false; return false; }
	// parms check: the file must have been written by a tree configured
	// the same way we are
	if ( m_fixedDataSize != fixedDataSize ||
	     m_doBalancing != doBalancing ||
	     m_ownData != ownData ) {
		f->close();
		m_isLoading = false;
		return log(LOG_LOGIC,"db: rdbtree: fastload: Bad parms. File "
			   "may be corrupt or a key attribute was changed in "
			   "the code and is not reflected in this file.");
	}
	// make sure size it right again
	long nodeSize = (sizeof(collnum_t)+m_ks+4+4+4);
	long minFileSize = header + minUnusedNode * nodeSize;
	if ( doBalancing ) minFileSize += minUnusedNode ;
	if ( fixedDataSize == -1 ) minFileSize += minUnusedNode * 4 ;
	//if ( fixedDataSize > 0 ) minFileSize += minUnusedNode *fixedDataSize;
	// if no data, sizes much match exactly
	if ( fixedDataSize == 0 && fsize != minFileSize ) {
		g_errno = EBADFILE;
		log(
		    "db: File size of %s is %li, should be %li. File may be "
		    "corrupted.",
		    f->getFilename(),fsize,minFileSize);
		f->close();
		m_isLoading = false;
		return false;
	}
	// does it fit?
	if ( fsize < minFileSize ) {
		g_errno = EBADFILE;
		log(
		    "db: File size of %s is %li, should >= %li. File may be "
		    "corrupted.",
		    f->getFilename(),fsize,minFileSize);
		f->close();
		m_isLoading = false;
		return false;
	}
	// make room if we don't have any
	if ( m_numNodes < minUnusedNode ) {
		log(LOG_INIT,
		    "db: Growing tree to make room for %s",f->getFilename());
		if ( ! growTree ( minUnusedNode , 0 ) ) {
			f->close();
			m_isLoading = false;
			return log("db: Failed to grow tree: %s.",
				   mstrerror(g_errno));
		}
	}
	// we'll read this many
	long start = 0;
	if ( m_useProtection ) unprotect();
	// reset corruption count
	m_corrupt = 0;
	// read block by block
	while ( start < minUnusedNode ) {
		// . returns # of bytes read for this block
		// . incs m_numPositive/NegativeKeys and m_numUsedNodes
		// . incs m_memAlloced and m_memOccupied
		long bytesRead = fastLoadBlock ( f ,
						 start ,
						 minUnusedNode ,
						 stack ,
						 offset ) ;
		if ( bytesRead < 0 ) {
			f->close();
			if ( m_useProtection ) protect();
			// . fastLoadBlock() reports through g_errno, so do
			//   not overwrite that with a possibly stale or
			//   zero errno
			// . only fall back to errno if g_errno is unset
			if ( ! g_errno ) g_errno = errno;
			log("db: bytesRead = %li",bytesRead);
			m_isLoading = false;
			return false;
		}
		// inc the start
		start += BLOCK_SIZE;
		// and the offset
		offset += bytesRead;
	}
	m_isLoading = false;
	// print corruption
	if ( m_corrupt )
		log("admin: Loaded %li corrupted recs in tree for %s.",
		    m_corrupt,m_dbname);
	// re-enable protection
	if ( m_useProtection ) protect();
	// remember total bytes read
	m_bytesRead = offset;
	// set these
	m_headNode = headNode;
	m_nextNode = nextNode;
	m_minUnusedNode = minUnusedNode;
	// info
	//log(0,"RdbTree::fastLoad: loaded %li nodes", m_numUsedNodes );
	// close it
	//f->close();
	// check it
	if ( ! checkTree( false , true ) ) return fixTree ( );
	// a temporary hack to remove all data less tree nodes from
	// spiderdb and titledb
	/*
	if ( m_fixedDataSize == -1 ) {
	log("REMOVING 0 SIZE NODES FROM SPIDERDB/SITEDB/TITLEDB");
	long count = 0;
	again:
	for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
	if ( m_parents[i] == -2 ) continue;
	if ( m_sizes[i] != 0 ) continue;
	if ( (m_keys[i].n0 & 0x01) == 0x00 ) continue;
	count++;
	log("got one");
	// make it negative
	m_keys[i].n0 &= 0xfffffffffffffffeLL;
	//deleteNode ( i , true ); // freeData?
	//goto again;
	}
	log("REMOVED %li",count);
	if ( ! checkTree( false ) ) return fixTree ( );
	}
	*/
	// no longer needs save
	m_needsSave = false;
	//printTree();
	return true;
}
// . reads one block of up to BLOCK_SIZE nodes, starting at node # "start",
//   from "f" at file offset "offset"; "totalNodes" is the node count of
//   the whole file
// . the layout mirrors what fastSaveBlock_r() writes
// . updates m_numUsedNodes, the positive/negative key counts (also the
//   per-collection ones), m_memOccupied and m_memAlloced
// . nodes with an invalid collnum are counted in m_corrupt and left out of
//   the key tallies
// . record data is read into space obtained from "stack"
// . return bytes loaded
// . returns -1 and sets g_errno on error
long RdbTree::fastLoadBlock ( BigFile *f ,
			      long start ,
			      long totalNodes ,
			      RdbMem *stack ,
			      long long offset ) {
	// set # nodes to read
	long n = totalNodes - start;
	if ( n > BLOCK_SIZE ) n = BLOCK_SIZE;
	// debug msg
	//log("reading block at %lli, %li nodes",
	//    f->m_currentOffset, n );
	long long oldOffset = offset;
	// . copy the node arrays straight into place
	// . same order as they were written
	f->read ( &m_collnums[start], n * sizeof(collnum_t) , offset );
	offset += n * sizeof(collnum_t);
	f->read ( &m_keys [start*m_ks] , n * m_ks , offset );
	offset += n * m_ks;
	f->read ( &m_left [start] , n * 4 , offset ); offset += n * 4;
	f->read ( &m_right [start] , n * 4 , offset ); offset += n * 4;
	f->read ( &m_parents[start] , n * 4 , offset ); offset += n * 4;
	if ( m_doBalancing ) {
		f->read ( &m_depth[start] , n , offset ); offset += n ; }
	if ( m_fixedDataSize == -1 ) {
		f->read ( &m_sizes[start] , n * 4 , offset); offset += n * 4; }
	// if the data is actually stored in the data ptrs, just load those
	if ( m_dataInPtrs ) {
		f->read ( &m_data[start] , n * 4 , offset); offset += n * 4; }
	// return false on read error
	if ( g_errno ) {
		log("db: Failed to read %s: %s.",
		    f->getFilename(),mstrerror(g_errno));
		return -1;
	}
	// get valid collnum ranges
	long max = g_collectiondb.m_numRecs;
	// sanity check
	//if ( max >= MAX_COLLS ) { char *xx = NULL; *xx = 0; }
	// define ending node for all loops
	long end = start + n ;
	// shortcut
	CollectionRec **recs = g_collectiondb.m_recs;
	// tally up the nodes we just loaded
	for ( long i = start ; i < end ; i++ ) {
		// skip if empty
		if ( m_parents[i] == -2 ) continue;
		// watch out for bad collnums... corruption...
		collnum_t c = m_collnums[i];
		if ( c < 0 || c >= max ) {
			m_corrupt++;
			continue;
		}
		// must have rec as well
		if ( ! recs[c] ) {
			m_corrupt++;
			continue;
		}
		// keep a tally on all this
		m_numUsedNodes++;
		m_memOccupied += m_overhead;
		if ( KEYNEG(m_keys,i,m_ks) ) {
			m_numNegativeKeys++;
			//m_numNegKeysPerColl[c]++;
			// this is only used for Rdb::m_trees
			//if ( m_isRealTree )
			recs[c]->m_numNegKeysInTree[(unsigned char)m_rdbId]++;
		}
		else {
			m_numPositiveKeys++;
			//m_numPosKeysPerColl[c]++;
			// this is only used for Rdb::m_trees
			//if ( m_isRealTree )
			recs[c]->m_numPosKeysInTree[(unsigned char)m_rdbId]++;
		}
	}
	// bail now if we can
	if ( m_fixedDataSize == 0 || m_dataInPtrs ) return offset - oldOffset ;
	// . how much should we read?
	// . fastSaveBlock_r() writes nothing for variable-size records whose
	//   size is <= 0, so a negative size must not be added in here or
	//   the buffer size no longer matches what is in the file
	long bufSize = 0;
	if ( m_fixedDataSize == -1 ) {
		for ( long i = start ; i < end ; i++ )
			if ( m_parents[i] != -2 && m_sizes[i] > 0 )
				bufSize += m_sizes[i];
	}
	else if ( m_fixedDataSize > 0 ) {
		for ( long i = start ; i < end ; i++ )
			if ( m_parents[i] != -2 ) bufSize += m_fixedDataSize;
	}
	// get space
	//key_t dummy;
	char *dummy = NULL;
	char *buf = (char *) stack->allocData ( dummy , bufSize , 0 );
	if ( ! buf ) {
		log("db: Failed to allocate %li bytes to read %s. "
		    "Increase tree size for it in gb.conf.",
		    bufSize,f->getFilename());
		return -1;
	}
	// debug
	//log("reading %li bytes of raw rec data", bufSize );
	// establish end point
	char *bufEnd = buf + bufSize;
	// . read all into that buf
	// . this should block since callback is NULL
	f->read ( buf , bufSize , offset ) ;
	// return false on read error
	if ( g_errno ) return -1;
	// advance file offset
	offset += bufSize;
	// part it out now
	long size = m_fixedDataSize;
	for ( long i = start ; i < end ; i++ ) {
		// skip unused
		if ( m_parents[i] == -2 ) continue;
		// get size of his data if it's variable
		if ( m_fixedDataSize == -1 ) {
			size = m_sizes[i];
			// . a negative size is corruption and has no bytes
			//   in the file
			// . treat it as an empty record instead of walking
			//   "buf" backwards over other records' data
			if ( size < 0 ) {
				m_corrupt++;
				m_sizes[i] = 0;
				size = 0;
			}
		}
		// ensure we have the room
		if ( buf + size > bufEnd ) {
			g_errno = EBADFILE;
			log("db: Encountered record with corrupted "
			    "size parameter of %li in %s.",
			    size,f->getFilename());
			return -1;
		}
		m_data[i] = buf;
		buf += size;
		// update these
		m_memAlloced += size;
		m_memOccupied += size;
	}
	return offset - oldOffset ;
}
// . caller should call f->set() himself
// . we'll open it here
// . returns false and sets g_errno on error (sometimes g_errno not set)
/*
bool RdbTree::oldLoad ( BigFile *f , RdbMem *stack ) {
// msg
log(LOG_INFO,"db: Loading %s.",f->getFilename());
// open it up
if ( ! f->open ( O_RDONLY ) ) return false;
long fsize = f->getFileSize();
// 16 byte header
long header = 4*6 + sizeof(m_doBalancing) + sizeof(m_ownData);
// file size must be a min of "header"
if ( fsize < header ) { g_errno = EBADFILE; return false; }
// get # of nodes in the tree
long n , fixedDataSize , numUsedNodes ;
bool doBalancing , ownData ;
long headNode , nextNode , minUnusedNode;
long long offset = 0;
f->read ( &n , 4 , offset ); offset += 4 ;
f->read ( &fixedDataSize , 4 , offset ); offset += 4 ;
f->read ( &numUsedNodes , 4 , offset ); offset += 4 ;
f->read ( &headNode , 4 , offset ); offset += 4 ;
f->read ( &nextNode , 4 , offset ); offset += 4 ;
f->read ( &minUnusedNode , 4 , offset ); offset += 4 ;
f->read ( &doBalancing , sizeof(m_doBalancing) , offset ) ;
offset += sizeof(m_doBalancing);
f->read ( &ownData , sizeof(m_ownData ) , offset ) ;
offset += sizeof(m_ownData);
// return false on read error
if ( g_errno ) return false;
// parms check
if ( m_fixedDataSize != fixedDataSize ||
m_doBalancing != doBalancing ||
m_ownData != ownData )
return log("RdbTree::fastLoad: bad parms");
// make sure size it right again
long minFileSize = header + numUsedNodes * (m_ks+4+4+4+4);
if ( doBalancing ) minFileSize += numUsedNodes ;
if ( fixedDataSize == -1 ) minFileSize += numUsedNodes * 4 ;
// does it fit?
if ( fsize < minFileSize || (fixedDataSize==0 && fsize!=minFileSize)){
g_errno = EBADFILE;
return log(LOG_LOGIC,"db: rdbtree: fastload: Bad parms. File "
"may be corrupt or a key attribute of %s was "
"changed in the code and is not reflected in this "
"file.");
return false;
}
// make room if we don't have any
if ( m_numNodes < numUsedNodes ) {
log(LOG_INFO,
"db: Growing tree to make room for %s",f->getFilename());
if ( ! growTree ( numUsedNodes ) )
return log("RdbTree::fastLoad: %s",mstrerror(g_errno));
}
// read block by block
while ( numUsedNodes > 0 ) {
// returns next place to start scan
long bytesRead = oldLoadBlock (f, numUsedNodes, stack,offset);
if ( bytesRead < 0 ) return false;
// advance file offset
offset += bytesRead;
// subtract the count
numUsedNodes -= BLOCK_SIZE;
}
// set these
//m_headNode = headNode;
//m_nextNode = nextNode;
//m_minUnusedNode = minUnusedNode;
// info
//log(0,"RdbTree::fastLoad: loaded %li nodes", m_numUsedNodes );
// close it
f->close();
//printTree();
return true;
}
long RdbTree::oldLoadBlock ( BigFile *f, long remainingNodes , RdbMem *stack,
long long offset ){
// save offset
long long oldOffset = offset;
// array for holding shit
long slotNums [ BLOCK_SIZE ];
key_t keys [ BLOCK_SIZE ];
long left [ BLOCK_SIZE ];
long right [ BLOCK_SIZE ];
long parents [ BLOCK_SIZE ];
char depth [ BLOCK_SIZE ];
long sizes [ BLOCK_SIZE ];
// set # ndoes to read
long n = remainingNodes;
if ( n > BLOCK_SIZE ) n = BLOCK_SIZE;
// debug msg
//log("reading block at %lli, %li nodes",
// f->m_currentOffset, remainingNodes);
// copy them in
f->read ( slotNums, n * 4 , offset ); offset += n * 4;
f->read ( keys , n * m_ks , offset);
offset += n * m_ks;
f->read ( left , n * 4 , offset ); offset += n * 4;
f->read ( right , n * 4 , offset ); offset += n * 4;
f->read ( parents , n * 4 , offset ); offset += n * 4;
if ( m_doBalancing ) {
f->read ( depth , n , offset ); offset += n; }
if ( m_fixedDataSize == -1 ) {
f->read ( sizes , n * 4 , offset ); offset += n * 4 ; }
// return false on read error
if ( g_errno ) return -1;
// store into tree in the appropriate nodes
//long j ;
//for ( long i = 0 ; i < n ; i++ ) {
//addNode ( m_keys[i] ,
// get the node number this belongs in
//j = slotNums[i];
// store it in that node number
//m_keys [j] = keys [i];
//m_left [j] = left [i];
//m_right [j] = right [i];
//m_parents [j] = parents [i];
//if ( m_doBalancing ) m_depth[j] = depth[i];
//if ( m_fixedDataSize == -1 ) m_sizes[j] = sizes[i];
// keep a tally on all this
//m_numUsedNodes++;
//if ( (keys[i] & 0x01LL) == 0x01 ) m_numPositiveKeys++;
//else m_numNegativeKeys++;
//m_memOccupied += m_overhead;
//}
// bail now if we can
if ( m_fixedDataSize == 0 ) {
for ( long i = 0 ; i < n ; i++ )
addNode ( keys[i] , NULL , 0 );
return offset - oldOffset;
}
// how much should we read?
long bufSize = 0;
if ( m_fixedDataSize == -1 )
for ( long i = 0 ; i < n ; i++ ) bufSize += sizes[i];
else
bufSize = m_fixedDataSize * n;
// get space
key_t dummy;
char *buf = (char *) stack->allocData ( dummy , bufSize );
if ( ! buf ) return -1;
// establish end point
char *bufEnd = buf + bufSize;
// . read all into that buf
// . this should block since callback is NULL
f->read ( buf , bufSize , offset ) ;
// return false on read error
if ( g_errno ) return -1;
// advance file offset
offset += bufSize;
// part it out now
long size = m_fixedDataSize;
for ( long i = 0 ; i < n ; i++ ) {
// get slot num
//k = slotNums[i];
// get size of his data if it's variable
if ( m_fixedDataSize == -1 ) size = sizes[i];
// ensure we have the room
if ( buf + size > bufEnd ) {
g_errno = EBADFILE;
log("RdbTree::fastLoad: bad data sizes");
return -1;
}
addNode ( keys[i] , buf , size );
//m_data[k] = buf;
buf += size;
// update these
//m_memAlloced += size;
//m_memOccupied += size;
}
return offset - oldOffset;
}
*/
// . scan every node slot below m_minUnusedNode and delete the occupied ones
//   whose collection number is negative, is >= g_collectiondb.m_numRecs, or
//   maps to a NULL entry in g_collectiondb.m_recs
// . logs the number of nodes removed and the collnum of the last one removed
// . the "did you remove a collection subdirectory" hint is only logged the
//   first time this function removes anything (guarded by a static flag)
void RdbTree::cleanTree ( ) { // char **bases ) {
	// the liberation count
	long count = 0;
	// collnum of the most recently removed node. only used for the log
	// message below, which is skipped when nothing was removed.
	collnum_t collnum = 0;
	long max = g_collectiondb.m_numRecs;
	for ( long i = 0 ; i < m_minUnusedNode ; i++ ) {
		// skip node if parents is -2 (unoccupied)
		if ( m_parents[i] == -2 ) continue;
		// is collnum valid? then leave the node alone
		if ( m_collnums[i] >= 0   &&
		     m_collnums[i] <  max &&
		     g_collectiondb.m_recs[m_collnums[i]] ) continue;
		// remember the offending collnum BEFORE the node is released
		// so the log message never depends on the contents of a
		// deleted slot
		collnum = m_collnums[i];
		// . remove it, whether the collnum is negative (weird
		//   corruption) or just refers to a collection that is gone
		// . an earlier revision left these in place in case the
		//   collection got moved accidentally, but then they can clog
		//   up the tree forever
		// . delete exactly once per node. this used to call
		//   deleteNode() a second time on the same slot when the
		//   collnum was negative.
		deleteNode ( i , true );
		count++;
	}
	// print it
	if ( count == 0 ) return;
	log(LOG_LOGIC,"db: Removed %li records from %s tree for invalid "
	    "collection number %i.",count,m_dbname,collnum);
	//log(LOG_LOGIC,"db: Records not actually removed for safety. Except "
	//    "for those with negative colnums.");
	// only print the explanatory hint once per process
	static bool s_print = true;
	if ( ! s_print ) return;
	s_print = false;
	log (LOG_LOGIC,"db: This is bad. Did you remove a collection "
	     "subdirectory? Don't do that, you should use the \"delete "
	     "collections\" interface because it also removes records from "
	     "memory, too.");
}
// . return the negative-key tally kept on the collection record of "collnum"
//   in the slot for this tree's m_rdbId
// . returns 0 if "collnum" is outside [0, g_collectiondb.m_numRecs) or if
//   there is no record for it
// . deliberately segfaults if m_rdbId was never set (is negative)
long RdbTree::getNumNegativeKeys ( collnum_t collnum ) {
	if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
	// do not index m_recs[] with a collnum it cannot hold. this is the
	// same range check cleanTree() applies before touching m_recs[].
	if ( collnum < 0 ) return 0;
	if ( collnum >= g_collectiondb.m_numRecs ) return 0;
	CollectionRec *cr = g_collectiondb.m_recs[collnum];
	// collection is gone, nothing to report
	if ( ! cr ) return 0;
	//if ( ! m_countsInitialized ) { char *xx=NULL;*xx=0; }
	return cr->m_numNegKeysInTree[(unsigned char)m_rdbId];
}
// . return the positive-key tally kept on the collection record of "collnum"
//   in the slot for this tree's m_rdbId
// . returns 0 if "collnum" is outside [0, g_collectiondb.m_numRecs) or if
//   there is no record for it
// . deliberately segfaults if m_rdbId was never set (is negative)
long RdbTree::getNumPositiveKeys ( collnum_t collnum ) {
	if ( m_rdbId < 0 ) { char *xx=NULL;*xx=0; }
	// do not index m_recs[] with a collnum it cannot hold. this is the
	// same range check cleanTree() applies before touching m_recs[].
	if ( collnum < 0 ) return 0;
	if ( collnum >= g_collectiondb.m_numRecs ) return 0;
	CollectionRec *cr = g_collectiondb.m_recs[collnum];
	// collection is gone, nothing to report
	if ( ! cr ) return 0;
	//if ( ! m_countsInitialized ) { char *xx=NULL;*xx=0; }
	return cr->m_numPosKeysInTree[(unsigned char)m_rdbId];
}
// . meant to recount the positive and negative keys this tree holds for the
//   collection described by "cr" and store the tallies on "cr" in the slot
//   for this tree's m_rdbId
// . CURRENTLY A NO-OP: the unconditional return below exits before any
//   counting happens, so cr is never modified
void RdbTree::setNumKeys ( CollectionRec *cr ) {
	if ( ! cr ) return;
	//m_countsInitialized = true;
	// NOTE(review): this bare return makes everything below unreachable.
	// it is kept as-is because it looks like the recount was switched off
	// on purpose -- confirm before deleting either the return or the
	// dead code.
	return;
	// deliberately segfault on an rdb id with no slot in the count arrays
	if ( ((unsigned char)m_rdbId) >= RDB_END ) { char *xx=NULL;*xx=0; }
	collnum_t collnum = cr->m_collnum;
	// start both tallies from scratch
	cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
	cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
	for ( long i = 0 ; i < m_numNodes ; i++ ) {
		//QUICKPOLL(niceness);
		// skip if empty
		if ( m_parents[i] == -2 ) continue;
		// or if we hit a different collection number
		if ( m_collnums [ i ] != collnum ) continue;
		// tally by the sign of the key, as reported by KEYNEG()
		if ( KEYNEG(m_keys,i,m_ks) )
			cr->m_numNegKeysInTree[(unsigned char)m_rdbId]++;
		else
			cr->m_numPosKeysInTree[(unsigned char)m_rdbId]++;
	}
}