Matt 09de59f026 do not store cblock, etc. tags into tagdb to save
disk space. added tagdb file cache for better performance,
less disk accesses. will help reduce disk load.
put file cache sizes in master controls and if they change
then update the cache size dynamically.
2015-09-10 12:46:00 -06:00

536 lines
16 KiB

// Matt Wells, copyright Sep 2000
// contains one RdbBase for each collection
#ifndef _RDB_H_
#define _RDB_H_
#include "RdbBase.h"
#include "RdbTree.h"
#include "RdbMem.h"
#include "RdbCache.h"
#include "RdbDump.h"
//#include "Dir.h"
#include "RdbBuckets.h"
// Free helper functions declared here; their definitions are not in this
// header.
// NOTE(review): presumably creates the "trash" directory and returns false
// on failure -- confirm against the definition.
bool makeTrashDir() ;
// Merge-candidate linked list maintenance. The first takes the collection's
// CollectionRec, the second takes the collnum of a collection (the parameter
// name suggests it is the one being dumped -- TODO confirm).
void removeFromMergeLinkedList ( class CollectionRec *cr ) ;
void addCollnumToLinkedListOfMergeCandidates ( collnum_t dumpCollnum ) ;
// . each Rdb instance has an ID
// . these ids are also return values for getIdFromRdb()
#define RDB_START 1
enum {
RDB_NONE = 0, // 0
RDB_CATDB, // 10
RDB_REVDB, // 15
RDB_POSDB, // 16
RDB_PARMDB, // kind of a fake rdb for modifying collrec/g_conf parms
//RDB_FAKEDB, // used by spider.cpp to fake things out
// . secondary rdbs for rebuilding done in PageRepair.cpp
// . we add new recs into these guys and then make the original rdbs
// point to them when we are done.
RDB2_TAGDB2, // 31
// how many rdbs are in "urgent merge" mode?
extern int32_t g_numUrgentMerges;
// get the RdbBase class for an rdbId and collection name
class RdbBase *getRdbBase ( uint8_t rdbId , char *coll );
class RdbBase *getRdbBase ( uint8_t rdbId , collnum_t collnum );
// maps an rdbId to an Rdb
class Rdb *getRdbFromId ( uint8_t rdbId ) ;
// the reverse of the above
char getIdFromRdb ( class Rdb *rdb ) ;
char isSecondaryRdb ( uint8_t rdbId ) ;
// get the dbname
char *getDbnameFromId ( uint8_t rdbId ) ;
// get cache by rdbId. Used by MsgB.cpp.
//RdbCache *getCache ( uint8_t rdbId ) ;
// size of keys
char getKeySizeFromRdbId ( uint8_t rdbId );
// and this is -1 if dataSize is variable
int32_t getDataSizeFromRdbId ( uint8_t rdbId );
void forceMergeAll ( char rdbId , char niceness ) ;
// main.cpp calls this
void attemptMergeAll ( int fd , void *state ) ;
void attemptMergeAll2 ( );
class Rdb {
// constructor / destructor
Rdb ( );
~Rdb ( );
// . per-collection management
// . addRdbBase1() identifies the collection by name, addRdbBase2() by
//   collnum
// . NOTE(review): presumably these return false on error like the other
//   bool-returning methods in this class -- confirm in Rdb.cpp
bool addRdbBase1 ( char *coll );
bool addRdbBase2 ( collnum_t collnum );
bool delColl ( char *coll );
bool resetBase ( collnum_t collnum );
bool deleteAllRecs ( collnum_t collnum ) ;
// takes both the existing collnum and a replacement "newCollnum"
bool deleteColl ( collnum_t collnum , collnum_t newCollnum ) ;
// . configures this Rdb; see the per-parameter notes below
// . the first twelve parameters (dir through loadCacheFromDisk) have no
//   default and must always be supplied; the "//= value" remarks beside
//   them are only comments, not defaults
// . only "pc" and the parameters after it carry real default values
// . NOTE(review): presumably returns false on error -- confirm in Rdb.cpp
bool init ( char *dir , // working directory
char *dbname , // "indexdb","tagdb",...
bool dedup , //= true ,
int32_t fixedDataSize , //= -1 ,
int32_t minToMerge , //, //= 2 ,
int32_t maxTreeMem , //= 1024*1024*32 ,
int32_t maxTreeNodes ,
bool isTreeBalanced ,
int32_t maxCacheMem , //= 1024*1024*5 );
int32_t maxCacheNodes ,
bool useHalfKeys ,
bool loadCacheFromDisk ,
//class DiskPageCache *pc = NULL ,
// "pc" is an untyped pointer here; the commented-out line above shows it
// was formerly declared as a DiskPageCache pointer
void *pc = NULL,
bool isTitledb = false , // use fileIds2[]?
bool preloadDiskPageCache = false ,
char keySize = 12 ,
bool biasDiskPageCache = false ,
bool isCollectionLess = false );
// . frees up all the memory and closes all files
// . suspends any current merge (saves state to disk)
// . calls reset() for each file
// . will cause any open map files to dump
// . will dump tables to backup or store
// . calls close on each file
// . returns false if blocked, true otherwise
// . sets errno on error
bool close ( void *state ,
void (* callback)(void *state ) ,
bool urgent ,
bool exitAfterClosing );
//bool close ( ) { return close ( NULL , NULL ); };
// used by PageMaster.cpp to check to see if all rdb's are closed yet
// reports the current value of the m_isClosed flag
bool isClosed ( ) {
	return m_isClosed;
}
bool needsSave();
// . returns false and sets g_errno on error
// . caller should retry later on g_errno of ENOMEM or ETRYAGAIN
// . returns true on success (the return type is bool, so no node # is returned)
// . key low bit must be set (otherwise it indicates a delete)
bool addRecord ( collnum_t collnum ,
//key_t &key, char *data, int32_t dataSize );
char *key, char *data, int32_t dataSize,
int32_t niceness);
bool addRecord ( char *coll , char *key, char *data, int32_t dataSize,
int32_t niceness);
bool addRecord (char *coll , key_t &key, char *data, int32_t dataSize,
int32_t niceness) {
return addRecord(coll,(char *)&key,data,dataSize, niceness);};
// returns false if no room in tree or m_mem for a list to add
bool hasRoom ( RdbList *list , int32_t niceness );
int32_t reclaimMemFromDeletedTreeNodes( int32_t niceness ) ;
int32_t m_lastReclaim;
// . returns false on error and sets errno
// . return true on success
// . if we can't handle all records in list we don't add any and
// set errno to ETRYAGAIN or ENOMEM
// . we copy all data so you can free your list when we're done
bool addList ( collnum_t collnum , RdbList *list, int32_t niceness );
// calls addList above
bool addList ( char *coll , RdbList *list, int32_t niceness );
// . add a record without any data, just a key (faster)
// . returns the node # in the tree it added the record to
//int32_t addKey ( collnum_t collnum , key_t &key );
int32_t addKey ( collnum_t collnum , char *key );
// . uses the bogus data pointed to by "m_dummy" for record's data
// . we clear the key low bit to signal a delete
// . returns false and sets errno on error
//bool deleteRecord ( collnum_t collnum , key_t &key ) ;
bool deleteRecord ( collnum_t collnum , char *key );
bool isSecondaryRdb () {
return ::isSecondaryRdb((unsigned char)m_rdbId); };
// reports the current value of the m_initialized flag
bool isInitialized () {
	return m_initialized;
}
// get the directory name where this rdb stores its files
//char *getDir ( ) { return m_dir.getDirname(); };
// directory comes from the global host database record
char *getDir ( ) {
	return g_hostdb.m_dir;
}
// stripe directory comes from the global configuration
char *getStripeDir ( ) {
	return g_conf.m_stripeDir;
}
// plain accessors for settings stored on this Rdb
int32_t getFixedDataSize ( ) {
	return m_fixedDataSize;
}
bool useHalfKeys ( ) {
	return m_useHalfKeys;
}
// key size is held in m_ks
char getKeySize ( ) {
	return m_ks;
}
// . hands out a pointer to our m_tree member
// . yields NULL when m_useTree is false
RdbTree *getTree ( ) {
	return m_useTree ? &m_tree : NULL;
}
//RdbCache *getCache ( ) { return &m_cache; };
// pointer to our m_mem member
RdbMem *getRdbMem ( ) {
	return &m_mem;
}
// reports the current value of the m_useTree flag
bool useTree ( ) {
	return m_useTree;
}
int32_t getNumUsedNodes ( );
int32_t getMaxTreeMem();
int32_t getTreeMemOccupied() ;
int32_t getTreeMemAlloced () ;
int32_t getNumNegativeKeys();
void disableWrites ();
void enableWrites ();
bool isWritable ( ) ;
RdbBase *getBase ( collnum_t collnum ) ;
// the count is read from g_collectiondb.m_numRecs, not from a member of
// this Rdb
int32_t getNumBases ( ) {
	return g_collectiondb.m_numRecs;
}
void addBase ( collnum_t collnum , class RdbBase *base ) ;
// how much mem is alloced for our maps?
int64_t getMapMemAlloced ();
int32_t getNumFiles ( ) ;
// sum of all parts of all big files
int32_t getNumSmallFiles ( ) ;
int64_t getDiskSpaceUsed ( );
// returns -1 if variable (variable dataSize)
int32_t getRecSize ( ) {
	// a fixed data size of -1 means the data size is variable, so
	// there is no single record size to report
	const bool isVariable = ( m_fixedDataSize == -1 );
	// otherwise a record is one key (m_ks bytes, which replaced the
	// older sizeof(key_t)) plus the fixed-size data
	return isVariable ? -1 : ( m_ks + m_fixedDataSize );
}
// use the maps and tree to estimate the size of this list
int64_t getListSize ( collnum_t collnum,
//key_t startKey ,key_t endKey , key_t *maxKey ,
char *startKey ,char *endKey , char *maxKey ,
int64_t oldTruncationLimit ) ;
// . convenience overload for callers holding key_t keys
// . startKey and endKey arrive by value; their addresses (and the maxKey
//   pointer) are reinterpreted as char pointers and forwarded to the
//   char* overload of getListSize()
int64_t getListSize ( collnum_t collnum ,
		      key_t startKey , key_t endKey , key_t *maxKey ,
		      int64_t oldTruncationLimit ) {
	char *rawStart = (char *)&startKey;
	char *rawEnd   = (char *)&endKey;
	char *rawMax   = (char *)maxKey;
	return getListSize ( collnum , rawStart , rawEnd , rawMax ,
			     oldTruncationLimit );
}
// positive minus negative
int64_t getNumTotalRecs ( bool useCache = false ) ;
int64_t getCollNumTotalRecs ( collnum_t collnum );
int64_t getNumRecsOnDisk ( );
int64_t getNumGlobalRecs ( );
// used for keeping track of stats
// --- recorders: bump the matching counter ---
void didSeek ( ) {
	m_numSeeks += 1;
}
void didRead ( int32_t bytes ) {
	m_numRead = m_numRead + bytes;
}
void didReSeek ( ) {
	m_numReSeeks += 1;
}
// --- readers: report the counters accumulated so far ---
int64_t getNumSeeks ( ) {
	return m_numSeeks;
}
int64_t getNumReSeeks ( ) {
	return m_numReSeeks;
}
int64_t getNumRead ( ) {
	return m_numRead;
}
// net stats for "get" requests
// count one "get" request and add its size to the bytes-read total
void readRequestGet ( int32_t bytes ) {
	m_numNetReadGet += bytes;
	m_numReqsGet    += 1;
}
// count one "get" reply and add its size to the bytes-sent total
void sentReplyGet ( int32_t bytes ) {
	m_numNetSentGet += bytes;
	m_numRepliesGet += 1;
}
// readers for the four "get" counters
int64_t getNumRequestsGet ( ) {
	return m_numReqsGet;
}
int64_t getNetReadGet ( ) {
	return m_numNetReadGet;
}
int64_t getNumRepliesGet ( ) {
	return m_numRepliesGet;
}
int64_t getNetSentGet ( ) {
	return m_numNetSentGet;
}
// net stats for "add" requests
// count one "add" request and add its size to the bytes-read total
void readRequestAdd ( int32_t bytes ) {
	m_numNetReadAdd += bytes;
	m_numReqsAdd    += 1;
}
// count one "add" reply and add its size to the bytes-sent total
void sentReplyAdd ( int32_t bytes ) {
	m_numNetSentAdd += bytes;
	m_numRepliesAdd += 1;
}
// readers for the four "add" counters
int64_t getNumRequestsAdd ( ) {
	return m_numReqsAdd;
}
int64_t getNetReadAdd ( ) {
	return m_numNetReadAdd;
}
int64_t getNumRepliesAdd ( ) {
	return m_numRepliesAdd;
}
int64_t getNetSentAdd ( ) {
	return m_numNetSentAdd;
}
// used by main.cpp to periodically save us if we haven't dumped
// in a while
// reports the value held in m_lastWrite
int64_t getLastWriteTime ( ) {
	return m_lastWrite;
}
// private:
//void attemptMerge ( int32_t niceness , bool forceMergeAll ,
// bool doLog = true );
bool gotTokenForDump ( ) ;
//void gotTokenForMerge ( ) ;
// called after merge completed
//bool incorporateMerge ( );
// . you'll lose your data in this class if you call this
void reset();
bool isSavingTree ( ) ;
bool saveTree ( bool useThread ) ;
bool saveMaps ( bool useThread ) ;
//bool saveCache ( bool useThread ) ;
// . load the tree named "saved.dat", keys must be out of order because
// tree is not balanced
bool loadTree ( ) ;
bool treeFileExists ( ) ;
// . write out tree to a file with keys in order
// . only shift.cpp/reindex.cpp programs set niceness to 0
bool dumpTree ( int32_t niceness ); //= MAX_NICENESS );
// . called when done saving a tree to disk (keys not ordered)
void doneSaving ( ) ;
bool dumpCollLoop ( ) ;
// . called when we've dumped the tree to disk w/ keys ordered
void doneDumping ( );
bool needsDump ( );
// these are used by Msg34 class for computing load on a machine
bool isMerging ( ) ;
// delegates to our RdbDump member, m_dump
bool isDumping ( ) {
	return m_dump.isDumping ( );
}
// PageRepair.cpp calls this when it is done rebuilding an rdb
// and wants to tell the primary rdb to reload itself using the newly
// rebuilt files, pointed to by rdb2.
bool updateToRebuildFiles ( Rdb *rdb2 , char *coll ) ;
//bool hasMergeFile ( ) { return m_hasMergeFile; };
// used for translating titledb file # 255 (as read from new tfndb)
// into the real file number
//int32_t getNewestFileNum ( ) { return m_numFiles - 1; };
// Msg22 needs the merge info so if the title file # of a read we are
// doing is being merged, we have to include the start merge file num
//int32_t getMergeStartFileNum ( ) { return m_mergeStartFileNum; };
//int32_t getMergeNumFiles ( ) { return m_numFilesToMerge; };
// used by Sync.cpp to convert a file name to a file number in m_files
//int32_t getFileNumFromName ( char *filename );
//void doneWrapper2 ( ) ;
//void doneWrapper4 ( ) ;
//int32_t m_x;
//int32_t m_a;
// keep a copy of these here so merge can use them to kick out
// records whose key when, ANDed w/ m_groupMask, equals
// m_groupId
//uint32_t m_groupMask;
//uint32_t m_groupId;
// . we try to minimize the number of files to minimize disk seeks
// . records that end up as not found will hit all these files
// . when we get "m_minToMerge" or more files a merge kicks in
// . TODO: merge should combine just the smaller files... kinda
// . files are sorted by fileId
// . older files are listed first (lower fileIds)
// . filenames should include the directory (full filenames)
// . TODO: RdbMgr should control what rdb gets merged?
//BigFile *m_files [ MAX_RDB_FILES ];
//int32_t m_fileIds [ MAX_RDB_FILES ];
//int32_t m_fileIds2 [ MAX_RDB_FILES ]; // for titledb/tfndb linking
//RdbMap *m_maps [ MAX_RDB_FILES ];
//int32_t m_numFiles;
// just put this into CollectionRec so we are not limited to MAX_COLLS
//class RdbBase *m_bases [ MAX_COLLS ];
//int32_t m_numBases;
bool m_dedup;
int32_t m_fixedDataSize;
//Dir m_dir;
char m_dbname [32];
int32_t m_dbnameLen;
bool m_isCollectionLess;
// for g_cachedb, g_statsdb, etc.
RdbBase *m_collectionlessBase;
//RdbCache m_cache;
// for storing records in memory
RdbTree m_tree;
RdbBuckets m_buckets;
bool m_useTree;
// for dumping a table to an rdb file
RdbDump m_dump;
// memory for us to use to avoid calling malloc()/mdup()/...
RdbMem m_mem;
int32_t m_cacheLastTime;
int64_t m_cacheLastTotal;
bool m_inAddList;
int32_t m_numMergesOut;
// . this is now static in Rdb.cpp
// . for merging many rdb files into one
// . no we brought it back so tfndb can merge while titledb is merging
//RdbMerge m_merge;
BigFile m_saveFile; // for saving the tree
bool m_isClosing;
bool m_isClosed;
bool m_haveSavedFile; // we only unlink this file when we dump
bool m_preloadCache;
bool m_biasDiskPageCache;
// this callback called when close is complete
void *m_closeState;
void (* m_closeCallback) (void *state );
int32_t m_maxTreeMem ; // max mem tree can use, dump at 90% of this
int32_t m_minToMerge; // need at least this many files b4 merging
int32_t m_numFilesToMerge ;
int32_t m_mergeStartFileNum ;
int32_t m_dumpErrno;
// a dummy data string for deleting records when m_fixedDataSize > 0
char *m_dummy;
int32_t m_dummySize ; // size of that dummy data
int32_t m_delRecSize; // size of the whole delete record
// for keeping stats
int64_t m_numSeeks;
int64_t m_numReSeeks;
int64_t m_numRead;
// network request/reply info for get requests
int64_t m_numReqsGet ;
int64_t m_numNetReadGet ;
int64_t m_numRepliesGet ;
int64_t m_numNetSentGet ;
// network request/reply info for add requests
int64_t m_numReqsAdd ;
int64_t m_numNetReadAdd ;
int64_t m_numRepliesAdd ;
int64_t m_numNetSentAdd ;
// should our next merge in waiting force itself?
bool m_nextMergeForced;
// do we need to dump to disk?
//bool m_needsSave;
// . when we dump list to an rdb file, can we use half keys?
// . currently exclusively used by indexdb
bool m_useHalfKeys;
// are we waiting on another merge/dump to complete before our turn?
bool m_inWaiting;
// . is our merge urgent? (if so, it will starve spider disk reads)
// . also see Threads.cpp for the starvation
// . this is now exclusively in RdbBase.h
//bool m_mergeUrgent;
// are we saving the tree urgently? like we cored...
bool m_urgent;
// after saving the tree in call to Rdb::close() should the tree
// remain closed to writes?
bool m_isReallyClosing;
// . niceness value kept on this Rdb
// . declared int32_t to match the "int32_t niceness" parameters used by
//   addRecord(), addList(), hasRoom() and dumpTree() in this class; as a
//   bool every niceness above 1 was silently stored as 1
// . NOTE(review): which call assigns this is not visible in this header
int32_t m_niceness;
//bool m_waitingForTokenForDump ;
//bool m_waitingForTokenForMerge;
// we now determine when in merge mode
//bool m_isMerging;
// have we create the merge file?
//bool m_hasMergeFile;
// rec counts for files being merged
//int64_t m_numPos ;
//int64_t m_numNeg ;
// so only one save thread launches at a time
bool m_isSaving;
//class DiskPageCache *m_pc;
bool m_isTitledb;
bool m_isUnlinking;
int32_t m_fn;
// filename of merge file for passing to g_sync to unlink it from there
char m_oldname [ 256 ];
char m_treeName [64];
char m_memName [64];
BigFile m_dummyFile;
int64_t m_lastWrite;
collnum_t m_dumpCollnum;
char m_registered;
int64_t m_lastTime;
// set to true when dumping tree so RdbMem does not use the memory
// being dumped to hold newly added records
char m_inDumpLoop;
char m_rdbId;
char m_ks; // key size
int32_t m_pageSize;
bool m_initialized;
int8_t m_gbcounteventsTermId[8];
// timedb support
time_t m_nowGlobal;
class HashTableX *m_sortByDateTablePtr;
// used for deduping spiderdb tree
Msg5 m_msg5;
//extern RdbCache g_forcedCache;
//extern RdbCache g_alreadyAddedCache;