// . Matt Wells, copyright Feb 2001

// . we use a big buffer, m_buf, into which we sequentially store records as 
//   they are added to the cache
// . each record we store in the big buffer has a header which consists
//   of a key_t, recordSize(int32_t), timestamp and the record data
// . the header is as follows:
//   a collnum_t (use sizeof(collnum_t)) that identifies the collection
//   a 12 byte key_t (actually, it is now m_cks bytes)
//   a 4 bytes timestamp (seconds since the epoch, like time_t)
//   a 4 bytes data size
//   the data
// . we have a hash table that maps a key_t to a ptr to a record header in the
//   big buffer
// . what if we run out of room in the hash table? we delete the oldest
//   records in the big buffer so we can remove their ptrs from the hash table
// . we keep a "tail" ptr into the hash table that points to the last
//   non-overwritten record in the big buffer
// . when we run out of room in the big buffer we wrap our ptr to the start
//   and we remove any records from the hashtable that we overwrite using
//   the tail ptr
// . when a record is read from the cache we also promote it by copying it to
//   the head of m_buf, m_bufOffset. since modern day pentiums do 2.5-6.4GB/s
//   this is ok, it will not be the bottleneck by far. if the record is
//   expired we do not promote it. The advantage is we don't have ANY memory
//   fragmentation and utilize 100% of the memory. Copying a 6 Megabyte
//   list takes like 2.4ms on a new pentium, so we should allow regular
//   allocating if the record size is 256k or more. Copying 256k only
//   takes .1 ms on the P4 2.60CGHz. This is on the TODO list.

#ifndef RDBCACHE_H
#define RDBCACHE_H

// . TODO:
// . if size of added rec is ABOVE this, then don't use our memory buffer
// . because we copy a rec to the head of memory buffer (m_bufOffset) every 
//   time that rec is accessed and doing that for big recs is more intensive
// . the idea is that allocating/freeing memory for smaller recs is what
//   causes the memory fragmentation
// . to stay under m_maxMem we should limit each memory buffer to 1M or so
//   and free the tailing memory buffer to make room for a large unbuffered rec
//#define MEM_LIMIT (256*1024)

#include <time.h>       // time_t
#include "Mem.h"        // g_mem.calloc()
#include "RdbList.h"

// . global flag, only declared here (its definition is not in this header)
// . NOTE(review): presumably gates whether adds to any RdbCache are honored
//   -- confirm against the definition and its users
extern bool g_cacheWritesEnabled;

// . RdbCache: a record cache keyed by (collnum, cache key)
// . records live sequentially in the big buffers (m_bufs) and are located
//   through a hash table of ptrs into those buffers (m_ptrs); see the
//   comment at the top of this file for the record header layout
// . NOTE: the "private:" label further down is commented out, so every
//   member of this class is currently public
class RdbCache {

 public:

	friend class Rdb;

	// constructor & destructor
	 RdbCache();
	~RdbCache();

	void reset();

	// . this just clears the contents of the cache for a particular coll
	// . used by g_collectiondb.delRec() call to Rdb::delColl() to 
	//   clear out the collection's stuff in the cache
	void clear ( collnum_t collnum ) ;

	// . true once the hash table (m_ptrs) has been set up
	bool isInitialized () const { return m_ptrs != NULL; }

	// . "maxCacheMem" is the memory budget for the cache
	//   NOTE(review): presumably this ends up in m_maxMem -- confirm in
	//   RdbCache.cpp (the old comment here called it "minCacheSize")
	// . a "fixedDataSize" of -1 means the dataSize varies from rec to rec
	// . set "maxCacheNodes" to -1 for it to be auto determined
	// . can only do this if fixedDataSize is not -1
	// . "cacheKeySize" is the size in bytes of the cache keys and
	//   "dataKeySize" is the key size of lists for addList()/getList()
	bool init ( int32_t maxCacheMem   , 
		    int32_t fixedDataSize , 
		    bool supportLists  ,
		    int32_t maxCacheNodes ,
		    bool useHalfKeys   ,
		    char *dbname       ,
		    bool  loadFromDisk ,
		    char  cacheKeySize = 12 ,
		    char  dataKeySize  = 12 ,
		    int32_t  numPtrsMax   = -1 );

	// . a quick hack for SpiderCache.cpp
	// . the key is 32 bits and the value comes back as an int64_t
	// . returns -1 if not found, so don't store -1 in there then
	int64_t getLongLong ( collnum_t collnum ,
				uint32_t key , int32_t maxAge , // in seconds
				bool promoteRecord );

	// . this puts an int64_t "value" in there under a 32-bit "key"
	void addLongLong ( collnum_t collnum ,
			   uint32_t key , int64_t value ,
			   char **retRecPtr = NULL ) ;

	// . here the key is a uint64_t and the value is an int64_t
	// . returns -1 if not found
	int64_t getLongLong2 ( collnum_t collnum ,
				 uint64_t key , 
				 int32_t maxAge , // in seconds
				 bool promoteRecord );

	// . this puts an int64_t "value" in there under a 64-bit "key"
	void addLongLong2 ( collnum_t collnum ,
			   uint64_t key , int64_t value ,
			   char **retRecPtr = NULL ) ;

	// . same routines for int32_t values, but the key is a uint64_t
	int32_t getLong ( collnum_t collnum ,
		       uint64_t key , int32_t maxAge , // in seconds
		       bool promoteRecord );
	void addLong ( collnum_t collnum ,
		       uint64_t key , int32_t value ,
		       char **retRecPtr = NULL ) ;

	// . returns true if found, false if not found in cache
	// . sets *rec and *recSize iff found
	// . sets *cachedTime to time the rec was cached
	// . use maxAge of -1 to have no limit to the age of cached rec
	// . "cacheKey" points to the raw key bytes
	bool getRecord ( collnum_t collnum   ,
			 char    *cacheKey   ,
			 char   **rec        ,
			 int32_t    *recSize    ,
			 bool     doCopy     ,
			 int32_t     maxAge     , // in seconds
			 bool     incCounts  ,
			 time_t  *cachedTime = NULL ,
			 bool     promoteRecord = true );

	// . same as above but the collection is given by name, "coll"
	bool getRecord ( char    *coll       ,
			 char    *cacheKey   ,
			 char   **rec        ,
			 int32_t    *recSize    ,
			 bool     doCopy     ,
			 int32_t     maxAge     , // in seconds
			 bool     incCounts  ,
			 time_t  *cachedTime = NULL ,
			 bool     promoteRecord = true);

	// . key_t convenience wrapper: passes the address of the by-value
	//   key to the char* version above
	bool getRecord ( collnum_t collnum   ,
			 key_t    cacheKey   ,
			 char   **rec        ,
			 int32_t    *recSize    ,
			 bool     doCopy     ,
			 int32_t     maxAge     , // in seconds
			 bool     incCounts  ,
			 time_t  *cachedTime = NULL,
			 bool     promoteRecord = true) {
		return getRecord (collnum,(char *)&cacheKey,rec,recSize,doCopy,
				  maxAge,incCounts,cachedTime, promoteRecord);
	}

	// . key_t convenience wrapper for the "coll" by-name version
	bool getRecord ( char    *coll       ,
			 key_t    cacheKey   ,
			 char   **rec        ,
			 int32_t    *recSize    ,
			 bool     doCopy     ,
			 int32_t     maxAge     , // in seconds
			 bool     incCounts  ,
			 time_t  *cachedTime = NULL,
			 bool     promoteRecord = true) {
		return getRecord (coll,(char *)&cacheKey,rec,recSize,doCopy,
				  maxAge,incCounts,cachedTime, promoteRecord);
	}

	// . key_t convenience wrapper for the char* version declared next
	bool setTimeStamp ( collnum_t  collnum      ,
			    key_t      cacheKey     ,
			    int32_t       newTimeStamp ) {
		return setTimeStamp ( collnum           ,
				      (char *)&cacheKey ,
				      newTimeStamp      );
	}

	bool setTimeStamp ( collnum_t  collnum      ,
			    char      *cacheKey     ,
			    int32_t       newTimeStamp );

	// . returns true if found, false if not found
	// . NOTE(review): the old comment here read "sets errno no error";
	//   presumably g_errno is set on error -- confirm in RdbCache.cpp
	// . if "doCopy" is true then COPIES into a new buffer
	// . maxAge constraint for ignoring the stale nodes
	// . promotes the returned list (see the promotion note at the top of
	//   this file)
	// . maxAge of -1 means no maxAge
	// . maxAge of  0 means do not check the cache
	// . uses "startKey" to get the list
	// . if "incCounts" is true and we hit  we inc the hit  count
	// . if "incCounts" is true and we miss we inc the miss count
	bool getList ( collnum_t collnum  ,
		       char    *cacheKey  ,
		       char    *startKey  ,
		       RdbList *list      ,
		       bool     doCopy    ,
		       int32_t     maxAge    , // in seconds
		       bool     incCounts );

	// use this key for cache lookup of the list rather than form from 
	// startKey/endKey
	bool addList ( collnum_t collnum , char *cacheKey , RdbList *list );
	bool addList ( collnum_t collnum , key_t cacheKey , RdbList *list ) {
		return addList(collnum,(char *)&cacheKey,list);
	}

	bool addList ( char *coll , char *cacheKey , RdbList *list );
	bool addList ( char *coll , key_t cacheKey , RdbList *list ) {
		return addList(coll,(char *)&cacheKey,list);
	}

	// . add a list of only 1 record
	// . return false on error and set g_errno, otherwise return true
	// . recOffset is proper offset into the buffer system
	bool addRecord ( collnum_t collnum ,
			 char *cacheKey , 
			 char *rec      ,
			 int32_t  recSize  ,
			 int32_t  timestamp = 0 ,
			 char **retRecPtr = NULL ) ;

	bool addRecord ( char *coll     ,
			 char *cacheKey , 
			 char *rec      ,
			 int32_t  recSize  ,
			 int32_t  timestamp = 0 );

	// . key_t convenience wrappers for the two char* versions above
	bool addRecord ( collnum_t collnum ,
			 key_t cacheKey , 
			 char *rec      ,
			 int32_t  recSize  ,
			 int32_t  timestamp = 0 ) {
		return addRecord(collnum,(char *)&cacheKey,rec,recSize,
				 timestamp);
	}

	bool addRecord ( char *coll     ,
			 key_t cacheKey , 
			 char *rec      ,
			 int32_t  recSize  ,
			 int32_t  timestamp = 0 ) {
		return addRecord(coll,(char *)&cacheKey,rec,recSize,
				 timestamp);
	}

	void verify();

	// . just checks to see if a record is in the cache
	// . does not promote record
	// . used by Msg34.cpp for disk load balancing
	bool isInCache ( collnum_t collnum , char *cacheKey , int32_t maxAge );
	bool isInCache ( collnum_t collnum , key_t cacheKey , int32_t maxAge ) {
		return isInCache(collnum,(char *)&cacheKey,maxAge);
	}

	// . memory accounting accessors, they just hand back the counters
	//   declared further down
	int32_t getMemOccupied () const { return m_memOccupied; }
	int32_t getMemAlloced  () const { return m_memAlloced;  }
	int32_t getMaxMem      () const { return m_maxMem;      }

	// cache stats
	int64_t getNumHits   () const { return m_numHits;   }
	int64_t getNumMisses () const { return m_numMisses; }
	int64_t getHitBytes  () const { return m_hitBytes;  }
	// . hash table occupancy: used slots and total slots
	int32_t getNumUsedNodes  () const { return m_numPtrsUsed; }
	int32_t getNumTotalNodes () const { return m_numPtrsMax ; }

	// . load/save support
	bool useDisk ( ) const { return m_useDisk; }
	bool load ( char *dbname );
	bool save ( bool useThreads );
	bool save_r ( );
	bool save2_r ( int fd );
	void threadDone ( );
	bool load   ( );
	int32_t m_saveError;

	// called internally by save()
	bool saveSome_r ( int fd, int32_t *iptr , int32_t *off ) ;

	// remove a key range from the cache
	void removeKeyRange ( collnum_t collnum,
			      char *startKey,
			      char *endKey );

	char *getDbname () const { return m_dbname ; }

	char *m_dbname;

	// private:
	// (label intentionally left commented out, as in the original, so
	//  everything below stays publicly accessible)

	// . the add routine that takes the record as two pieces, "rec1" and
	//   "rec2", each with its own size
	bool addRecord ( collnum_t collnum ,
			 char *cacheKey , 
			 char *rec1     ,
			 int32_t  recSize1 ,
			 char *rec2     ,
			 int32_t  recSize2 ,
			 int32_t  timestamp ,
			 char **retRecPtr = NULL ) ;

	bool deleteRec ( );
	// . hash table maintenance: "ptr"/"rec" point at a record in m_bufs
	void addKey     ( collnum_t collnum , char *key , char *ptr ) ;
	void removeKey  ( collnum_t collnum , char *key , char *rec ) ;
	void markDeletedRecord(char *ptr);

	bool convertCache ( int32_t numPtrsMax , int32_t maxMem ) ;
	bool m_convert;
	int32_t m_convertNumPtrsMax;
	int32_t m_convertMaxMem;

	bool m_isSaving;

	// . mem stats -- just for arrays we contain
	// . memory that is allocated and in use, including dataSizes
	int32_t m_memOccupied;
	// total memory allocated including dataSizes of our records
	int32_t m_memAlloced;
	// don't let m_memAlloced exceed this
	int32_t  m_maxMem;

	// . data is stored in m_bufs, an array of buffers
	// . we may have to use multiple bufs because we cannot allocate more
	//   than 128 Megabytes without pthread_create() failing
	// . we can have up to 32 bufs of 128M each, that's 4 gigs, plenty
	char      *m_bufs     [32];
	int32_t       m_bufSizes [32]; // size of the alloc'd space
	int32_t       m_numBufs;
	int32_t  m_totalBufSize; // gbpwrite() assumes 32 bits
	int32_t       m_offset; // where next rec is stored
	int32_t       m_tail;   // next rec to delete

	// the hash table, buckets are ptrs into an m_bufs[i]
	char     **m_ptrs;
	int32_t       m_numPtrsMax;
	int32_t       m_numPtrsUsed;
	int32_t       m_threshold;

	// cache hits and misses
	int64_t m_numHits; // includes partial hits & cached not-founds too
	int64_t m_numMisses;
	int64_t m_hitBytes;

	int32_t m_fixedDataSize;
	bool m_supportLists;
	bool m_useHalfKeys;
	bool m_useDisk;  // load/save from disk?

	// have we wrapped yet?
	int8_t m_wrapped;

	// keySize of cache keys in bytes
	char m_cks;

	// keysize of lists for addList() and getList()
	char m_dks;

	// count the add ops
	int64_t m_adds;
	int64_t m_deletes;

	char m_needsSave;

	char m_corruptionDetected;

	int64_t m_maxColls;
};

#endif