open-source-search-engine/RdbMap.h
Matt adcef39376 Merge branch 'diffbot-testing' into diffbot-matt
Conflicts:
	Collectiondb.cpp
	Collectiondb.h
	Conf.cpp
	Conf.h
	Msg39.cpp
	PageEvents.cpp
	PageResults.cpp
	PageTurk.cpp
	Pages.cpp
	Parms.cpp
	Posdb.cpp
	Proxy.cpp
	Query.cpp
	Query.h
	RdbBase.cpp
	RdbMap.cpp
	Repair.cpp
	Repair.h
	SafeBuf.cpp
	Spider.cpp
	Tagdb.cpp
	TopTree.cpp
	XmlDoc.cpp
	main.cpp
2014-11-20 16:53:07 -08:00

401 lines
15 KiB
C++

// Copyright Sep 2000 Matt Wells
// . an array of key/pageOffset pairs indexed by disk page number
// . slots in Slot (on disk) must be sorted by key from smallest to largest
// . used for quick btree lookup of a key to find the page where that slot
// resides on disk
// . disk page size is system's disk page size (8k) for best performance
// . TODO: split up map into several arrays so like the first 1meg of
// map can be easily freed like during a merge
// . TODO: use a getKey(),getOffset(),getDataSize() to make this easier to do
#ifndef _RDBMAP_H_
#define _RDBMAP_H_
#include "BigFile.h"
#include "RdbList.h"
// . this can be increased to provide greater disk coverage but it will
// increase delays because each seek will have to read more
// . 8k of disk corresponds to 18 bytes of map
// . 8megs of disk needs 18k of map (w/ dataSize)
// . 8megs of disk needs 14k of map (w/o dataSize)
// . a 32gig index would need 14megs*4 = 56 megs of map!!!
// . then merging would mean we'd need twice that, 112 megs of map in memory
// unless we dumped the map to disk periodically
// . 256 megs per machine would be excellent, but costly?
// . we need 66 megabytes of mem for every 80-gigs of index (actually 40gigs
// considering half the space is for merging)
// . PAGE_SIZE is often called the block size
// . a page or block is read in "IDE Block Mode" by the drive
// . it's the amount of disk that can be read with one i/o (interrupt)
// . try to make sure PAGE_SIZE matches your "multiple sector count"
// . use hdparm to configure (hdparm -m16 /dev/hda) will set it to 8k since
// each sector is 512bytes
// . hdparm -u1 -X66 -d1 -c3 -m16 /dev/hda is pretty agressive
// . actually "block size" in context of the file system can be 1024,... 4096
// on ext2fs ... set it as high as possible since we have very large files
// and want to avoid external fragmentation for the fastest reading/writing
// . we now set it to 16k to make map smaller in memory
// . NOTE: 80gigs of disk is 80,000,000,000 bytes NOT 80*1024*1024*1024
// . mapping 80 gigs should now take 80G/(16k) = 4.8 million pages
// . 4.8 million pages at 16 bytes a page is 74.5 megs of memory
// . mapping 320 gigs, at 8k pages is 686 megabytes of RAM (w/ crc)
// . mapping 320 gigs, at 16k pages is half that
// . mapping 640 gigs, at 16k pages is 686 megabytes of RAM (w/ crc)
// . mapping 640 gigs, at 32k pages is 343 megabytes of RAM (w/ crc)
#define GB_INDEXDB_PAGE_SIZE (32*1024)
#define GB_TFNDB_PAGE_SIZE ( 1*1024)
//#define PAGE_SIZE (16*1024)
//#define PAGE_SIZE (8*1024)
// . I define a segment to be a group of pages
// . I use 2k pages per segment
// . each page represents m_pageSize bytes on disk
// . the BigFile's MAX_PART_SIZE should be evenly divisible by PAGES_PER_SEG
// . that way, when a part file is removed we can remove an even amount of
// segments (we chop the leading Files of a BigFile during merges)
#define PAGES_PER_SEGMENT (2*1024)
#define PAGES_PER_SEG (PAGES_PER_SEGMENT)
// MAX_SEGMENTS of 16*1024 allows for 32 million pages = 256gigs of disk data
//#define MAX_SEGMENTS (16*1024)
class RdbMap {
public:
RdbMap ();
~RdbMap ();
// . does not write data to disk
// . frees all
void reset ( );
// set the filename, and if it's fixed data size or not
void set ( char *dir , char *mapFilename,
//int32_t fixedDataSize , bool useHalfKeys );
int32_t fixedDataSize , bool useHalfKeys , char keySize ,
int32_t pageSize );
bool rename ( char *newMapFilename ) {
return m_file.rename ( newMapFilename ); };
bool rename ( char *newMapFilename ,
void (* callback)(void *state) , void *state ) {
return m_file.rename ( newMapFilename , callback , state ); };
char *getFilename ( ) { return m_file.getFilename(); };
BigFile *getFile ( ) { return &m_file; };
// . writes the map to disk if any slot was added
// . returns false if File::close() returns false
// . should free up all mem
// . resets m_numPages and m_maxNumPages to 0
// . a file's version of the popular reset() function
// . if it's urgent we do not call mfree()
bool close ( bool urgent );
// . we store the fixed dataSize in the map file
// . if it's -1 then each record's data is of variable size
int32_t getFixedDataSize() { return m_fixedDataSize; };
void reduceMemFootPrint();
// . this is called automatically when close() is called
// . however, we may wish to call it externally to ensure no data loss
// . return false if any write failes
// . returns true when done dumping m_keys and m_offsets to file
// . write out the m_keys and m_offsets arrays
// . this is totally MTUnsafe
// . don't be calling addRecord with this is dumping
// . flushes when done
bool writeMap ( );
bool writeMap2 ( );
int64_t writeSegment ( int32_t segment , int64_t offset );
// . calls addRecord() for each record in the list
// . returns false and sets errno on error
// . TODO: implement transactional rollback feature
bool addList ( RdbList *list );
bool prealloc ( RdbList *list );
// . like above but faster
// . just for adding data-less keys
// . NOTE: disabled until it works correctly
// bool addKey ( key_t &key );
// get the number of non-deleted records in the data file we map
int64_t getNumPositiveRecs ( ) { return m_numPositiveRecs; };
// get the number of "delete" records in the data file we map
int64_t getNumNegativeRecs ( ) { return m_numNegativeRecs; };
// total
int64_t getNumRecs ( ) { return m_numPositiveRecs +
m_numNegativeRecs; };
// get the size of the file we are mapping
int64_t getFileSize () { return m_offset; };
// . gets total size of all recs in this page range
// . if subtract is true we subtract the sizes of pages that begin
// with a delete key (low bit is clear)
int64_t getRecSizes ( int32_t startPage ,
int32_t endPage ,
bool subtract );
// like above, but recSizes is guaranteed to be in [startKey,endKey]
int64_t getMinRecSizes ( int32_t sp ,
int32_t ep ,
//key_t startKey ,
//key_t endKey ,
char *startKey ,
char *endKey ,
bool subtract );
// like above, but sets an upper bound for recs in [startKey,endKey]
int64_t getMaxRecSizes ( int32_t sp ,
int32_t ep ,
//key_t startKey ,
//key_t endKey ,
char *startKey ,
char *endKey ,
bool subtract );
// get a key range from a page range
void getKeyRange ( int32_t startPage , int32_t endPage ,
//key_t *minKey , key_t *maxKey );
char *minKey , char *maxKey );
// . get a page range from a key range
// . returns false if no records exist in that key range
// . maxKey will be sampled under "oldTruncationLimit" so you
// can increase the trunc limit w/o messing up Indexdb::getTermFreq()
//bool getPageRange ( key_t startKey , key_t endKey ,
bool getPageRange ( char *startKey , char *endKey ,
int32_t *startPage , int32_t *endPage ,
//key_t *maxKey ,
char *maxKey ,
int64_t oldTruncationLimit = -1 ) ;
// get the ending page so that [startPage,endPage] has ALL the recs
// whose keys are in [startKey,endKey]
//int32_t getEndPage ( int32_t startPage , key_t endKey );
int32_t getEndPage ( int32_t startPage , char *endKey );
// like above, but endPage may be smaller as int32_t as we cover at least
// minRecSizes worth of records in [startKey,endKey]
//bool getPageRange ( key_t startKey , key_t endKey ,
//int32_t minRecSizes ,
//int32_t *startPage , int32_t *endPage ) ;
// . offset of first key wholly on page # "page"
// . return length of the whole mapped file if "page" > m_numPages
// . use m_offset as the size of the file that we're mapping
int64_t getAbsoluteOffset ( int32_t page ) ;
// . the offset of a page after "page" that is a different key
// . returns m_offset if page >= m_numPages
int64_t getNextAbsoluteOffset ( int32_t page ) ;
//key_t getLastKey ( ) { return m_lastKey; };
//char *getLastKey ( ) { return m_lastKey; };
void getLastKey ( char *key ) { KEYSET(key,m_lastKey,m_ks); };
// . these functions operate on one page
// . get the first key wholly on page # "page"
// . if page >= m_numPages use the lastKey in the file
//key_t getKey ( int32_t page ) {
void getKey ( int32_t page , char *k ) {
if ( page >= m_numPages ) {KEYSET(k,m_lastKey,m_ks);return;}
//return m_keys[page/PAGES_PER_SEG][page%PAGES_PER_SEG];
KEYSET(k,&m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks],m_ks);
return;
}
//const key_t *getKeyPtr ( int32_t page ) {
char *getKeyPtr ( int32_t page ) {
//if ( page >= m_numPages ) return &m_lastKey;
//if ( page >= m_numPages ) return m_lastKey;
if ( page >= m_numPages ) return m_lastKey;
return &m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks];
}
// return getKey ( page ); };
// if page >= m_numPages return 0
int16_t getOffset ( int32_t page ) {
if ( page >= m_numPages ) {
log(LOG_LOGIC,"RdbMap::getOffset: bad engineer");
return 0;
}
return m_offsets [page/PAGES_PER_SEG][page%PAGES_PER_SEG];
};
//void setKey ( int32_t page , key_t &k ) {
void setKey ( int32_t page , char *k ) {
//#ifdef GBSANITYCHECK
if ( page >= m_maxNumPages ) {
char *xx = NULL; *xx = 0;
log(LOG_LOGIC,"RdbMap::setKey: bad engineer");return; }
//#endif
//m_keys[page/PAGES_PER_SEG][page%PAGES_PER_SEG] = k; };
KEYSET(&m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks],
k,m_ks);
};
void setOffset ( int32_t page , int16_t offset ) {
m_offsets[page/PAGES_PER_SEG][page%PAGES_PER_SEG] = offset;};
// . total recSizes = positive + negative rec sizes
// . used to read all the recs in Msg3 and RdbScan
//int32_t getRecSizes ( int32_t page ) {
//return getRecSizes ( page , page + 1 ); };
// . returns true on success
// . returns false on i/o error.
// . calls allocMap() to get memory for m_keys/m_offsets
// . The format of the map on disk is described in Map.h
// . sets "m_numPages", "m_keys", and "m_offsets".
// . reads the keys and offsets into buffers allocated during open().
bool readMap ( BigFile *dataFile );
bool readMap2 ( );
int64_t readSegment ( int32_t segment, int64_t offset, int32_t fileSize);
// due to disk corruption keys or offsets can be out of order in map
bool verifyMap ( BigFile *dataFile );
bool verifyMap2 ( );
bool unlink ( ) { return m_file.unlink ( ); };
bool unlink ( void (* callback)(void *state) , void *state ) {
return m_file.unlink ( callback , state ); };
int32_t getNumPages ( ) { return m_numPages; };
// . return first page #, "N", to read to get the record w/ this key
// if it exists
// . if m_keys[N] < startKey then m_keys[N+1] is > startKey
// . if m_keys[N] > startKey then all keys before m_keys[N] in the file
// are strictly less than "startKey" and "startKey" does not exist
// . if m_keys[N] > startKey then m_keys[N-1] spans multiple pages so
// that the key immediately after it on disk is in fact, m_keys[N]
//int32_t getPage ( key_t startKey ) ;
int32_t getPage ( char *startKey ) ;
// used in Rdb class before calling setMapSize
//int32_t setMapSizeFromFile ( int32_t fileSize ) ;
// . call this before calling addList() or addRecord()
// . returns false if realloc had problems
// . sets m_maxNumPages to maxNumPages if successfull
// . used to grow the map, too
//bool setMapSize ( int32_t maxNumPages );
bool addSegmentPtr ( int32_t n ) ;
// called by setMapSize() to increase the # of segments
bool addSegment ( ) ;
// . remove and bury (shift over) all segments below the one that
// contains page # "pageNum"
// . used by RdbMerge when unlinking part files
// . returns false and sets errno on error
// . the first "fileSize" bytes of the BigFile was chopped off
// . we must remove our segments
bool chopHead (int32_t fileSize );
// how much mem is being used by this map?
int64_t getMemAlloced ();
// . attempts to auto-generate from data file, f
// . returns false and sets g_errno on error
bool generateMap ( BigFile *f ) ;
// . add a slot to the map
// . returns false if map size would be exceed by adding this slot
bool addRecord ( char *key, char *rec , int32_t recSize );
bool addRecord ( key_t &key, char *rec , int32_t recSize ) {
return addRecord((char *)&key,rec,recSize);};
bool truncateFile ( BigFile *f ) ;
private:
// specialized routine for adding a list to an indexdb map
bool addIndexList ( class IndexList *list ) ;
void printMap ();
// the map file
BigFile m_file;
// . we divide the map up into segments now
// . this facilitates merges so one map can shrink while another grows
// . these 3 arrays define the map
// . see explanation at top of this file for map description
// . IMPORTANT: if growing m_pageSize might need to change m_offsets
// from int16_t to int32_t
//key_t *m_keys [ MAX_SEGMENTS ];
//char *m_keys [ MAX_SEGMENTS ];
char **m_keys;
int32_t m_numSegmentPtrs;
//key96_t **m_keys96; // set to m_keys
//key128_t **m_keys128; // set to m_keys
//int16_t *m_offsets [ MAX_SEGMENTS ];
int16_t **m_offsets;
int32_t m_numSegmentOffs;
// number of valid pages in the map.
int32_t m_numPages;
// . size of m_keys, m_offsets arrays
// . not all slots are used, however
// . this is sum of all pages in all segments
int32_t m_maxNumPages;
// each segment holds PAGES_PER_SEGMENT pages of info
int32_t m_numSegments;
// is the rdb file's dataSize fixed? -1 means it's not.
int32_t m_fixedDataSize;
// . to keep track of disk offsets of added records
// . if this is > 0 we know a key was added to map so we should call
// writeMap() on close or destroy
// . NOTE: also used as the file size of the file we're mapping
int64_t m_offset;
long m_newPagesPerSegment;
// we keep global tallies on the number of non-deleted records
// and deleted records
int64_t m_numPositiveRecs;
int64_t m_numNegativeRecs;
// . the last key in the file itself
// . getKey(pageNum) returns this when pageNum == m_numPages
// . used by Msg3::getSmallestEndKey()
//key_t m_lastKey;
char m_lastKey[MAX_KEY_BYTES];
// when close is called, must we write the map?
bool m_needToWrite;
// when a BigFile gets chopped, keep up a start offset for it
int64_t m_fileStartOffset;
// are we mapping a data file that supports 6-byte keys?
bool m_useHalfKeys;
char m_ks;
bool m_generatingMap;
int32_t m_pageSize;
int32_t m_pageSizeBits;
int32_t m_lastLogTime ;
int64_t m_badKeys ;
bool m_needVerify ;
};
#endif