// Copyright Sep 2000 Matt Wells
// . an array of key/pageOffset pairs indexed by disk page number
// . slots in the file (on disk) must be sorted by key from smallest to largest
// . used for quick btree lookup of a key to find the page where that slot
// resides on disk
// . disk page size is system's disk page size (8k) for best performance
// . TODO: split up map into several arrays so like the first 1meg of
// map can be easily freed like during a merge
// . TODO: use a getKey(),getOffset(),getDataSize() to make this easier to do
#ifndef _RDBMAP_H_
#define _RDBMAP_H_
#include "BigFile.h"
#include "RdbList.h"
// . this can be increased to provide greater disk coverage but it will
// increase delays because each seek will have to read more
// . 8k of disk corresponds to 18 bytes of map
// . 8megs of disk needs 18k of map (w/ dataSize)
// . 8megs of disk needs 14k of map (w/o dataSize)
// . a 32gig index would need 14megs*4 = 56 megs of map!!!
// . then merging would mean we'd need twice that, 112 megs of map in memory
// unless we dumped the map to disk periodically
// . 256 megs per machine would be excellent, but costly?
// . we need 66 megabytes of mem for every 80-gigs of index (actually 40gigs
// considering half the space is for merging)
// . PAGE_SIZE is often called the block size
// . a page or block is read in "IDE Block Mode" by the drive
// . it's the amount of disk that can be read with one i/o (interrupt)
// . try to make sure PAGE_SIZE matches your "multiple sector count"
// . use hdparm to configure; (hdparm -m16 /dev/hda) will set it to 8k since
// each sector is 512 bytes
// . hdparm -u1 -X66 -d1 -c3 -m16 /dev/hda is pretty aggressive
// . actually "block size" in context of the file system can be 1024,... 4096
// on ext2fs ... set it as high as possible since we have very large files
// and want to avoid external fragmentation for the fastest reading/writing
// . we now set it to 16k to make map smaller in memory
// . NOTE: 80gigs of disk is 80,000,000,000 bytes NOT 80*1024*1024*1024
// . mapping 80 gigs should now take 80G/(16k) = 4.8 million pages
// . 4.8 million pages at 16 bytes a page is 74.5 megs of memory
// . mapping 320 gigs, at 8k pages is 686 megabytes of RAM (w/ crc)
// . mapping 320 gigs, at 16k pages is half that
// . mapping 640 gigs, at 16k pages is 686 megabytes of RAM (w/ crc)
// . mapping 640 gigs, at 32k pages is 343 megabytes of RAM (w/ crc)
#define GB_INDEXDB_PAGE_SIZE (32*1024)
#define GB_TFNDB_PAGE_SIZE ( 1*1024)
//#define PAGE_SIZE (16*1024)
//#define PAGE_SIZE (8*1024)
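// . a minimal sketch of the memory math above (the name and signature here
//   are illustrative only, not part of this class): one map entry per page,
//   holding a key (m_ks bytes), an in-page offset (2-byte short) and, for
//   variable-sized recs, a 4-byte dataSize
//
//   static long long approxMapMem ( long long fileSize    ,
//                                   long      pageSize    ,
//                                   char      ks          ,
//                                   bool      hasDataSize ) {
//           long long numPages     = fileSize / pageSize + 1;
//           long long bytesPerPage = ks + 2 + ( hasDataSize ? 4 : 0 );
//           // ex: 80,000,000,000 bytes of disk at 16k pages with 12-byte
//           // keys and no dataSize is ~4.8 million pages * 14 bytes each,
//           // roughly the 66 megs quoted above
//           return numPages * bytesPerPage;
//   }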
// . I define a segment to be a group of pages
// . I use 2k pages per segment
// . each page represents m_pageSize bytes on disk
// . the BigFile's MAX_PART_SIZE should be evenly divisible by PAGES_PER_SEG
// . that way, when a part file is removed we can remove a whole number of
// segments (we chop the leading Files of a BigFile during merges)
#define PAGES_PER_SEGMENT (2*1024)
#define PAGES_PER_SEG (PAGES_PER_SEGMENT)
// MAX_SEGMENTS of 16*1024 allows for 32 million pages = 256gigs of disk data
//#define MAX_SEGMENTS (16*1024)
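// . ex: with GB_INDEXDB_PAGE_SIZE (32k) and PAGES_PER_SEG (2k) one segment
//   of the map covers 2048 * 32768 = 64 megs of the data file, so making
//   MAX_PART_SIZE a multiple of 64 megs lets chopHead() drop whole
//   segments when a leading part file is unlinked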
class RdbMap {
public:
RdbMap ();
~RdbMap ();
// . does not write data to disk
// . frees all
void reset ( );
// set the filename, and if it's fixed data size or not
void set ( char *dir , char *mapFilename,
//long fixedDataSize , bool useHalfKeys );
long fixedDataSize , bool useHalfKeys , char keySize ,
long pageSize );
bool rename ( char *newMapFilename ) {
return m_file.rename ( newMapFilename ); };
bool rename ( char *newMapFilename ,
void (* callback)(void *state) , void *state ) {
return m_file.rename ( newMapFilename , callback , state ); };
char *getFilename ( ) { return m_file.getFilename(); };
BigFile *getFile ( ) { return &m_file; };
// . writes the map to disk if any slot was added
// . returns false if File::close() returns false
// . should free up all mem
// . resets m_numPages and m_maxNumPages to 0
// . a file's version of the popular reset() function
// . if it's urgent we do not call mfree()
bool close ( bool urgent );
// . we store the fixed dataSize in the map file
// . if it's -1 then each record's data is of variable size
long getFixedDataSize() { return m_fixedDataSize; };
// . this is called automatically when close() is called
// . however, we may wish to call it externally to ensure no data loss
// . returns false if any write fails
// . returns true when done dumping m_keys and m_offsets to file
// . write out the m_keys and m_offsets arrays
// . this is totally MTUnsafe
// . don't be calling addRecord while this is dumping
// . flushes when done
bool writeMap ( );
bool writeMap2 ( );
long long writeSegment ( long segment , long long offset );
// . calls addRecord() for each record in the list
// . returns false and sets errno on error
// . TODO: implement transactional rollback feature
bool addList ( RdbList *list );
bool prealloc ( RdbList *list );
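// . a rough usage sketch of the add-then-write lifecycle (directory, file
//   name and parameter values below are illustrative only):
//
//   RdbMap map;
//   map.set ( "/mnt/raid/data" , "indexdb0001.map" ,
//             0    /* fixedDataSize: data-less recs */ ,
//             true /* useHalfKeys                   */ ,
//             12   /* keySize                       */ ,
//             GB_INDEXDB_PAGE_SIZE );
//   RdbList list; // assume the caller filled this and dumped it to disk
//   if ( ! map.addList ( &list ) ) return false;
//   // persist the map itself once all lists have been added
//   if ( ! map.writeMap ( ) ) return false;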
// . like above but faster
// . just for adding data-less keys
// . NOTE: disabled until it works correctly
// bool addKey ( key_t &key );
// get the number of non-deleted records in the data file we map
long long getNumPositiveRecs ( ) { return m_numPositiveRecs; };
// get the number of "delete" records in the data file we map
long long getNumNegativeRecs ( ) { return m_numNegativeRecs; };
// total
long long getNumRecs ( ) { return m_numPositiveRecs +
m_numNegativeRecs; };
// get the size of the file we are mapping
long long getFileSize () { return m_offset; };
// . gets total size of all recs in this page range
// . if subtract is true we subtract the sizes of pages that begin
// with a delete key (low bit is clear)
long long getRecSizes ( long startPage ,
long endPage ,
bool subtract );
// like above, but recSizes is guaranteed to be in [startKey,endKey]
long long getMinRecSizes ( long sp ,
long ep ,
//key_t startKey ,
//key_t endKey ,
char *startKey ,
char *endKey ,
bool subtract );
// like above, but sets an upper bound for recs in [startKey,endKey]
long long getMaxRecSizes ( long sp ,
long ep ,
//key_t startKey ,
//key_t endKey ,
char *startKey ,
char *endKey ,
bool subtract );
// get a key range from a page range
void getKeyRange ( long startPage , long endPage ,
//key_t *minKey , key_t *maxKey );
char *minKey , char *maxKey );
// . get a page range from a key range
// . returns false if no records exist in that key range
// . maxKey will be sampled under "oldTruncationLimit" so you
// can increase the trunc limit w/o messing up Indexdb::getTermFreq()
//bool getPageRange ( key_t startKey , key_t endKey ,
bool getPageRange ( char *startKey , char *endKey ,
long *startPage , long *endPage ,
//key_t *maxKey ,
char *maxKey ,
long long oldTruncationLimit = -1 ) ;
// get the ending page so that [startPage,endPage] has ALL the recs
// whose keys are in [startKey,endKey]
//long getEndPage ( long startPage , key_t endKey );
long getEndPage ( long startPage , char *endKey );
// like above, but endPage may be smaller as long as we cover at least
// minRecSizes worth of records in [startKey,endKey]
//bool getPageRange ( key_t startKey , key_t endKey ,
//long minRecSizes ,
//long *startPage , long *endPage ) ;
// . offset of first key wholly on page # "page"
// . return length of the whole mapped file if "page" > m_numPages
// . use m_offset as the size of the file that we're mapping
long long getAbsoluteOffset ( long page ) ;
// . the offset of a page after "page" that is a different key
// . returns m_offset if page >= m_numPages
long long getNextAbsoluteOffset ( long page ) ;
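// . a rough sketch (in the spirit of Msg3/RdbScan, but illustrative only)
//   of turning a key range into a byte range to read from the data file;
//   "map", "startKey" and "endKey" are assumed to be set up by the caller:
//
//   long startPage , endPage;
//   char maxKey [ MAX_KEY_BYTES ];
//   if ( ! map.getPageRange ( startKey , endKey ,
//                             &startPage , &endPage , maxKey ) )
//           return true; // no recs in [startKey,endKey]
//   long long offset   = map.getAbsoluteOffset ( startPage     );
//   long long readSize = map.getAbsoluteOffset ( endPage + 1 ) - offset;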
//key_t getLastKey ( ) { return m_lastKey; };
//char *getLastKey ( ) { return m_lastKey; };
void getLastKey ( char *key ) { KEYSET(key,m_lastKey,m_ks); };
// . these functions operate on one page
// . get the first key wholly on page # "page"
// . if page >= m_numPages use the lastKey in the file
//key_t getKey ( long page ) {
void getKey ( long page , char *k ) {
if ( page >= m_numPages ) {KEYSET(k,m_lastKey,m_ks);return;}
//return m_keys[page/PAGES_PER_SEG][page%PAGES_PER_SEG];
KEYSET(k,&m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks],m_ks);
return;
}
//const key_t *getKeyPtr ( long page ) {
char *getKeyPtr ( long page ) {
//if ( page >= m_numPages ) return &m_lastKey;
//if ( page >= m_numPages ) return m_lastKey;
if ( page >= m_numPages ) return m_lastKey;
return &m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks];
}
// return getKey ( page ); };
// if page >= m_numPages return 0
short getOffset ( long page ) {
if ( page >= m_numPages ) {
log(LOG_LOGIC,"RdbMap::getOffset: bad engineer");
return 0;
}
return m_offsets [page/PAGES_PER_SEG][page%PAGES_PER_SEG];
};
//void setKey ( long page , key_t &k ) {
void setKey ( long page , char *k ) {
//#ifdef _SANITYCHECK_
if ( page >= m_maxNumPages ) {
char *xx = NULL; *xx = 0;
log(LOG_LOGIC,"RdbMap::setKey: bad engineer");return; }
//#endif
//m_keys[page/PAGES_PER_SEG][page%PAGES_PER_SEG] = k; };
KEYSET(&m_keys[page/PAGES_PER_SEG][(page%PAGES_PER_SEG)*m_ks],
k,m_ks);
};
void setOffset ( long page , short offset ) {
m_offsets[page/PAGES_PER_SEG][page%PAGES_PER_SEG] = offset;};
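// . a worked example of the page -> (segment,slot) math used above: with
//   PAGES_PER_SEG = 2048, page 5000 lives in segment 5000/2048 = 2 at slot
//   5000%2048 = 904, and with m_ks = 12 its key starts at byte 904*12 of
//   that segment's key buffer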
// . total recSizes = positive + negative rec sizes
// . used to read all the recs in Msg3 and RdbScan
//long getRecSizes ( long page ) {
//return getRecSizes ( page , page + 1 ); };
// . returns true on success
// . returns false on i/o error.
// . calls allocMap() to get memory for m_keys/m_offsets
// . The format of the map on disk is described in Map.h
// . sets "m_numPages", "m_keys", and "m_offsets".
// . reads the keys and offsets into buffers allocated during open().
bool readMap ( BigFile *dataFile );
bool readMap2 ( );
long long readSegment ( long segment, long long offset, long fileSize);
// due to disk corruption keys or offsets can be out of order in map
bool verifyMap ( BigFile *dataFile );
bool verifyMap2 ( );
bool unlink ( ) { return m_file.unlink ( ); };
bool unlink ( void (* callback)(void *state) , void *state ) {
return m_file.unlink ( callback , state ); };
long getNumPages ( ) { return m_numPages; };
// . return first page #, "N", to read to get the record w/ this key
// if it exists
// . if m_keys[N] < startKey then m_keys[N+1] is > startKey
// . if m_keys[N] > startKey then all keys before m_keys[N] in the file
// are strictly less than "startKey" and "startKey" does not exist
// . if m_keys[N] > startKey then m_keys[N-1] spans multiple pages so
// that the key immediately after it on disk is, in fact, m_keys[N]
//long getPage ( key_t startKey ) ;
long getPage ( char *startKey ) ;
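// . a hypothetical example of the cases above: if the first keys wholly on
//   pages 3,4,5 are 20,50,80 then getPage(30) should return 3, since a rec
//   keyed 30, if present, must begin on page 3 (50 is already the first
//   key wholly on page 4)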
// used in Rdb class before calling setMapSize
//long setMapSizeFromFile ( long fileSize ) ;
// . call this before calling addList() or addRecord()
// . returns false if realloc had problems
// . sets m_maxNumPages to maxNumPages if successful
// . used to grow the map, too
//bool setMapSize ( long maxNumPages );
bool addSegmentPtr ( long n ) ;
// called by setMapSize() to increase the # of segments
bool addSegment ( ) ;
// . remove and bury (shift over) all segments below the one that
// contains page # "pageNum"
// . used by RdbMerge when unlinking part files
// . returns false and sets errno on error
// . the first "fileSize" bytes of the BigFile was chopped off
// . we must remove our segments
bool chopHead (long fileSize );
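// . ex: if fileSize bytes are chopped off and each segment spans
//   PAGES_PER_SEG * m_pageSize bytes of the data file, then about
//   fileSize / (PAGES_PER_SEG * m_pageSize) leading segments can be
//   removed (m_fileStartOffset, below, tracks the chopped bytes)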
// how much mem is being used by this map?
long long getMemAlloced ();
// . attempts to auto-generate from data file, f
// . returns false and sets g_errno on error
bool generateMap ( BigFile *f ) ;
// . add a slot to the map
// . returns false if map size would be exceeded by adding this slot
bool addRecord ( char *key, char *rec , long recSize );
bool addRecord ( key_t &key, char *rec , long recSize ) {
return addRecord((char *)&key,rec,recSize);};
bool truncateFile ( BigFile *f ) ;
private:
// specialized routine for adding a list to an indexdb map
bool addIndexList ( class IndexList *list ) ;
void printMap ();
// the map file
BigFile m_file;
// . we divide the map up into segments now
// . this facilitates merges so one map can shrink while another grows
// . these 3 arrays define the map
// . see explanation at top of this file for map description
// . IMPORTANT: if growing m_pageSize we might need to change m_offsets
// from short to long
//key_t *m_keys [ MAX_SEGMENTS ];
//char *m_keys [ MAX_SEGMENTS ];
char **m_keys;
long m_numSegmentPtrs;
//key96_t **m_keys96; // set to m_keys
//key128_t **m_keys128; // set to m_keys
//short *m_offsets [ MAX_SEGMENTS ];
short **m_offsets;
long m_numSegmentOffs;
// number of valid pages in the map.
long m_numPages;
// . size of m_keys, m_offsets arrays
// . not all slots are used, however
// . this is sum of all pages in all segments
long m_maxNumPages;
// each segment holds PAGES_PER_SEGMENT pages of info
long m_numSegments;
// is the rdb file's dataSize fixed? -1 means it's not.
long m_fixedDataSize;
// . to keep track of disk offsets of added records
// . if this is > 0 we know a key was added to map so we should call
// writeMap() on close or destroy
// . NOTE: also used as the file size of the file we're mapping
long long m_offset;
// we keep global tallies on the number of non-deleted records
// and deleted records
long long m_numPositiveRecs;
long long m_numNegativeRecs;
// . the last key in the file itself
// . getKey(pageNum) returns this when pageNum == m_numPages
// . used by Msg3::getSmallestEndKey()
//key_t m_lastKey;
char m_lastKey[MAX_KEY_BYTES];
// when close is called, must we write the map?
bool m_needToWrite;
// when a BigFile gets chopped, keep track of a start offset for it
long long m_fileStartOffset;
// are we mapping a data file that supports 6-byte keys?
bool m_useHalfKeys;
char m_ks;
bool m_generatingMap;
long m_pageSize;
long m_pageSizeBits;
long m_lastLogTime ;
long long m_badKeys ;
bool m_needVerify ;
};
#endif