// open-source-search-engine/Indexdb.cpp

#include "gb-include.h"
#include "Indexdb.h"
#include "Url.h"
#include "Clusterdb.h"
//#include "Checksumdb.h"
#include "Threads.h"
// a global class extern'd in .h file
Indexdb g_indexdb;
// for rebuilding indexdb
Indexdb g_indexdb2;
// resets rdb
void Indexdb::reset() {
	m_rdb.reset();
	//#ifdef SPLIT_INDEXDB
	//if ( m_groupIdTable ) {
	//if ( g_hostdb.m_indexSplits > 1 && m_groupIdTable ) {
	//	mfree ( m_groupIdTable , m_groupIdTableSize , "Indexdb" );
	//	m_groupIdTable     = NULL;
	//	m_groupIdTableSize = 0;
	//}
	//#endif
}
//#include "DiskPageCache.h"
/*
bool Indexdb::setGroupIdTable ( ) {
	// skip if not split
	if ( g_hostdb.m_indexSplits <= 1 ) return true;
	// . create the groupId table
	m_numGroups = g_hostdb.getNumGroups();
	//m_groupIdTableSize = m_numGroups*INDEXDB_SPLIT*sizeof(int32_t);
	m_groupIdTableSize = m_numGroups*g_hostdb.m_indexSplits*sizeof(int32_t);
	m_groupIdTable = (uint32_t *)mmalloc ( m_groupIdTableSize , "Indexdb" );
	if ( ! m_groupIdTable ) {
		g_errno = ENOMEM;
		log ( "Could not allocate %"INT32" bytes for groupIdTable",
		      m_groupIdTableSize );
		return false;
	}
	// . fill the groupId table with the lookup values
	m_groupIdShift = 32;
	int32_t x = m_numGroups;
	while ( x != 1 ) {
		x >>= 1;
		m_groupIdShift--;
	}
	for ( int32_t i = 0; i < m_numGroups; i++ ) {
		uint32_t groupId = g_hostdb.getGroupId(i);
		groupId >>= m_groupIdShift;
		if ( ! g_conf.m_legacyIndexdbSplit ) {
			//for ( int32_t s = 0; s < INDEXDB_SPLIT; s++ ) {
			for ( int32_t s = 0; s < g_hostdb.m_indexSplits; s++ ){
				int32_t g = i + s;
				while ( g >= m_numGroups ) g -= m_numGroups;
				//int32_t x = groupId + ((g % INDEXDB_SPLIT) *
				int32_t x = groupId +
					((g%g_hostdb.m_indexSplits)*
					 m_numGroups);
				m_groupIdTable[x] = g_hostdb.getGroupId(g);
			}
		}
		else {
			//for ( int32_t s = 0; s < INDEXDB_SPLIT; s++ ) {
			for ( int32_t s = 0; s < g_hostdb.m_indexSplits; s++ ){
				int32_t g = i + s;
				while ( g >= m_numGroups ) g -= m_numGroups;
				m_groupIdTable[groupId+(m_numGroups*s)] =
					g_hostdb.getGroupId(g);
			}
		}
	}
	return true;
}
*/
bool Indexdb::init ( ) {
	// fake it for now: we return before doing the real init, so
	// everything below is unreachable and kept for reference only
	return true;
	//if ( ! setGroupIdTable () ) return false;
	// . what's max # of tree nodes?
	// . each rec in tree is only 1 key (12 bytes)
	// . but has 12 bytes of tree overhead (m_left/m_right/m_parents)
	// . this is UNUSED for bin trees!!
	int32_t nodeSize     = (sizeof(key_t)+12+4) + sizeof(collnum_t);
	int32_t maxTreeNodes = g_conf.m_indexdbMaxTreeMem / nodeSize;
	// . assume the average cached list is about 600 bytes
	// . TODO: if we cache a lot of not-founds (small lists), we won't
	//   have enough nodes!!
	int32_t maxCacheNodes = g_conf.m_indexdbMaxCacheMem / 600;
	//int32_t pageSize = GB_INDEXDB_PAGE_SIZE;
	// we now use a disk page cache as opposed to the old rec cache.
	// i am trying to do away with the Rdb::m_cache rec cache in favor
	// of cleverly used disk page caches, because the rec caches are
	// not real-time and get stale.
	//int32_t pcmem = g_conf.m_indexdbMaxDiskPageCacheMem;
	//pcmem = 0;
	// make sure at least 30MB
	//if ( pcmem < 30000000 ) pcmem = 30000000;
	// keep this low if we are the tmp cluster, 30MB
	//if ( g_hostdb.m_useTmpCluster && pcmem > 30000000 ) pcmem = 30000000;
	// do not use any page cache if doing tmp cluster in order to
	// prevent swapping
	//if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
	// . init the page cache
	// . MDW: "minimize disk seeks" not working, otherwise i'd enable it!
	//if ( ! m_pc.init ( "indexdb" ,
	//		   RDB_INDEXDB ,
	//		   pcmem ,
	//		   pageSize ) )
	//	return log("db: Indexdb init failed.");
	// . set our own internal rdb
	// . max disk space for bin tree is same as maxTreeMem so that we
	//   must be able to fit all bins in memory
	// . we do not want indexdb's bin tree to ever hit disk since we
	//   dump it to rdb files when it is 90% full (90% of bins in use)
	if ( ! m_rdb.init ( g_hostdb.m_dir ,
			    "indexdb" ,
			    true , // dedup same keys?
			    0 , // fixed data size
			    g_conf.m_indexdbMinFilesToMerge ,
			    g_conf.m_indexdbMaxTreeMem ,
			    maxTreeNodes ,
			    // now we balance so Sync.cpp can order huge lists
			    true , // balance tree?
			    g_conf.m_indexdbMaxCacheMem ,
			    maxCacheNodes ,
			    true , // use half keys?
			    false , // g_conf.m_indexdbSaveCache
			    NULL ) ) // &m_pc
		return false;
	return true;
	// validate indexdb
	//return verify();
}
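// A worked sizing example for the tree math above (a sketch; assumes
// key_t is the 12-byte key and collnum_t is a 2-byte type, per the
// declarations in Indexdb.h and the type headers):
//	nodeSize     = 12 + 12 + 4 + 2 = 30 bytes per tree node
//	maxTreeNodes = m_indexdbMaxTreeMem / 30
// so, e.g., 300MB of tree mem yields roughly 300,000,000 / 30 =
// 10,000,000 nodes, one per 12-byte indexdb key held in the tree.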
// init the rebuild/secondary rdb, used by PageRepair.cpp
bool Indexdb::init2 ( int32_t treeMem ) {
	//if ( ! setGroupIdTable () ) return false;
	// . what's max # of tree nodes?
	// . each rec in tree is only 1 key (12 bytes)
	// . but has 12 bytes of tree overhead (m_left/m_right/m_parents)
	// . this is UNUSED for bin trees!!
	int32_t nodeSize     = (sizeof(key_t)+12+4) + sizeof(collnum_t);
	int32_t maxTreeNodes = treeMem / nodeSize;
	// . set our own internal rdb
	// . max disk space for bin tree is same as maxTreeMem so that we
	//   must be able to fit all bins in memory
	// . we do not want indexdb's bin tree to ever hit disk since we
	//   dump it to rdb files when it is 90% full (90% of bins in use)
	if ( ! m_rdb.init ( g_hostdb.m_dir ,
			    "indexdbRebuild" ,
			    true , // dedup same keys?
			    0 , // fixed data size
			    200 , // min files to merge
			    treeMem ,
			    maxTreeNodes ,
			    true , // balance tree?
			    0 , // maxCacheMem
			    0 , // maxCacheNodes
			    true , // use half keys?
			    false , // indexdbSaveCache
			    NULL ) ) // s_pc
		return false;
	return true;
}
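// A note on the hard-coded 200 above (an inference, not stated in the
// code): it is far higher than a typical m_indexdbMinFilesToMerge, so
// the rebuild rdb effectively defers merging while PageRepair.cpp is
// still writing out files.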
/*
bool Indexdb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// do a deep verify to figure out which files are corrupt
	deepVerify ( coll );
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
bool Indexdb::verify ( char *coll ) {
	// verification is disabled; we return true right away, so the
	// code below is unreachable and kept for reference only
	return true;
	log ( LOG_INFO, "db: Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();
	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//int32_t minRecSizes = 64000;
	CollectionRec *cr = g_collectiondb.getRec(coll);
	if ( ! msg5.getList ( RDB_INDEXDB ,
			      cr->m_collnum ,
			      &list ,
			      startKey ,
			      endKey ,
			      64000 , // minRecSizes
			      true , // includeTree?
			      false , // add to cache?
			      0 , // max cache age
			      0 , // startFileNum
			      -1 , // numFiles
			      NULL , // state
			      NULL , // callback
			      0 , // niceness
			      false , // err correction?
			      NULL ,
			      0 ,
			      -1 ,
			      true ,
			      -1LL ,
			      &msg5b ,
			      true ) ) {
		g_threads.enableThreads();
		return log("db: HEY! it did not block");
	}
	int32_t count = 0;
	int32_t got   = 0;
	bool printedKey     = false;
	bool printedZeroKey = false;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//uint32_t groupId = k.n1 & g_hostdb.m_groupMask;
		//uint32_t groupId = getGroupId ( RDB_INDEXDB , &k );
		//if ( groupId == g_hostdb.m_groupId ) got++;
		uint32_t shardNum = getShardNum ( RDB_INDEXDB , &k );
		if ( shardNum == getMyShardNum() ) got++;
		else if ( ! printedKey ) {
			log ( "db: Found bad key in list (only printing "
			      "once): %"XINT32" %"XINT64"", k.n1, k.n0 );
			printedKey = true;
		}
		if ( k.n1 == 0 && k.n0 == 0 ) {
			if ( ! printedZeroKey ) {
				log ( "db: Found Zero key in list, passing. "
				      "(only printing once)." );
				printedZeroKey = true;
			}
			// count zero keys as belonging to us so they do
			// not trip the mismatch check below
			if ( shardNum != getMyShardNum() )
				got++;
		}
	}
	if ( got != count ) {
		log ( "db: Out of first %"INT32" records in indexdb, only "
		      "%"INT32" belong to our group.", count, got );
		// exit if NONE, we probably got the wrong data
		if ( got == 0 ) log ( "db: Are you sure you have the right "
				      "data in the right directory? "
				      "Exiting." );
		log ( "db: Exiting due to Indexdb inconsistency." );
		g_threads.enableThreads();
		return g_conf.m_bypassValidation;
	}
	log ( LOG_INFO, "db: Indexdb passed verification successfully for "
	      "%"INT32" recs.", count );
	// DONE
	g_threads.enableThreads();
	return true;
}
void Indexdb::deepVerify ( char *coll ) {
	log ( LOG_INFO, "db: Deep Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();
	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//int32_t minRecSizes = 64000;
	collnum_t collnum = g_collectiondb.getCollnum(coll);
	RdbBase *rdbBase  = g_indexdb.m_rdb.getBase(collnum);
	int32_t numFiles    = rdbBase->getNumFiles();
	int32_t currentFile = 0;
	CollectionRec *cr = g_collectiondb.getRec(coll);
 deepLoop:
	// done after scanning all files
	if ( currentFile >= numFiles ) {
		g_threads.enableThreads();
		log ( LOG_INFO, "db: Finished deep verify for %"INT32" "
		      "files.", numFiles );
		return;
	}
	// scan this file
	if ( ! msg5.getList ( RDB_INDEXDB ,
			      cr->m_collnum ,
			      &list ,
			      startKey ,
			      endKey ,
			      64000 , // minRecSizes
			      true , // includeTree?
			      false , // add to cache?
			      0 , // max cache age
			      currentFile , // startFileNum
			      1 , // numFiles
			      NULL , // state
			      NULL , // callback
			      0 , // niceness
			      false , // err correction?
			      NULL ,
			      0 ,
			      -1 ,
			      true ,
			      -1LL ,
			      &msg5b ,
			      false ) ) {
		g_threads.enableThreads();
		log("db: HEY! it did not block");
		return;
	}
	int32_t count = 0;
	int32_t got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//uint32_t groupId = k.n1 & g_hostdb.m_groupMask;
		//uint32_t groupId = getGroupId ( RDB_INDEXDB , &k );
		//if ( groupId == g_hostdb.m_groupId ) got++;
		uint32_t shardNum = getShardNum ( RDB_INDEXDB , &k );
		if ( shardNum == getMyShardNum() ) got++;
	}
	if ( got != count ) {
		BigFile *f = rdbBase->getFile(currentFile);
		log ( "db: File %s: Out of first %"INT32" records in "
		      "indexdb, only %"INT32" belong to our group.",
		      f->getFilename(), count, got );
	}
	//else
	//	log ( LOG_INFO, "db: File %"INT32": Indexdb passed "
	//	      "verification successfully for %"INT32" recs.",
	//	      currentFile, count );
	// next file
	currentFile++;
	goto deepLoop;
}
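// The per-file scan above (startFileNum = currentFile, numFiles = 1) is
// what makes this verify "deep": verify() reads one merged view across
// all files, while this loop isolates which individual indexdb file
// holds the misplaced keys.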
// . see Indexdb.h for format of the 12 byte key
// . TODO: substitute var ptrs if you want extra speed
key_t Indexdb::makeKey ( int64_t termId ,
			 unsigned char score ,
			 uint64_t docId ,
			 bool isDelKey ) {
	// make sure we mask out the hi bits we do not use first
	termId = termId & TERMID_MASK;
	key_t key;
	char *kp = (char *)&key;
	char *tp = (char *)&termId;
	char *dp = (char *)&docId;
	// store termid (this byte surgery assumes the little-endian
	// layout we run on)
	*(int16_t *)(kp+10) = *(int16_t *)(tp+4);
	*(int32_t *)(kp+ 6) = *(int32_t *)(tp  );
	// store the complement of the score so higher scores sort first
	kp[5] = ~score;
	// . store docid
	// . make room for del bit and half bit
	docId <<= 2;
	*(int32_t *)(kp+1) = *(int32_t *)(dp+1);
	kp[0] = dp[0];
	// turn off half bit
	kp[0] &= 0xfd;
	// turn on/off del bit (a clear bit 0 marks a delete key)
	if ( isDelKey ) kp[0] &= 0xfe;
	else            kp[0] |= 0x01;
	// key is complete
	return key;
}
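// A minimal decode sketch for the key layout built above. Indexdb.h
// already exposes real accessors (getScore()/getDocId() are used in
// getTermFreq() below); these static helpers are hypothetical, shown
// only to document the byte layout, and assume the same little-endian
// machine:
static int64_t sketchGetTermId ( key_t key ) {
	char *kp = (char *)&key;
	int64_t termId = 0;
	char *tp = (char *)&termId;
	// reverse the two termid stores from makeKey()
	*(int32_t *)(tp  ) = *(int32_t *)(kp+ 6);
	*(int16_t *)(tp+4) = *(int16_t *)(kp+10);
	return termId & TERMID_MASK;
}
static uint64_t sketchGetDocId ( key_t key ) {
	char *kp = (char *)&key;
	uint64_t docId = 0;
	char *dp = (char *)&docId;
	// grab the 5 docid bytes, then shift out the del and half bits
	*(int32_t *)(dp+1) = *(int32_t *)(kp+1);
	dp[0] = kp[0];
	return docId >> 2;
}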
// . accesses RdbMap to estimate size of the indexList for this termId
// . returns an UPPER BOUND
int64_t Indexdb::getTermFreq ( collnum_t collnum , int64_t termId ) {
	// establish the list boundary keys
	key_t startKey = makeStartKey ( termId );
	key_t endKey   = makeEndKey   ( termId );
	// . ask rdb for an upper bound on this list size
	// . but actually, it will be somewhat of an estimate because of
	//   RdbTree
	key_t maxKey;
	// divide by 6 since indexdb's recs are 6 bytes each, except for
	// the first one in a list, which is a full key
	int64_t maxRecs;
	// . don't count more than this many in the map
	// . that's our old truncation limit, the new stuff isn't as dense
	int32_t oldTrunc = 100000;
	// get maxKey for only the top "oldTrunc" docids because when we
	// increase the trunc limit we screw up our extrapolation! BIG TIME!
	maxRecs = m_rdb.getListSize ( collnum, startKey, endKey, &maxKey,
				      oldTrunc ) / 6;
	// . TRUNCATION NOW OBSOLETE, so return here; the interpolation
	//   code below is unreachable and kept for reference only
	return maxRecs;
	// . is this termId truncated in this indexdb?
	// . truncationLimit of Indexdb is max # of records for one termId
	//if ( (int64_t)maxRecs < getTruncationLimit() ) return maxRecs;
	// . no, i like to raise the truncation limit on the fly, so if we
	//   still had that line above then nothing would seem to be
	//   truncated, would it?
	// . so just use a minimal truncation limit then
	if ( maxRecs < MIN_TRUNC ) return maxRecs;
	// this var is so we can adjust the # of recs lost due to truncation
	int64_t numRecs = maxRecs;
	// . get the last score we got
	// . if it is > 1 then we probably got the 1's truncated off
	unsigned char shy = g_indexdb.getScore ( maxKey );
	int64_t lastDocId = g_indexdb.getDocId ( maxKey );
	// . which page has the first key with this score (shy)?
	// . modify maxKey
	key_t midKey = g_indexdb.makeKey ( termId , shy , 0LL , true );
	// get # of recs that have this termId and score
	int32_t lastChunk = m_rdb.getListSize ( collnum, midKey, endKey,
						&maxKey, oldTrunc ) / 6;
	// now interpolate the number of uncounted docids for the score "shy"
	int32_t remaining = (((int64_t)lastChunk) * lastDocId) /
			    (int64_t)DOCID_MASK;
	// add in the remaining # of docids for the score "shy"
	numRecs += remaining;
	// log it
	log(LOG_DEBUG,"query: Adding %"INT32" (%"INT32") to score --> "
	    "%"INT64".", remaining, lastChunk, numRecs );
	// . if we got a meta tag here, scores are MOSTLY the same
	//   and we should not interpolate based on score
	// . if we got a meta tag, scores are usually 33 or more
	// . TODO: figure out a way to do this correctly
	if ( shy > 20 ) shy = 0;
	// debug msg
	//log("endKey.n0=%"XINT64" startKey.n0=%"XINT64"",
	//    endKey.n0, startKey.n0 );
	//log("maxRecs=%"UINT64" maxKey.n0=%"XINT64" shy=%"INT32"",
	//    maxRecs, maxKey.n0, shy );
	// don't loop forever
	if ( shy == 0 ) shy = 1;
	// . if the last score is > 1 then interpolate based on the score
	// . a score of i has about 1.5 times the docids of a score of i+1,
	//   so if the max score (255) has N docs, then we got
	//   TOTAL = N + Nx + Nxx + Nxxx + ... (where x = 1.5)
	// . therefore, if we lost the score of 1, we just multiply the
	//   total docs for scores of 2 through 255 by 1.5 and add N; but
	//   N is small, so don't bother adding it
	// . unfortunately, if we increase the trunc limit we'll often
	//   quickly get lower scoring docids in as porous filler, so "shy"
	//   will equal 1 and we won't extrapolate, because we won't know
	//   that a bunch of other docids are really missing
	// . TODO: extrapolate based on the last docid, too, not just the
	//   score; that way we are way more continuous
	// . FIX: now we use g_conf.m_oldTruncationLimit
	while ( shy-- > 1 ) {
		// this is exponential
		numRecs = ( numRecs * 1436LL /*1106*//*1500*/ ) / 1000LL;
		// only account for truncation by docid for the first round
		//if ( numRecs == maxRecs ) {
		//	// make up for missed docids
		//	uint64_t d = g_indexdb.getDocId ( maxKey );
		//	toAdd = (toAdd * DOCID_MASK) / d;
		//}
		//numRecs += toAdd;
	}
	// log it
	log(LOG_DEBUG,"query: Interpolated tf to %"INT64".", numRecs );
	// debug msg
	//log("numRecs=%"UINT64"",numRecs);
	// . see PageRoot.cpp for an explanation of this:
	// . so starting with Lars we'll use checksumdb
	//#ifdef _LARS_
	//int64_t trecs = g_checksumdb.getRdb()->getNumGlobalRecs();
	int64_t trecs = g_clusterdb.getRdb()->getNumGlobalRecs();
	//#else
	//int64_t trecs = g_clusterdb.getRdb()->getNumGlobalRecs();
	//#endif
	if ( numRecs > trecs ) numRecs = trecs;
	// TODO: watch out for explosions! (if all scores are the same...)
	if ( maxRecs > numRecs ) return maxRecs;
	return numRecs;
}
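// A worked example of the interpolation loop above (a sketch, using the
// same integer math as the code): if the lowest score present is
// shy = 4 and numRecs = 1000 going in, the loop runs three times
// (for shy = 4, 3, 2) and multiplies by 1436/1000 each pass:
//	1000 -> 1436 -> 2062 -> 2961
// i.e. each one-point drop in score is modeled as adding about 43.6%
// more docids.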
// keys are stored from lowest to highest
key_t Indexdb::makeStartKey ( int64_t termId ) {
	return makeKey ( termId , 255 /*score*/ ,
			 0x0000000000000000LL /*docId*/ , true  /*delKey?*/ );
}
key_t Indexdb::makeEndKey ( int64_t termId ) {
	return makeKey ( termId , 0 /*score*/ ,
			 0xffffffffffffffffLL /*docId*/ , false /*delKey?*/ );
}