mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
127 lines
4.1 KiB
C
127 lines
4.1 KiB
C
|
// Matt Wells, copyright Jun 2001
|
||
|
|
||
|
// . IndexList is a list of keys
|
||
|
// . some keys are 12 bytes, some are 6 bytes (compressed)
|
||
|
// . see Indexdb.h for format of the keys
|
||
|
|
||
|
// . you can set from a TitleRec/SiteRec pair
|
||
|
// . you can set from a TermTable
|
||
|
// . we use this class to generate the final indexList for a parsed document
|
||
|
// . we have to do some #include's in the .cpp cuz the TitleRec contains an
|
||
|
// IndexList for holding indexed link Text
|
||
|
// . the TermTable has an addIndexList() function
|
||
|
// . we map 32bit scores from a TermTable to 8bits scores by taking the log
|
||
|
// of the score if it's >= 128*256, otherwise, we keep it as is
|
||
|
|
||
|
|
||
|
// override all funcs in RdbList where m_useShortKeys is true...
|
||
|
// skipCurrentRec() etc need to use m_useHalfKeys in RdbList cuz
|
||
|
// that is needed by many generic routines, like merge_r, RdbMap, Msg1, Msg22..
|
||
|
// We would need to make it a virtual function which would slow things down...
|
||
|
// or make those classes have specialized functions for IndexLists... in
|
||
|
// addition to the RdbLists they already support
|
||
|
|
||
|
#ifndef _INDEXLIST_H_
|
||
|
#define _INDEXLIST_H_
|
||
|
|
||
|
#include "RdbList.h"
|
||
|
//#include "SiteRec.h"
|
||
|
//#include "TermTable.h" // for setting from TitleRec/SiteRec
|
||
|
#include "Indexdb.h" // g_indexdb.makeKey()
|
||
|
|
||
|
class IndexList : public RdbList {
|
||
|
|
||
|
public:
|
||
|
|
||
|
// why do i have to repeat this for LinkInfo::set() calling our set()??
|
||
|
void set ( char *list , long listSize , bool ownData ) {
|
||
|
RdbList::set ( list ,
|
||
|
listSize ,
|
||
|
list , // alloc
|
||
|
listSize , // alloc size
|
||
|
0 , // fixed data size
|
||
|
ownData ,
|
||
|
true , // use half keys?
|
||
|
sizeof(key_t));// 12 bytes per key
|
||
|
};
|
||
|
|
||
|
// . set from a termtable and old IndexList (can be NULL)
|
||
|
// . oldList is subtracted from this list
|
||
|
/*
|
||
|
bool set ( class TermTable *table ,
|
||
|
long long docId ,
|
||
|
class IndexList *oldList ,
|
||
|
class IndexList *newDateList ,
|
||
|
long newDate ,
|
||
|
class IndexList *oldDateList ,
|
||
|
class Sections *newSections ,
|
||
|
class Sections *oldSections ,
|
||
|
unsigned long long *chksum1Ptr , // = NULL,
|
||
|
long niceness ); // = 2);
|
||
|
bool subtract ( TermTable *ourTable , class IndexList *oldList1 );
|
||
|
*/
|
||
|
|
||
|
|
||
|
// clear the low bits on the keys so terms are DELETED
|
||
|
void clearDelBits ( );
|
||
|
|
||
|
void print();
|
||
|
|
||
|
//unsigned char score32to8 ( unsigned long score ) ;
|
||
|
//static unsigned long score8to32(unsigned char score8);
|
||
|
|
||
|
// . these are made for special IndexLists, too
|
||
|
// . getTermId() assumes as 12 byte key
|
||
|
long long getCurrentTermId12 ( ) {
|
||
|
return getTermId12 ( m_listPtr ); };
|
||
|
long long getTermId12 ( char *rec ) {
|
||
|
return (*(unsigned long long *)(&rec[4])) >> 16 ;
|
||
|
};
|
||
|
long long getTermId16 ( char *rec ) {
|
||
|
return (*(unsigned long long *)(&rec[8])) >> 16 ;
|
||
|
};
|
||
|
//long long getTermId12 ( char *rec ) {
|
||
|
// return ((long long)(*(unsigned long *)(m_listPtrHi+2))<<14) |
|
||
|
// ((*(unsigned short *)(m_listPtrHi))>>2) ;
|
||
|
//};
|
||
|
// these 2 assume 12 and 6 byte keys respectively
|
||
|
long long getCurrentDocId () {
|
||
|
if ( isHalfBitOn ( m_listPtr ) ) return getDocId6 (m_listPtr);
|
||
|
else return getDocId12(m_listPtr);
|
||
|
};
|
||
|
long long getDocId ( char *rec ) {
|
||
|
if ( isHalfBitOn ( rec ) ) return getDocId6 (rec);
|
||
|
else return getDocId12(rec);
|
||
|
};
|
||
|
long long getCurrentDocId12 ( ) {
|
||
|
return getDocId12 ( m_listPtr ); };
|
||
|
long long getDocId12 ( char *rec ) {
|
||
|
return ((*(unsigned long long *)(rec)) >> 2) & DOCID_MASK; };
|
||
|
//long long getDocId12 ( char *rec ) {
|
||
|
// ((*(unsigned long *)rec)>>10) |
|
||
|
// (((long long)(*(unsigned short *)(rec+4)))<<22);
|
||
|
//};
|
||
|
long long getDocId6 ( char *rec ) {
|
||
|
long long docid;
|
||
|
*(long *)(&docid) = *(long *)rec;
|
||
|
((char *)&docid)[4] = rec[4];
|
||
|
docid >>= 2;
|
||
|
return docid & DOCID_MASK;
|
||
|
};
|
||
|
// this works with either 12 or 6 byte keys
|
||
|
unsigned char getCurrentScore ( ) {
|
||
|
return getScore(m_listPtr); };
|
||
|
unsigned char getScore ( char *rec ) { return ~rec[5]; };
|
||
|
|
||
|
// uncomplemented...
|
||
|
void setScore ( char *rec , char score ) { rec[5] = score; };
|
||
|
|
||
|
// for date lists only...
|
||
|
long getCurrentDate ( ) { return ~*(long *)(m_listPtr+6); };
|
||
|
};
|
||
|
|
||
|
#endif
|
||
|
|
||
|
|
||
|
|