mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 04:07:13 +03:00
Fix a couple of catdb generation bugs.
A MAX_CATIDS violation was causing corruption. Not saving the catdb tree to catdb-saved.dat was causing missing catdb recs.
This commit is contained in:
parent
1d133e87c9
commit
3374ce450a
@ -198,6 +198,7 @@ bool CatRec::set ( Url *url , char *data , long dataSize , bool gotByIp ) {
|
||||
log ( "tagdb: Deserialized datasize %i != %li for url %s so "
|
||||
"ignoring tagdb record.",
|
||||
p - m_data, m_dataSize , url->getUrl() );
|
||||
return false;
|
||||
char *xx = NULL; *xx = 0;
|
||||
}
|
||||
|
||||
|
@ -62,7 +62,7 @@ bool Catdb::init ( ) {
|
||||
-1 , // fixed record size
|
||||
//g_hostdb.m_groupMask ,
|
||||
//g_hostdb.m_groupId ,
|
||||
g_conf.m_catdbMinFilesToMerge ,
|
||||
2,//g_conf.m_catdbMinFilesToMerge ,
|
||||
treeMem ,//g_conf.m_catdbMaxTreeMem ,
|
||||
maxTreeNodes ,
|
||||
// now we balance so Sync.cpp can ordered huge list
|
||||
@ -74,7 +74,7 @@ bool Catdb::init ( ) {
|
||||
&m_pc ,
|
||||
false,
|
||||
false,
|
||||
12,
|
||||
12, // keysize
|
||||
false,
|
||||
true )) // is collectionless?
|
||||
return false;
|
||||
|
@ -26,7 +26,7 @@
|
||||
#define MAX_TAG_LEN 127
|
||||
#define MAX_URL_CATIDS 64
|
||||
#define MAX_URLTXT_SIZE 500000
|
||||
#define MAX_CATIDS 64
|
||||
#define MAX_CATIDS 96
|
||||
#define MAX_CATNAME_LEN 1024
|
||||
|
||||
#define HASHTABLE_SIZE (1024*1024)
|
||||
|
2
Conf.h
2
Conf.h
@ -164,7 +164,7 @@ class Conf {
|
||||
long m_catdbMaxTreeMem;
|
||||
long m_catdbMaxDiskPageCacheMem;
|
||||
long m_catdbMaxCacheMem;
|
||||
long m_catdbMinFilesToMerge;
|
||||
//long m_catdbMinFilesToMerge;
|
||||
|
||||
long m_revdbMaxTreeMem;
|
||||
long m_timedbMaxTreeMem;
|
||||
|
21
Msg9b.cpp
21
Msg9b.cpp
@ -100,6 +100,10 @@ bool Msg9b::addCatRecs ( char *urls ,
|
||||
site.set ( p , e - p , false ); // addwww?
|
||||
// normalize the url
|
||||
g_catdb.normalizeUrl(&site, &site);
|
||||
|
||||
// sanity
|
||||
if ( numCatids[k] > MAX_CATIDS ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// make a siteRec from this url
|
||||
CatRec sr;
|
||||
// returns false and sets g_errno on error
|
||||
@ -110,6 +114,10 @@ bool Msg9b::addCatRecs ( char *urls ,
|
||||
char *data = sr.getData ();
|
||||
long dataSize = sr.getDataSize ();
|
||||
key_t key;
|
||||
// sanity test
|
||||
CatRec cr2;
|
||||
if ( ! cr2.set ( NULL , sr.getData(), sr.getDataSize(),false)){
|
||||
char *xx=NULL;*xx=0; }
|
||||
// debug when generating catdb
|
||||
//char *x = p;
|
||||
//for ( ; x<e ; x++ ) {
|
||||
@ -133,6 +141,19 @@ bool Msg9b::addCatRecs ( char *urls ,
|
||||
else if ( ! m_list.addRecord ( key, dataSize, data ) )
|
||||
return true;
|
||||
|
||||
/*
|
||||
// debug point
|
||||
SafeBuf sb;
|
||||
//sb.safeMemcpy(p , e-p );
|
||||
sb.safeStrcpy(sr.m_url);
|
||||
sb.safePrintf(" ");
|
||||
for ( long i = 0 ; i < numCatids[k] ; i++ )
|
||||
sb.safePrintf ( "%li " , catids[c+i] );
|
||||
log("catdb: adding key=%s url=%s",
|
||||
KEYSTR(&key,12),
|
||||
sb.getBufStart());
|
||||
*/
|
||||
|
||||
// debug
|
||||
//log("gencat: adding url=%s",sr.m_url);
|
||||
|
||||
|
@ -1086,7 +1086,6 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
||||
//g_tfndb.getRdb(),
|
||||
g_tagdb.getRdb(),
|
||||
g_clusterdb.getRdb(),
|
||||
//g_catdb.getRdb(),
|
||||
g_linkdb.getRdb(),
|
||||
g_cachedb.getRdb(),
|
||||
g_serpdb.getRdb(),
|
||||
|
@ -4105,6 +4105,7 @@ void Parms::init ( ) {
|
||||
m->m_type = TYPE_LONG;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "catdb min files to merge";
|
||||
m->m_desc = "";
|
||||
m->m_off = (char *)&g_conf.m_catdbMinFilesToMerge - g;
|
||||
@ -4113,7 +4114,6 @@ void Parms::init ( ) {
|
||||
m->m_save = 0;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "revdb max tree mem";
|
||||
m->m_desc = "Revdb holds the meta list we added for this doc.";
|
||||
m->m_off = (char *)&g_conf.m_revdbMaxTreeMem - g;
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include "Clusterdb.h"
|
||||
#include "Hostdb.h"
|
||||
#include "Tagdb.h"
|
||||
//#include "Catdb.h"
|
||||
#include "Catdb.h"
|
||||
#include "Posdb.h"
|
||||
#include "Cachedb.h"
|
||||
#include "Monitordb.h"
|
||||
@ -411,7 +411,7 @@ bool Process::init ( ) {
|
||||
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_clusterdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_tagdb.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_catdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_catdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_statsdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_linkdb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_cachedb.getRdb ();
|
||||
@ -1657,7 +1657,7 @@ void Process::resetAll ( ) {
|
||||
rdb->reset();
|
||||
}
|
||||
|
||||
//g_catdb .reset();
|
||||
g_catdb .reset();
|
||||
g_collectiondb .reset();
|
||||
g_categories1 .reset();
|
||||
g_categories2 .reset();
|
||||
@ -1758,7 +1758,7 @@ void Process::resetPageCaches ( ) {
|
||||
//g_tfndb .getDiskPageCache()->reset();
|
||||
//g_checksumdb .getDiskPageCache()->reset();
|
||||
g_clusterdb .getDiskPageCache()->reset();
|
||||
//g_catdb .getDiskPageCache()->reset();
|
||||
g_catdb .getDiskPageCache()->reset();
|
||||
//g_placedb .getDiskPageCache()->reset();
|
||||
g_doledb .getDiskPageCache()->reset();
|
||||
//g_statsdb .getDiskPageCache()->reset();
|
||||
|
13
Rdb.cpp
13
Rdb.cpp
@ -261,8 +261,8 @@ bool Rdb::init ( char *dir ,
|
||||
if ( ! loadTree ( ) ) return false;
|
||||
|
||||
// add the single dummy collection for catdb
|
||||
//if ( g_catdb.getRdb() == this ) //||
|
||||
// return g_catdb.addColl ( NULL );
|
||||
if ( g_catdb.getRdb() == this )
|
||||
return g_catdb.addColl ( NULL );
|
||||
if ( g_statsdb.getRdb() == this )
|
||||
return g_statsdb.addColl ( NULL );
|
||||
if ( g_cachedb.getRdb() == this )
|
||||
@ -275,8 +275,6 @@ bool Rdb::init ( char *dir ,
|
||||
// return g_facebookdb.addColl ( NULL );
|
||||
if ( g_syncdb.getRdb() == this )
|
||||
return g_syncdb.addColl ( NULL );
|
||||
if ( g_catdb.getRdb() == this )
|
||||
return g_catdb.addColl ( NULL );
|
||||
|
||||
// set this for use below
|
||||
//*(long long *)m_gbcounteventsTermId =
|
||||
@ -2001,6 +1999,13 @@ bool Rdb::addRecord ( collnum_t collnum,
|
||||
}
|
||||
*/
|
||||
|
||||
// debug testing
|
||||
//if ( m_rdbId == RDB_CATDB ) {
|
||||
// // show key
|
||||
// log("rdb: adding key=%s to tree n=%li",KEYSTR(key,12) ,n);
|
||||
//}
|
||||
|
||||
|
||||
//jumpdown:
|
||||
|
||||
// if it exists then annihilate it
|
||||
|
4
main.cpp
4
main.cpp
@ -11060,7 +11060,9 @@ void dumpTagdb (char *coll,long startFileNum,long numFiles,bool includeTree,
|
||||
data ,
|
||||
size ,
|
||||
false);
|
||||
printf("caturl=%s #catids=%li version=%li\n"
|
||||
fprintf(stdout,
|
||||
"key=%s caturl=%s #catids=%li version=%li\n"
|
||||
,KEYSTR(&k,12)
|
||||
,crec.m_url
|
||||
,(long)crec.m_numCatids
|
||||
,(long)crec.m_version
|
||||
|
Loading…
Reference in New Issue
Block a user