mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Mostly log cleanups.
Took out the disk page cache, which is kind of buggy and needs to be fixed at some point.
This commit is contained in:
parent
2ffad5d835
commit
1b5057ad42
@ -88,7 +88,7 @@ bool Cachedb::addColl ( char *coll, bool doVerify ) {
|
||||
|
||||
bool Cachedb::verify ( char *coll ) {
|
||||
// coll is NULL here methinks
|
||||
log ( LOG_INFO, "db: Verifying %s...",m_name );
|
||||
log ( LOG_DEBUG, "db: Verifying %s...",m_name );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -167,7 +167,7 @@ bool Cachedb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO, "db: %s passed verification successfully for "
|
||||
log ( LOG_DEBUG, "db: %s passed verification successfully for "
|
||||
"%li recs.", m_name,count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
@ -350,7 +350,7 @@ bool Clusterdb::addColl ( char *coll, bool doVerify ) {
|
||||
}
|
||||
|
||||
bool Clusterdb::verify ( char *coll ) {
|
||||
log ( LOG_INFO, "db: Verifying Clusterdb for coll %s...", coll );
|
||||
log ( LOG_DEBUG, "db: Verifying Clusterdb for coll %s...", coll );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -411,7 +411,7 @@ bool Clusterdb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO, "db: Clusterdb passed verification successfully for "
|
||||
log ( LOG_DEBUG, "db: Clusterdb passed verification successfully for "
|
||||
"%li recs.", count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
@ -527,8 +527,8 @@ bool Collectiondb::registerCollRec ( CollectionRec *cr ,
|
||||
if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
|
||||
|
||||
// debug message
|
||||
log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
|
||||
coll,(long)cr->m_collnum);
|
||||
//log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
|
||||
// coll,(long)cr->m_collnum);
|
||||
|
||||
// tell SpiderCache about this collection, it will create a
|
||||
// SpiderCollection class for it.
|
||||
@ -1383,6 +1383,8 @@ bool CollectionRec::load ( char *coll , long i ) {
|
||||
m_collLen = gbstrlen ( coll );
|
||||
strcpy ( m_coll , coll );
|
||||
|
||||
log(LOG_INFO,"db: loading data for %s",coll);
|
||||
|
||||
// collection name HACK for backwards compatibility
|
||||
//if ( strcmp ( coll , "main" ) == 0 ) {
|
||||
// m_coll[0] = '\0';
|
||||
@ -1432,7 +1434,7 @@ bool CollectionRec::load ( char *coll , long i ) {
|
||||
// LOAD LOCAL
|
||||
sprintf ( tmp1 , "%scoll.%s.%li/localcrawlinfo.dat",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log(LOG_INFO,"db: loading %s",tmp1);
|
||||
log(LOG_DEBUG,"db: loading %s",tmp1);
|
||||
m_localCrawlInfo.reset();
|
||||
SafeBuf sb;
|
||||
// fillfromfile returns 0 if does not exist, -1 on read error
|
||||
@ -1443,7 +1445,7 @@ bool CollectionRec::load ( char *coll , long i ) {
|
||||
// LOAD GLOBAL
|
||||
sprintf ( tmp1 , "%scoll.%s.%li/globalcrawlinfo.dat",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log(LOG_INFO,"db: loading %s",tmp1);
|
||||
log(LOG_DEBUG,"db: loading %s",tmp1);
|
||||
m_globalCrawlInfo.reset();
|
||||
sb.reset();
|
||||
if ( sb.fillFromFile ( tmp1 ) > 0 )
|
||||
|
6
File.cpp
6
File.cpp
@ -698,8 +698,10 @@ bool File::unlink ( ) {
|
||||
if ( status == 0 ) return true;
|
||||
// return false and set g_errno on error
|
||||
if ( status < 0 ) return false;
|
||||
// log it so we can see what happened to timedb!
|
||||
log(LOG_INFO,"disk: unlinking %s", m_filename );
|
||||
// . log it so we can see what happened to timedb!
|
||||
// . don't log startup unlinks of "tmpfile"
|
||||
if ( ! strstr(m_filename,"tmpfile") )
|
||||
log(LOG_INFO,"disk: unlinking %s", m_filename );
|
||||
// remove ourselves from the disk
|
||||
if ( ::unlink ( m_filename ) == 0 ) return true;
|
||||
// sync it to disk in case power goes out
|
||||
|
@ -184,7 +184,7 @@ bool Linkdb::addColl ( char *coll, bool doVerify ) {
|
||||
}
|
||||
|
||||
bool Linkdb::verify ( char *coll ) {
|
||||
log ( LOG_INFO, "db: Verifying Linkdb for coll %s...", coll );
|
||||
log ( LOG_DEBUG, "db: Verifying Linkdb for coll %s...", coll );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -265,7 +265,7 @@ bool Linkdb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO, "db: Linkdb passed verification successfully for "
|
||||
log ( LOG_DEBUG, "db: Linkdb passed verification successfully for "
|
||||
"%li recs.", count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
13
Posdb.cpp
13
Posdb.cpp
@ -122,7 +122,7 @@ bool Posdb::init ( ) {
|
||||
long nodeSize = (sizeof(key144_t)+12+4) + sizeof(collnum_t);
|
||||
long maxTreeNodes = maxTreeMem / nodeSize ;
|
||||
|
||||
long pageSize = GB_INDEXDB_PAGE_SIZE;
|
||||
//long pageSize = GB_INDEXDB_PAGE_SIZE;
|
||||
// we now use a disk page cache as opposed to the
|
||||
// old rec cache. i am trying to do away with the Rdb::m_cache rec
|
||||
// cache in favor of cleverly used disk page caches, because
|
||||
@ -141,6 +141,7 @@ bool Posdb::init ( ) {
|
||||
//pcmem = 0;
|
||||
// . init the page cache
|
||||
// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
|
||||
/*
|
||||
if ( ! m_pc.init ( "posdb",
|
||||
RDB_POSDB,
|
||||
pcmem ,
|
||||
@ -148,6 +149,7 @@ bool Posdb::init ( ) {
|
||||
true , // use RAM disk?
|
||||
false )) // minimize disk seeks?
|
||||
return log("db: Posdb init failed.");
|
||||
*/
|
||||
|
||||
// . set our own internal rdb
|
||||
// . max disk space for bin tree is same as maxTreeMem so that we
|
||||
@ -169,7 +171,10 @@ bool Posdb::init ( ) {
|
||||
0 , // maxCacheNodes ,
|
||||
true , // use half keys?
|
||||
false , // g_conf.m_posdbSav
|
||||
&m_pc ,
|
||||
// newer systems have tons of ram to use
|
||||
// for their disk page cache. it is slower than
|
||||
// ours but the new engine has much slower things
|
||||
NULL,//&m_pc ,
|
||||
false , // istitledb?
|
||||
false , // preloaddiskpagecache?
|
||||
sizeof(key144_t)
|
||||
@ -235,7 +240,7 @@ bool Posdb::addColl ( char *coll, bool doVerify ) {
|
||||
|
||||
bool Posdb::verify ( char *coll ) {
|
||||
return true;
|
||||
log ( LOG_INFO, "db: Verifying Posdb for coll %s...", coll );
|
||||
log ( LOG_DEBUG, "db: Verifying Posdb for coll %s...", coll );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -314,7 +319,7 @@ bool Posdb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO, "db: Posdb passed verification successfully for %li "
|
||||
log ( LOG_DEBUG, "db: Posdb passed verification successfully for %li "
|
||||
"recs.", count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
@ -771,7 +771,7 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
|
||||
g_statsdb.m_disabled = false;
|
||||
if ( ! status ) return log("db: Save failed.");
|
||||
}
|
||||
if ( ! isNew ) logf(LOG_INFO,"db: Added %s for collnum=%li pages=%li",
|
||||
if ( ! isNew ) log(LOG_DEBUG,"db: Added %s for collnum=%li pages=%li",
|
||||
name ,(long)m_collnum,m->getNumPages());
|
||||
// open this big data file for reading only
|
||||
if ( ! isNew ) {
|
||||
|
@ -625,7 +625,7 @@ bool Spiderdb::addColl ( char *coll, bool doVerify ) {
|
||||
|
||||
bool Spiderdb::verify ( char *coll ) {
|
||||
//return true;
|
||||
log ( LOG_INFO, "db: Verifying Spiderdb for coll %s...", coll );
|
||||
log ( LOG_DEBUG, "db: Verifying Spiderdb for coll %s...", coll );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -688,7 +688,7 @@ bool Spiderdb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO,"db: Spiderdb passed verification successfully for %li "
|
||||
log (LOG_DEBUG,"db: Spiderdb passed verification successfully for %li "
|
||||
"recs.", count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
@ -1873,7 +1873,7 @@ bool Tagdb::verify ( char *coll ) {
|
||||
char *rdbName = NULL;
|
||||
rdbName = "Tagdb";
|
||||
|
||||
log ( LOG_INFO, "db: Verifying %s for coll %s...", rdbName, coll );
|
||||
log ( LOG_DEBUG, "db: Verifying %s for coll %s...", rdbName, coll );
|
||||
|
||||
g_threads.disableThreads();
|
||||
|
||||
@ -1945,7 +1945,7 @@ bool Tagdb::verify ( char *coll ) {
|
||||
g_threads.enableThreads();
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
log ( LOG_INFO, "db: %s passed verification successfully for %li "
|
||||
log ( LOG_DEBUG, "db: %s passed verification successfully for %li "
|
||||
"recs.",rdbName, count );
|
||||
|
||||
// turn threads back on
|
||||
|
11
Titledb.cpp
11
Titledb.cpp
@ -55,13 +55,16 @@ bool Titledb::init ( ) {
|
||||
// do not use any page cache if doing tmp cluster in order to
|
||||
// prevent swapping
|
||||
if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
|
||||
long pageSize = GB_INDEXDB_PAGE_SIZE;
|
||||
//long pageSize = GB_INDEXDB_PAGE_SIZE;
|
||||
// init the page cache
|
||||
// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
|
||||
/*
|
||||
if ( ! m_pc.init ( "titledb",
|
||||
RDB_TITLEDB,
|
||||
pcmem ,
|
||||
pageSize ) )
|
||||
return log("db: Titledb init failed.");
|
||||
*/
|
||||
|
||||
// each entry in the cache is usually just a single record, no lists
|
||||
//long maxCacheNodes = g_conf.m_titledbMaxCacheMem / (10*1024);
|
||||
@ -87,7 +90,7 @@ bool Titledb::init ( ) {
|
||||
0,//maxCacheNodes ,
|
||||
false ,// half keys?
|
||||
false ,// g_conf.m_titledbSav
|
||||
&m_pc , // page cache ptr
|
||||
NULL,//&m_pc , // page cache ptr
|
||||
true ) )// is titledb?
|
||||
return false;
|
||||
return true;
|
||||
@ -136,7 +139,7 @@ bool Titledb::addColl ( char *coll, bool doVerify ) {
|
||||
}
|
||||
|
||||
bool Titledb::verify ( char *coll ) {
|
||||
log ( LOG_INFO, "db: Verifying Titledb for coll %s...", coll );
|
||||
log ( LOG_DEBUG, "db: Verifying Titledb for coll %s...", coll );
|
||||
g_threads.disableThreads();
|
||||
|
||||
Msg5 msg5;
|
||||
@ -209,7 +212,7 @@ bool Titledb::verify ( char *coll ) {
|
||||
return g_conf.m_bypassValidation;
|
||||
}
|
||||
|
||||
log ( LOG_INFO, "db: Titledb passed verification successfully for %li"
|
||||
log ( LOG_DEBUG, "db: Titledb passed verification successfully for %li"
|
||||
" recs.", count );
|
||||
// DONE
|
||||
g_threads.enableThreads();
|
||||
|
@ -968,8 +968,6 @@
|
||||
<harvestLinks>1</>
|
||||
<harvestLinks>1</>
|
||||
<harvestLinks>1</>
|
||||
<harvestLinks>1</>
|
||||
<spidersEnabled>1</>
|
||||
<spidersEnabled>1</>
|
||||
<spidersEnabled>1</>
|
||||
<spidersEnabled>1</>
|
||||
@ -1004,7 +1002,6 @@
|
||||
<filterFrequency>60.000000</>
|
||||
<filterFrequency>30.000000</>
|
||||
<filterFrequency>30.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
|
||||
# Do not allow more than this many outstanding spiders for all urls in this
|
||||
# priority.
|
||||
@ -1025,7 +1022,6 @@
|
||||
<maxSpidersPerRule>1</>
|
||||
<maxSpidersPerRule>99</>
|
||||
<maxSpidersPerRule>99</>
|
||||
<maxSpidersPerRule>0</>
|
||||
|
||||
# Allow this many spiders per IP.
|
||||
<maxSpidersPerIp>1</>
|
||||
@ -1045,7 +1041,6 @@
|
||||
<maxSpidersPerIp>1</>
|
||||
<maxSpidersPerIp>1</>
|
||||
<maxSpidersPerIp>1</>
|
||||
<maxSpidersPerIp>0</>
|
||||
|
||||
# Wait at least this long before downloading urls from the same IP address.
|
||||
<spiderIpWait>1000</>
|
||||
@ -1065,7 +1060,6 @@
|
||||
<spiderIpWait>1000</>
|
||||
<spiderIpWait>1000</>
|
||||
<spiderIpWait>1000</>
|
||||
<spiderIpWait>0</>
|
||||
<filterPriority>80</>
|
||||
<filterPriority>-3</>
|
||||
<filterPriority>3</>
|
||||
@ -1083,6 +1077,5 @@
|
||||
<filterPriority>19</>
|
||||
<filterPriority>1</>
|
||||
<filterPriority>0</>
|
||||
<filterPriority>0</>
|
||||
|
||||
# Use <diffbotAPI> tag.
|
||||
|
Loading…
Reference in New Issue
Block a user