Log cleanups, mostly.

Took out the disk page cache; it's kinda buggy and needs fixing at some point.
commit 1b5057ad42
parent 2ffad5d835
Matt Wells, 2013-12-18 10:57:18 -08:00
11 changed files with 37 additions and 32 deletions
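Most hunks below demote startup chatter from LOG_INFO to LOG_DEBUG (the RdbBase.cpp hunk also swaps logf() for log(); as I understand it, logf() forces the message out regardless of filtering, which is why it gets demoted too). A minimal sketch of the idea, not Gigablast's actual logger: a leveled log() only emits messages at or above a runtime threshold, so the demoted messages vanish from default output. The level values and the s_minLevel threshold here are hypothetical; the real logger is driven by config flags.

	#include <cstdarg>
	#include <cstdio>

	// hypothetical levels and threshold, for illustration only
	enum { LOG_DEBUG = 0 , LOG_INFO = 1 , LOG_WARN = 2 };
	static long s_minLevel = LOG_INFO;

	// returns false so callers can write "return log(...)" on error
	// paths, matching the usage visible in the hunks below
	static bool log ( long type , const char *fmt , ... ) {
		if ( type < s_minLevel ) return false; // LOG_DEBUG filtered
		va_list ap;
		va_start ( ap , fmt );
		vfprintf ( stderr , fmt , ap );
		va_end ( ap );
		fputc ( '\n' , stderr );
		return false;
	}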

Cachedb.cpp

@@ -88,7 +88,7 @@ bool Cachedb::addColl ( char *coll, bool doVerify ) {
 bool Cachedb::verify ( char *coll ) {
 	// coll is NULL here methinks
-	log ( LOG_INFO, "db: Verifying %s...",m_name );
+	log ( LOG_DEBUG, "db: Verifying %s...",m_name );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -167,7 +167,7 @@ bool Cachedb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: %s passed verification successfully for "
+	log ( LOG_DEBUG, "db: %s passed verification successfully for "
 	      "%li recs.", m_name,count );
 	// DONE
 	g_threads.enableThreads();

Clusterdb.cpp

@@ -350,7 +350,7 @@ bool Clusterdb::addColl ( char *coll, bool doVerify ) {
 }
 bool Clusterdb::verify ( char *coll ) {
-	log ( LOG_INFO, "db: Verifying Clusterdb for coll %s...", coll );
+	log ( LOG_DEBUG, "db: Verifying Clusterdb for coll %s...", coll );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -411,7 +411,7 @@ bool Clusterdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: Clusterdb passed verification successfully for "
+	log ( LOG_DEBUG, "db: Clusterdb passed verification successfully for "
 	      "%li recs.", count );
 	// DONE
 	g_threads.enableThreads();

Collectiondb.cpp

@@ -527,8 +527,8 @@ bool Collectiondb::registerCollRec ( CollectionRec *cr ,
 	if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
 	// debug message
-	log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
-	      coll,(long)cr->m_collnum);
+	//log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
+	//	coll,(long)cr->m_collnum);
 	// tell SpiderCache about this collection, it will create a
 	// SpiderCollection class for it.
@@ -1383,6 +1383,8 @@ bool CollectionRec::load ( char *coll , long i ) {
 	m_collLen = gbstrlen ( coll );
 	strcpy ( m_coll , coll );
+	log(LOG_INFO,"db: loading data for %s",coll);
 	// collection name HACK for backwards compatibility
 	//if ( strcmp ( coll , "main" ) == 0 ) {
 	//	m_coll[0] = '\0';
@@ -1432,7 +1434,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// LOAD LOCAL
 	sprintf ( tmp1 , "%scoll.%s.%li/localcrawlinfo.dat",
 		  g_hostdb.m_dir , m_coll , (long)m_collnum );
-	log(LOG_INFO,"db: loading %s",tmp1);
+	log(LOG_DEBUG,"db: loading %s",tmp1);
 	m_localCrawlInfo.reset();
 	SafeBuf sb;
 	// fillfromfile returns 0 if does not exist, -1 on read error
@@ -1443,7 +1445,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// LOAD GLOBAL
 	sprintf ( tmp1 , "%scoll.%s.%li/globalcrawlinfo.dat",
 		  g_hostdb.m_dir , m_coll , (long)m_collnum );
-	log(LOG_INFO,"db: loading %s",tmp1);
+	log(LOG_DEBUG,"db: loading %s",tmp1);
 	m_globalCrawlInfo.reset();
 	sb.reset();
 	if ( sb.fillFromFile ( tmp1 ) > 0 )

File.cpp

@@ -698,8 +698,10 @@ bool File::unlink ( ) {
 	if ( status == 0 ) return true;
 	// return false and set g_errno on error
 	if ( status < 0 ) return false;
-	// log it so we can see what happened to timedb!
-	log(LOG_INFO,"disk: unlinking %s", m_filename );
+	// . log it so we can see what happened to timedb!
+	// . don't log startup unlinks of "tmpfile"
+	if ( ! strstr(m_filename,"tmpfile") )
+		log(LOG_INFO,"disk: unlinking %s", m_filename );
 	// remove ourselves from the disk
 	if ( ::unlink ( m_filename ) == 0 ) return true;
 	// sync it to disk in case power goes out
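The trailing context line above mentions syncing to disk so the removal survives a power cut. A hedged sketch of the usual POSIX pattern that comment points at (the helper name is my illustration, not File.cpp's actual code): the containing directory itself must be fsync'd for the removal of the directory entry to be durable.

	#include <fcntl.h>
	#include <unistd.h>

	// fsync the directory that held a just-unlinked file so the
	// name removal is on disk if power is lost right afterward
	static bool syncDirOf ( const char *dirPath ) {
		int dfd = open ( dirPath , O_RDONLY );
		if ( dfd < 0 ) return false;
		bool ok = ( fsync ( dfd ) == 0 ); // flush the dir update
		close ( dfd );
		return ok;
	}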

Linkdb.cpp

@@ -184,7 +184,7 @@ bool Linkdb::addColl ( char *coll, bool doVerify ) {
 }
 bool Linkdb::verify ( char *coll ) {
-	log ( LOG_INFO, "db: Verifying Linkdb for coll %s...", coll );
+	log ( LOG_DEBUG, "db: Verifying Linkdb for coll %s...", coll );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -265,7 +265,7 @@ bool Linkdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: Linkdb passed verification successfully for "
+	log ( LOG_DEBUG, "db: Linkdb passed verification successfully for "
 	      "%li recs.", count );
 	// DONE
 	g_threads.enableThreads();

Posdb.cpp

@@ -122,7 +122,7 @@ bool Posdb::init ( ) {
 	long nodeSize = (sizeof(key144_t)+12+4) + sizeof(collnum_t);
 	long maxTreeNodes = maxTreeMem / nodeSize ;
-	long pageSize = GB_INDEXDB_PAGE_SIZE;
+	//long pageSize = GB_INDEXDB_PAGE_SIZE;
 	// we now use a disk page cache as opposed to the
 	// old rec cache. i am trying to do away with the Rdb::m_cache rec
 	// cache in favor of cleverly used disk page caches, because
@@ -141,6 +141,7 @@ bool Posdb::init ( ) {
 	//pcmem = 0;
 	// . init the page cache
 	// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
+	/*
 	if ( ! m_pc.init ( "posdb",
 			   RDB_POSDB,
 			   pcmem ,
@@ -148,6 +149,7 @@ bool Posdb::init ( ) {
 			   true , // use RAM disk?
 			   false )) // minimize disk seeks?
 		return log("db: Posdb init failed.");
+	*/
 	// . set our own internal rdb
 	// . max disk space for bin tree is same as maxTreeMem so that we
@@ -169,7 +171,10 @@ bool Posdb::init ( ) {
 			  0 , // maxCacheNodes ,
 			  true , // use half keys?
 			  false , // g_conf.m_posdbSav
-			  &m_pc ,
+			  // newer systems have tons of ram to use
+			  // for their disk page cache. it is slower than
+			  // ours but the new engine has much slower things
+			  NULL,//&m_pc ,
 			  false , // istitledb?
 			  false , // preloaddiskpagecache?
 			  sizeof(key144_t)
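For context on what just got NULL'd out: m_pc is, I believe, the disk page cache named in the commit message, a structure that keeps fixed-size file pages in RAM so repeat reads skip the disk. A minimal sketch of that idea follows (a hypothetical class, not the real implementation, which would also evict pages and handle multiple files). Passing NULL to Rdb instead leans on the kernel's own page cache, which the new comment in the hunk argues is good enough on big-RAM machines.

	#include <map>
	#include <cstring>
	#include <unistd.h>

	// one cached slot per page number; a single file descriptor is
	// assumed and nothing is ever evicted, which real code must fix
	class MiniPageCache {
	public:
		explicit MiniPageCache ( long pageSize ) :
			m_pageSize(pageSize) {}
		bool readPage ( int fd , long pageNum , char *buf ) {
			std::map<long,char*>::iterator it =
				m_pages.find ( pageNum );
			if ( it != m_pages.end() ) { // hit: serve from RAM
				memcpy ( buf , it->second , m_pageSize );
				return true;
			}
			// miss: read from disk, then keep a copy
			if ( pread ( fd , buf , m_pageSize ,
				     pageNum * m_pageSize ) != m_pageSize )
				return false;
			char *copy = new char [ m_pageSize ];
			memcpy ( copy , buf , m_pageSize );
			m_pages [ pageNum ] = copy;
			return true;
		}
	private:
		long m_pageSize;
		std::map<long,char*> m_pages; // pageNum -> page bytes
	};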
@@ -235,7 +240,7 @@ bool Posdb::addColl ( char *coll, bool doVerify ) {
 bool Posdb::verify ( char *coll ) {
 	return true;
-	log ( LOG_INFO, "db: Verifying Posdb for coll %s...", coll );
+	log ( LOG_DEBUG, "db: Verifying Posdb for coll %s...", coll );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -314,7 +319,7 @@ bool Posdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: Posdb passed verification successfully for %li "
+	log ( LOG_DEBUG, "db: Posdb passed verification successfully for %li "
 	      "recs.", count );
 	// DONE
 	g_threads.enableThreads();

RdbBase.cpp

@@ -771,7 +771,7 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
 		g_statsdb.m_disabled = false;
 		if ( ! status ) return log("db: Save failed.");
 	}
-	if ( ! isNew ) logf(LOG_INFO,"db: Added %s for collnum=%li pages=%li",
+	if ( ! isNew ) log(LOG_DEBUG,"db: Added %s for collnum=%li pages=%li",
 		       name ,(long)m_collnum,m->getNumPages());
 	// open this big data file for reading only
 	if ( ! isNew ) {

Spiderdb.cpp

@@ -625,7 +625,7 @@ bool Spiderdb::addColl ( char *coll, bool doVerify ) {
 bool Spiderdb::verify ( char *coll ) {
 	//return true;
-	log ( LOG_INFO, "db: Verifying Spiderdb for coll %s...", coll );
+	log ( LOG_DEBUG, "db: Verifying Spiderdb for coll %s...", coll );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -688,7 +688,7 @@ bool Spiderdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO,"db: Spiderdb passed verification successfully for %li "
+	log (LOG_DEBUG,"db: Spiderdb passed verification successfully for %li "
 	      "recs.", count );
 	// DONE
 	g_threads.enableThreads();

Tagdb.cpp

@@ -1873,7 +1873,7 @@ bool Tagdb::verify ( char *coll ) {
 	char *rdbName = NULL;
 	rdbName = "Tagdb";
-	log ( LOG_INFO, "db: Verifying %s for coll %s...", rdbName, coll );
+	log ( LOG_DEBUG, "db: Verifying %s for coll %s...", rdbName, coll );
 	g_threads.disableThreads();
@@ -1945,7 +1945,7 @@ bool Tagdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: %s passed verification successfully for %li "
+	log ( LOG_DEBUG, "db: %s passed verification successfully for %li "
 	      "recs.",rdbName, count );
 	// turn threads back on

Titledb.cpp

@@ -55,13 +55,16 @@ bool Titledb::init ( ) {
 	// do not use any page cache if doing tmp cluster in order to
 	// prevent swapping
 	if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
-	long pageSize = GB_INDEXDB_PAGE_SIZE;
+	//long pageSize = GB_INDEXDB_PAGE_SIZE;
 	// init the page cache
 	// . MDW: "minimize disk seeks" not working otherwise i'd enable it!
+	/*
 	if ( ! m_pc.init ( "titledb",
 			   RDB_TITLEDB,
 			   pcmem ,
 			   pageSize ) )
 		return log("db: Titledb init failed.");
+	*/
 	// each entry in the cache is usually just a single record, no lists
 	//long maxCacheNodes = g_conf.m_titledbMaxCacheMem / (10*1024);
@@ -87,7 +90,7 @@ bool Titledb::init ( ) {
 			  0,//maxCacheNodes ,
 			  false ,// half keys?
 			  false ,// g_conf.m_titledbSav
-			  &m_pc , // page cache ptr
+			  NULL,//&m_pc , // page cache ptr
 			  true ) )// is titledb?
 		return false;
 	return true;
@@ -136,7 +139,7 @@ bool Titledb::addColl ( char *coll, bool doVerify ) {
 }
 bool Titledb::verify ( char *coll ) {
-	log ( LOG_INFO, "db: Verifying Titledb for coll %s...", coll );
+	log ( LOG_DEBUG, "db: Verifying Titledb for coll %s...", coll );
 	g_threads.disableThreads();
 	Msg5 msg5;
@@ -209,7 +212,7 @@ bool Titledb::verify ( char *coll ) {
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "db: Titledb passed verification successfully for %li"
+	log ( LOG_DEBUG, "db: Titledb passed verification successfully for %li"
 	      " recs.", count );
 	// DONE
 	g_threads.enableThreads();

coll.conf

@@ -968,8 +968,6 @@
 <harvestLinks>1</>
 <harvestLinks>1</>
 <harvestLinks>1</>
-<harvestLinks>1</>
 <spidersEnabled>1</>
 <spidersEnabled>1</>
 <spidersEnabled>1</>
-<spidersEnabled>1</>
@@ -1004,7 +1002,6 @@
 <filterFrequency>60.000000</>
 <filterFrequency>30.000000</>
 <filterFrequency>30.000000</>
-<filterFrequency>0.000000</>
 # Do not allow more than this many outstanding spiders for all urls in this
 # priority.
@@ -1025,7 +1022,6 @@
 <maxSpidersPerRule>1</>
 <maxSpidersPerRule>99</>
 <maxSpidersPerRule>99</>
-<maxSpidersPerRule>0</>
 # Allow this many spiders per IP.
 <maxSpidersPerIp>1</>
@@ -1045,7 +1041,6 @@
 <maxSpidersPerIp>1</>
 <maxSpidersPerIp>1</>
 <maxSpidersPerIp>1</>
-<maxSpidersPerIp>0</>
 # Wait at least this long before downloading urls from the same IP address.
 <spiderIpWait>1000</>
@@ -1065,7+1060,6 @@
 <spiderIpWait>1000</>
 <spiderIpWait>1000</>
 <spiderIpWait>1000</>
-<spiderIpWait>0</>
 <filterPriority>80</>
 <filterPriority>-3</>
 <filterPriority>3</>
@@ -1083,6 +1077,5 @@
 <filterPriority>19</>
 <filterPriority>1</>
 <filterPriority>0</>
-<filterPriority>0</>
 # Use <diffbotAPI> tag.