Merge branch 'master' of github.com:gigablast/open-source-search-engine

This commit is contained in:
mwells 2013-08-29 21:17:46 -06:00
commit 2e9c8f7c6e
8 changed files with 84 additions and 58 deletions

View File

@ -220,7 +220,7 @@ bool Log::logR ( long long now , long type , char *msg , bool asterisk ,
// thread id if in "thread" // thread id if in "thread"
if ( pid != s_pid && s_pid != -1 ) { if ( pid != s_pid && s_pid != -1 ) {
//sprintf ( p , "[%li] " , (long)getpid() ); //sprintf ( p , "[%li] " , (long)getpid() );
sprintf ( p , "[%li] " , (long)pid ); sprintf ( p , "[%lu] " , (unsigned long)pid );
p += gbstrlen ( p ); p += gbstrlen ( p );
} }
// then message itself // then message itself

View File

@ -833,7 +833,9 @@ void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
// if we missed too many, then dump core // if we missed too many, then dump core
if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) { if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) {
g_inSigHandler = true;
log("loop: missed quickpoll"); log("loop: missed quickpoll");
g_inSigHandler = false;
// seems to core a lot in gbcompress() we need to // seems to core a lot in gbcompress() we need to
// put a quickpoll into zlib deflate() or // put a quickpoll into zlib deflate() or
// deflate_slow() or longest_match() function // deflate_slow() or longest_match() function

View File

@ -670,7 +670,7 @@ Log.o: Log.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \ UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h \ Log.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h \
File.h Loop.h ip.h Hostdb.h HttpRequest.h SafeBuf.h Url.h TcpSocket.h \ File.h Loop.h ip.h Hostdb.h HttpRequest.h SafeBuf.h Url.h TcpSocket.h \
Collectiondb.h Process.h Msg28.h Collectiondb.h Process.h Msg28.h Threads.h
Loop.o: Loop.cpp gb-include.h types.h fctypes.h Unicode.h \ Loop.o: Loop.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \ UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Loop.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \ Log.h Loop.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \

View File

@ -3144,6 +3144,9 @@ void SpiderLoop::spiderDoledUrls ( ) {
// can't get spidered until the one that is doled does. // can't get spidered until the one that is doled does.
if ( g_conf.m_testSpiderEnabled ) maxSpiders = 6; if ( g_conf.m_testSpiderEnabled ) maxSpiders = 6;
} }
// debug log
if ( g_conf.m_logDebugSpider )
log("spider: has %li spiders out",m_sc->m_spidersOut);
// obey max spiders per collection too // obey max spiders per collection too
if ( m_sc->m_spidersOut >= maxSpiders ) continue; if ( m_sc->m_spidersOut >= maxSpiders ) continue;
// ok, we are good to launch a spider for coll m_cri // ok, we are good to launch a spider for coll m_cri
@ -3245,8 +3248,12 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_gettingDoledbList = true; m_gettingDoledbList = true;
// log this now // log this now
//if ( g_conf.m_logDebugSpider ) if ( g_conf.m_logDebugSpider ) {
// logf(LOG_DEBUG,"spider: loading list from doledb"); m_doleStart = gettimeofdayInMillisecondsLocal();
// 12 byte doledb keys
logf(LOG_DEBUG,"spider: loading list from doledb startkey=%s",
KEYSTR(&m_sc->m_nextDoledbKey,12));
}
// get a spider rec for us to spider from doledb // get a spider rec for us to spider from doledb
if ( ! m_msg5.getList ( RDB_DOLEDB , if ( ! m_msg5.getList ( RDB_DOLEDB ,
@ -3260,6 +3267,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
// we need to read in a lot because we call // we need to read in a lot because we call
// "goto listLoop" below if the url we want // "goto listLoop" below if the url we want
// to dole is locked. // to dole is locked.
// seems like a ton of negative recs
2000 , // minRecSizes 2000 , // minRecSizes
true , // includeTree true , // includeTree
false , // addToCache false , // addToCache
@ -3315,6 +3323,16 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// unlock // unlock
m_gettingDoledbList = false; m_gettingDoledbList = false;
// log this now
if ( g_conf.m_logDebugSpider ) {
long long now = gettimeofdayInMillisecondsLocal();
long long took = now - m_doleStart;
logf(LOG_DEBUG,"spider: GOT list from doledb in %llims "
"size=%li bytes",
took,m_list.getListSize());
}
// bail instantly if in read-only mode (no RdbTrees!) // bail instantly if in read-only mode (no RdbTrees!)
if ( g_conf.m_readOnlyMode ) return false; if ( g_conf.m_readOnlyMode ) return false;
// or if doing a daily merge // or if doing a daily merge

View File

@ -1126,6 +1126,8 @@ class SpiderLoop {
// for round robining in SpiderLoop::doleUrls(), etc. // for round robining in SpiderLoop::doleUrls(), etc.
long m_cri; long m_cri;
long long m_doleStart;
long m_processed; long m_processed;
}; };

View File

@ -702,8 +702,9 @@ ThreadEntry *ThreadQueue::addEntry ( long niceness ,
if ( i == m_top ) m_top++; if ( i == m_top ) m_top++;
// debug msg // debug msg
if ( g_conf.m_logDebugThread ) if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] queued %s thread. " log(LOG_DEBUG,"thread: [t=0x%lx] queued %s thread for launch. "
"niceness=%lu. ", (long)t,getThreadType(), niceness ); "niceness=%lu. ", (unsigned long)t,
getThreadType(), niceness );
// success // success
return t; return t;
} }
@ -889,6 +890,12 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
log("threads: pthread_join %li = %s (%li)", log("threads: pthread_join %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status); (long)t->m_joinTid,mstrerror(status),status);
} }
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined1 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else #else
again: again:
@ -897,8 +904,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
int err = errno; int err = errno;
// debug the waitpid // debug the waitpid
if ( g_conf.m_logDebugThread || g_process.m_exiting ) if ( g_conf.m_logDebugThread || g_process.m_exiting )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.", log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(long)t,(long)t->m_pid); (unsigned long)t,(long)t->m_pid);
// bitch and continue if join failed // bitch and continue if join failed
if ( pid != t->m_pid ) { if ( pid != t->m_pid ) {
// waitpid() gets interrupted by various signals so // waitpid() gets interrupted by various signals so
@ -924,14 +931,14 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
// re-protect this stack // re-protect this stack
mprotect ( t->m_stack + GUARDSIZE , STACK_SIZE - GUARDSIZE, mprotect ( t->m_stack + GUARDSIZE , STACK_SIZE - GUARDSIZE,
PROT_NONE ); PROT_NONE );
#endif
// debug msg // debug msg
if ( g_conf.m_logDebugThread ) if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined with pid=%li pid=%li.", log(LOG_DEBUG,"thread: joined with pid=%li pid=%li.",
(long)t->m_pid,(long)t->m_pid); (long)t->m_pid,(long)t->m_pid);
#endif
// we may get cleaned up and re-used and our niceness reassigned // we may get cleaned up and re-used and our niceness reassigned
// right after set m_isDone to true, so save niceness // right after set m_isDone to true, so save niceness
long niceness = t->m_niceness; long niceness = t->m_niceness;
@ -1025,18 +1032,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
//only allow a quickpoll if we are nice. //only allow a quickpoll if we are nice.
//g_loop.canQuickPoll(t->m_niceness); //g_loop.canQuickPoll(t->m_niceness);
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s "
"nice=%li",getThreadType(),(long)t->m_niceness);
makeCallback ( t ); makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s "
"nice=%li", getThreadType(),(long)t->m_niceness);
//long long took = gettimeofdayInMilliseconds()-startTime; //long long took = gettimeofdayInMilliseconds()-startTime;
//if(took > 8 && maxNiceness > 0) { //if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling) // if(g_conf.m_sequentialProfiling)
@ -1053,13 +1050,13 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
if ( g_conf.m_logDebugThread ) { if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds(); long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. " log(LOG_DEBUG,"thread: [t=0x%lx] %s done1. "
"active=%li " "active=%li "
"time since queued = %llu ms " "time since queued = %llu ms "
"time since launch = %llu ms " "time since launch = %llu ms "
"time since pre-exit = %llu ms " "time since pre-exit = %llu ms "
"time since exit = %llu ms", "time since exit = %llu ms",
(long)t, (unsigned long)t,
getThreadType() , getThreadType() ,
(long)(m_launched - m_returned) , (long)(m_launched - m_returned) ,
now - t->m_queuedTime, now - t->m_queuedTime,
@ -1087,6 +1084,17 @@ void makeCallback ( ThreadEntry *t ) {
// save it // save it
long saved = g_niceness; long saved = g_niceness;
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: enter thread callback t=0x%lx "
//"type=%s "
"state=0x%lx "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_state,
(long)t->m_niceness);
// time it? // time it?
long long start; long long start;
if ( g_conf.m_maxCallbackDelay >= 0 ) if ( g_conf.m_maxCallbackDelay >= 0 )
@ -1109,6 +1117,16 @@ void makeCallback ( ThreadEntry *t ) {
} }
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"loop: exit thread callback t=0x%lx "
//"type=%s "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_niceness);
// restore global niceness // restore global niceness
g_niceness = saved; g_niceness = saved;
@ -1199,6 +1217,12 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
log("threads: pthread_join2 %li = %s (%li)", log("threads: pthread_join2 %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status); (long)t->m_joinTid,mstrerror(status),status);
} }
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined2 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else #else
again: again:
@ -1207,7 +1231,7 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
int err = errno; int err = errno;
// debug the waitpid // debug the waitpid
if ( g_conf.m_logDebugThread ) if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.", log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(long)t,(long)t->m_pid); (long)t,(long)t->m_pid);
// bitch and continue if join failed // bitch and continue if join failed
if ( pid != t->m_pid ) { if ( pid != t->m_pid ) {
@ -1362,21 +1386,10 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
//g_threads.launchThreads(); //g_threads.launchThreads();
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s",
getThreadType());
g_errno = 0; g_errno = 0;
makeCallback ( t ); makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s",
getThreadType());
// long long took = gettimeofdayInMilliseconds()-startTime; // long long took = gettimeofdayInMilliseconds()-startTime;
// if(took > 8 && maxNiceness > 0) { // if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling) // if(g_conf.m_sequentialProfiling)
@ -1393,13 +1406,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) { if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds(); long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. " log(LOG_DEBUG,"thread: [t=0x%lx] %s done2. "
"active=%li " "active=%li "
"time since queued = %llu ms " "time since queued = %llu ms "
"time since launch = %llu ms " "time since launch = %llu ms "
"time since pre-exit = %llu ms " "time since pre-exit = %llu ms "
"time since exit = %llu ms", "time since exit = %llu ms",
(long)t, (unsigned long)t,
getThreadType() , getThreadType() ,
(long)(m_launched - m_returned) , (long)(m_launched - m_returned) ,
now - t->m_queuedTime, now - t->m_queuedTime,
@ -1438,13 +1451,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) { if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds(); long long now = gettimeofdayInMilliseconds();
for ( long i = 0 ; i < numCallbacks ; i++ ) for ( long i = 0 ; i < numCallbacks ; i++ )
log(LOG_DEBUG,"thread: [%lu] %s done. " log(LOG_DEBUG,"thread: [tid=%lu] %s done3. "
"active=%li " "active=%li "
"time since queued = %llu ms " "time since queued = %llu ms "
"time since launch = %llu ms " "time since launch = %llu ms "
"time since pre-exit = %llu ms " "time since pre-exit = %llu ms "
"time since exit = %llu ms", "time since exit = %llu ms",
(long)tids[i], (unsigned long)tids[i],
getThreadType() , getThreadType() ,
(long)(m_launched - m_returned) , (long)(m_launched - m_returned) ,
now - times [i], now - times [i],
@ -1923,9 +1936,10 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
if ( g_conf.m_logDebugThread ) { if ( g_conf.m_logDebugThread ) {
active = m_launched - m_returned ; active = m_launched - m_returned ;
long long now = gettimeofdayInMilliseconds(); long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] launched %s thread. active=%lli " log(LOG_DEBUG,"thread: [t=0x%lx] launched %s thread. "
"active=%lli "
"niceness=%lu. waited %llu ms in queue.", "niceness=%lu. waited %llu ms in queue.",
(long)t, getThreadType(), active, realNiceness, (unsigned long)t, getThreadType(), active, realNiceness,
now - t->m_queuedTime); now - t->m_queuedTime);
} }
// be lazy with this since it uses a significant amount of cpu // be lazy with this since it uses a significant amount of cpu
@ -1998,7 +2012,7 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
// we're back from pthread_create // we're back from pthread_create
if ( g_conf.m_logDebugThread ) if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Back from clone t=%lu pid=%li.", log(LOG_DEBUG,"thread: Back from clone t=0x%lx pid=%li.",
(long)t,(long)pid); (long)t,(long)pid);
@ -2162,8 +2176,8 @@ int startUp ( void *state ) {
//t->m_tid = pthread_self(); //t->m_tid = pthread_self();
// debug // debug
if ( g_conf.m_logDebugThread ) if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] in startup pid=%li pppid=%li", log(LOG_DEBUG,"thread: [t=0x%lx] in startup pid=%li pppid=%li",
(long)t,(long)getpidtid(),(long)getppid()); (unsigned long)t,(long)getpidtid(),(long)getppid());
// debug msg // debug msg
//fprintf(stderr,"new thread tid=%li pid=%li\n", //fprintf(stderr,"new thread tid=%li pid=%li\n",
// (long)t->m_tid,(long)t->m_pid); // (long)t->m_tid,(long)t->m_pid);
@ -2219,8 +2233,8 @@ int startUp ( void *state ) {
t->m_exitTime = now; t->m_exitTime = now;
if ( g_conf.m_logDebugThread ) { if ( g_conf.m_logDebugThread ) {
log(LOG_DEBUG,"thread: [%lu] done with startup pid=%li", log(LOG_DEBUG,"thread: [t=0x%lx] done with startup pid=%li",
(long)t,(long)getpidtid()); (unsigned long)t,(long)getpidtid());
} }
// . now mark thread as ready for removal // . now mark thread as ready for removal
@ -2299,7 +2313,7 @@ void ThreadQueue::print ( ) {
// print it // print it
log(LOG_INIT,"thread: address=%lu pid=%u state=%lu " log(LOG_INIT,"thread: address=%lu pid=%u state=%lu "
"occ=%i done=%i lnch=%i", "occ=%i done=%i lnch=%i",
(long)t , t->m_pid , (unsigned long)t , t->m_pid ,
(unsigned long)t->m_state , t->m_isOccupied , t->m_isDone , (unsigned long)t->m_state , t->m_isOccupied , t->m_isDone ,
t->m_isLaunched ); t->m_isLaunched );
} }
@ -2411,18 +2425,8 @@ void ThreadQueue::removeThreads ( BigFile *bf ) {
// keep track // keep track
maxi = i; maxi = i;
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback2 type=%s",
getThreadType());
makeCallback ( t ); makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback2 type=%s",
getThreadType());
} }
// do we have to decrement top // do we have to decrement top
if ( m_top == maxi + 1 ) if ( m_top == maxi + 1 )

View File

@ -1,5 +1,5 @@
// iana_charset.h // iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013 // Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!! // DO NOT EDIT!!!
#include "gb-include.h" #include "gb-include.h"

View File

@ -1,5 +1,5 @@
// iana_charset.h // iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013 // Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!! // DO NOT EDIT!!!
#ifndef IANA_CHARSET_H__ #ifndef IANA_CHARSET_H__