Merge branch 'master' of github.com:gigablast/open-source-search-engine

This commit is contained in:
mwells 2013-08-29 21:17:46 -06:00
commit 2e9c8f7c6e
8 changed files with 84 additions and 58 deletions

View File

@ -220,7 +220,7 @@ bool Log::logR ( long long now , long type , char *msg , bool asterisk ,
// thread id if in "thread"
if ( pid != s_pid && s_pid != -1 ) {
//sprintf ( p , "[%li] " , (long)getpid() );
sprintf ( p , "[%li] " , (long)pid );
sprintf ( p , "[%lu] " , (unsigned long)pid );
p += gbstrlen ( p );
}
// then message itself

View File

@ -833,7 +833,9 @@ void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
// if we missed too many, then dump core
if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) {
g_inSigHandler = true;
log("loop: missed quickpoll");
g_inSigHandler = false;
// seems to core a lot in gbcompress() we need to
// put a quickpoll into zlib deflate() or
// deflate_slow() or longest_match() function

View File

@ -670,7 +670,7 @@ Log.o: Log.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h \
File.h Loop.h ip.h Hostdb.h HttpRequest.h SafeBuf.h Url.h TcpSocket.h \
Collectiondb.h Process.h Msg28.h
Collectiondb.h Process.h Msg28.h Threads.h
Loop.o: Loop.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Loop.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \

View File

@ -3144,6 +3144,9 @@ void SpiderLoop::spiderDoledUrls ( ) {
// can't get spidered until the one that is doled does.
if ( g_conf.m_testSpiderEnabled ) maxSpiders = 6;
}
// debug log
if ( g_conf.m_logDebugSpider )
log("spider: has %li spiders out",m_sc->m_spidersOut);
// obey max spiders per collection too
if ( m_sc->m_spidersOut >= maxSpiders ) continue;
// ok, we are good to launch a spider for coll m_cri
@ -3245,8 +3248,12 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_gettingDoledbList = true;
// log this now
//if ( g_conf.m_logDebugSpider )
// logf(LOG_DEBUG,"spider: loading list from doledb");
if ( g_conf.m_logDebugSpider ) {
m_doleStart = gettimeofdayInMillisecondsLocal();
// 12 byte doledb keys
logf(LOG_DEBUG,"spider: loading list from doledb startkey=%s",
KEYSTR(&m_sc->m_nextDoledbKey,12));
}
// get a spider rec for us to spider from doledb
if ( ! m_msg5.getList ( RDB_DOLEDB ,
@ -3260,6 +3267,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
// we need to read in a lot because we call
// "goto listLoop" below if the url we want
// to dole is locked.
// seems like a ton of negative recs
2000 , // minRecSizes
true , // includeTree
false , // addToCache
@ -3315,6 +3323,16 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// unlock
m_gettingDoledbList = false;
// log this now
if ( g_conf.m_logDebugSpider ) {
long long now = gettimeofdayInMillisecondsLocal();
long long took = now - m_doleStart;
logf(LOG_DEBUG,"spider: GOT list from doledb in %llims "
"size=%li bytes",
took,m_list.getListSize());
}
// bail instantly if in read-only mode (no RdbTrees!)
if ( g_conf.m_readOnlyMode ) return false;
// or if doing a daily merge

View File

@ -1126,6 +1126,8 @@ class SpiderLoop {
// for round robining in SpiderLoop::doleUrls(), etc.
long m_cri;
long long m_doleStart;
long m_processed;
};

View File

@ -702,8 +702,9 @@ ThreadEntry *ThreadQueue::addEntry ( long niceness ,
if ( i == m_top ) m_top++;
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] queued %s thread. "
"niceness=%lu. ", (long)t,getThreadType(), niceness );
log(LOG_DEBUG,"thread: [t=0x%lx] queued %s thread for launch. "
"niceness=%lu. ", (unsigned long)t,
getThreadType(), niceness );
// success
return t;
}
@ -889,6 +890,12 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
log("threads: pthread_join %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status);
}
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined1 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else
again:
@ -897,8 +904,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
int err = errno;
// debug the waitpid
if ( g_conf.m_logDebugThread || g_process.m_exiting )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.",
(long)t,(long)t->m_pid);
log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(unsigned long)t,(long)t->m_pid);
// bitch and continue if join failed
if ( pid != t->m_pid ) {
// waitpid() gets interrupted by various signals so
@ -924,14 +931,14 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
// re-protect this stack
mprotect ( t->m_stack + GUARDSIZE , STACK_SIZE - GUARDSIZE,
PROT_NONE );
#endif
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined with pid=%li pid=%li.",
(long)t->m_pid,(long)t->m_pid);
#endif
// we may get cleaned up and re-used and our niceness reassigned
// right after we set m_isDone to true, so save niceness
long niceness = t->m_niceness;
@ -1025,18 +1032,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
//only allow a quickpoll if we are nice.
//g_loop.canQuickPoll(t->m_niceness);
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s "
"nice=%li",getThreadType(),(long)t->m_niceness);
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s "
"nice=%li", getThreadType(),(long)t->m_niceness);
//long long took = gettimeofdayInMilliseconds()-startTime;
//if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling)
@ -1053,13 +1050,13 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [t=0x%lx] %s done1. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)t,
(unsigned long)t,
getThreadType() ,
(long)(m_launched - m_returned) ,
now - t->m_queuedTime,
@ -1087,6 +1084,17 @@ void makeCallback ( ThreadEntry *t ) {
// save it
long saved = g_niceness;
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: enter thread callback t=0x%lx "
//"type=%s "
"state=0x%lx "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_state,
(long)t->m_niceness);
// time it?
long long start;
if ( g_conf.m_maxCallbackDelay >= 0 )
@ -1109,6 +1117,16 @@ void makeCallback ( ThreadEntry *t ) {
}
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"loop: exit thread callback t=0x%lx "
//"type=%s "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_niceness);
// restore global niceness
g_niceness = saved;
@ -1199,6 +1217,12 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
log("threads: pthread_join2 %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status);
}
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined2 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else
again:
@ -1207,7 +1231,7 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
int err = errno;
// debug the waitpid
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.",
log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(long)t,(long)t->m_pid);
// bitch and continue if join failed
if ( pid != t->m_pid ) {
@ -1362,21 +1386,10 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
//g_threads.launchThreads();
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s",
getThreadType());
g_errno = 0;
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s",
getThreadType());
// long long took = gettimeofdayInMilliseconds()-startTime;
// if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling)
@ -1393,13 +1406,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [t=0x%lx] %s done2. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)t,
(unsigned long)t,
getThreadType() ,
(long)(m_launched - m_returned) ,
now - t->m_queuedTime,
@ -1438,13 +1451,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
for ( long i = 0 ; i < numCallbacks ; i++ )
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [tid=%lu] %s done3. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)tids[i],
(unsigned long)tids[i],
getThreadType() ,
(long)(m_launched - m_returned) ,
now - times [i],
@ -1923,9 +1936,10 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
if ( g_conf.m_logDebugThread ) {
active = m_launched - m_returned ;
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] launched %s thread. active=%lli "
log(LOG_DEBUG,"thread: [t=0x%lx] launched %s thread. "
"active=%lli "
"niceness=%lu. waited %llu ms in queue.",
(long)t, getThreadType(), active, realNiceness,
(unsigned long)t, getThreadType(), active, realNiceness,
now - t->m_queuedTime);
}
// be lazy with this since it uses a significant amount of cpu
@ -1998,7 +2012,7 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
// we're back from pthread_create
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Back from clone t=%lu pid=%li.",
log(LOG_DEBUG,"thread: Back from clone t=0x%lx pid=%li.",
(long)t,(long)pid);
@ -2162,8 +2176,8 @@ int startUp ( void *state ) {
//t->m_tid = pthread_self();
// debug
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] in startup pid=%li pppid=%li",
(long)t,(long)getpidtid(),(long)getppid());
log(LOG_DEBUG,"thread: [t=0x%lx] in startup pid=%li pppid=%li",
(unsigned long)t,(long)getpidtid(),(long)getppid());
// debug msg
//fprintf(stderr,"new thread tid=%li pid=%li\n",
// (long)t->m_tid,(long)t->m_pid);
@ -2219,8 +2233,8 @@ int startUp ( void *state ) {
t->m_exitTime = now;
if ( g_conf.m_logDebugThread ) {
log(LOG_DEBUG,"thread: [%lu] done with startup pid=%li",
(long)t,(long)getpidtid());
log(LOG_DEBUG,"thread: [t=0x%lx] done with startup pid=%li",
(unsigned long)t,(long)getpidtid());
}
// . now mark thread as ready for removal
@ -2299,7 +2313,7 @@ void ThreadQueue::print ( ) {
// print it
log(LOG_INIT,"thread: address=%lu pid=%u state=%lu "
"occ=%i done=%i lnch=%i",
(long)t , t->m_pid ,
(unsigned long)t , t->m_pid ,
(unsigned long)t->m_state , t->m_isOccupied , t->m_isDone ,
t->m_isLaunched );
}
@ -2411,18 +2425,8 @@ void ThreadQueue::removeThreads ( BigFile *bf ) {
// keep track
maxi = i;
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback2 type=%s",
getThreadType());
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback2 type=%s",
getThreadType());
}
// do we have to decrement top
if ( m_top == maxi + 1 )

View File

@ -1,5 +1,5 @@
// iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013
// Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!!
#include "gb-include.h"

View File

@ -1,5 +1,5 @@
// iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013
// Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!!
#ifndef IANA_CHARSET_H__