Merge branch 'diffbot' into diffbot-testing

This commit is contained in:
Matt Wells 2015-12-04 09:03:16 -08:00
commit a3a7635dcf
8 changed files with 64 additions and 8 deletions

View File

@ -1014,7 +1014,7 @@ void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {
// right now only works for 32 bit
//if ( arch != 32 ) return;
logf(LOG_DEBUG,"gb: seg fault. printing stack trace. use "
logf(LOG_DEBUG,"gb: Printing stack trace. use "
"'addr2line -e gb' to decode the hex below.");
if ( g_inMemFunction ) {
@ -1035,6 +1035,16 @@ void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {
//,ba
//,g_profiler.getFnName(ba,0));
);
#ifdef INLINEDECODE
char cmd[256];
sprintf(cmd,"addr2line -e gb 0x%"XINT64" > ./tmpout"
,(uint64_t)s_bt[i]);
gbsystem ( cmd );
char obuf[1024];
SafeBuf fb (obuf,1024);
fb.load("./tmpout");
log("stack: %s",fb.getBufStart());
#endif
}
}
@ -1171,7 +1181,8 @@ void sigvtalrmHandler ( int x , siginfo_t *info , void *y ) {
//g_inSigHandler = true;
// NOT SAFE for pthreads cuz we're in sig handler
#ifndef PTHREADS
log("loop: missed quickpoll");
log("loop: missed quickpoll. Dumping stack.");
printStackTrace( x , info , y );
#endif
//g_inSigHandler = false;
// seems to core a lot in gbcompress() we need to
@ -1183,15 +1194,19 @@ void sigvtalrmHandler ( int x , siginfo_t *info , void *y ) {
}
// if it has been a while since heartbeat (> 10000ms) dump core so
// we can see where the process was... that is a missed quick poll?
// we can see where the process was... we are in a long niceness 0
// function or a niceness 1 function without a quickpoll, so that
// heartbeatWrapper() function never gets called.
if ( g_process.m_lastHeartbeatApprox == 0 ) return;
if ( g_conf.m_maxHeartbeatDelay <= 0 ) return;
if ( g_nowApprox - g_process.m_lastHeartbeatApprox >
g_conf.m_maxHeartbeatDelay ) {
#ifndef PTHREADS
logf(LOG_DEBUG,"gb: CPU seems blocked. Forcing core.");
logf(LOG_DEBUG,"gb: CPU seems blocked. Dumping stack.");
printStackTrace( x , info , y );
#endif
//char *xx=NULL; *xx=0;
}
//logf(LOG_DEBUG, "xxx now: %"INT64"! approx: %"INT64"", g_now, g_nowApprox);

View File

@ -1115,6 +1115,8 @@ bool Msg3::doneScanning ( ) {
m_lists[i].getListSize() ,
0 ); // timestamp. 0 = now
QUICKPOLL(m_niceness);
// if from our 'page' cache, no need to constrain
if ( ! m_lists[i].constrain ( m_startKey ,
m_constrainKey , // m_endKey

View File

@ -10548,7 +10548,7 @@ void Parms::init ( ) {
m->m_off = (char *)&g_conf.m_maxHeartbeatDelay - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_flags = PF_CLONE; // PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

View File

@ -1175,8 +1175,12 @@ void heartbeatWrapper ( int fd , void *state ) {
// check the "cat /proc/<pid>/status | grep SigQ" output
// to see if it's overflowed. hopefully i will fix this by
// queueing the signals myself in Loop.cpp.
log("db: missed heartbeat by %"INT64" ms. Num elapsed alarms = "
"%"INT32"", elapsed-100,(int32_t)(g_numAlarms - s_lastNumAlarms));
log("db: missed calling niceness 0 heartbeatWrapper "
"function by %"INT64" ms. Either you need a quickpoll "
"somewhere or a niceness 0 function is taking too long. "
"Num elapsed alarms = "
"%"INT32"", elapsed-100,(int32_t)(g_numAlarms -
s_lastNumAlarms));
s_last = now;
s_lastNumAlarms = g_numAlarms;

View File

@ -1813,6 +1813,8 @@ void attemptMergeAll2 ( ) {
tryLoop:
QUICKPOLL(niceness);
// if a collection got deleted, reset this to 0
if ( s_lastCollnum >= g_collectiondb.m_numRecs ) {
s_lastCollnum = 0;

View File

@ -13838,6 +13838,8 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
// . TODO: do not update on error???
for ( ; ptr < end ; ptr++ ) {
QUICKPOLL ( slot->m_niceness );
// get collnum
collnum_t collnum = (collnum_t)(ptr->m_collnum);
@ -13903,6 +13905,12 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
// loop over
for ( int32_t x = 0 ; x < g_collectiondb.m_numRecs ; x++ ) {
QUICKPOLL ( slot->m_niceness );
// a niceness 0 routine could have nuked it?
if ( x >= g_collectiondb.m_numRecs )
break;
CollectionRec *cr = g_collectiondb.m_recs[x];
if ( ! cr ) continue;
@ -13925,6 +13933,7 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
if ( ! cia ) continue;
for ( int32_t k = 0 ; k < g_hostdb.m_numHosts; k++ ) {
QUICKPOLL ( slot->m_niceness );
// get the CrawlInfo for the ith host
CrawlInfo *stats = &cia[k];
// point to the stats for that host
@ -14205,7 +14214,7 @@ void handleRequestc1 ( UdpSlot *slot , int32_t niceness ) {
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
QUICKPOLL(MAX_NICENESS);
QUICKPOLL(slot->m_niceness);
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;

View File

@ -1300,7 +1300,15 @@ bool ThreadQueue::timedCleanUp ( int32_t maxNiceness ) {
// . join up with that thread
// . damn, sometimes he can block forever on his
// call to sigqueue(),
// time the join so we can log when this call holds us up
int64_t startTime = gettimeofdayInMillisecondsLocal();
int64_t took;
int32_t status = pthread_join ( t->m_joinTid , NULL );
// elapsed = now - start. subtracting the other way around always
// yields a value <= 0, so the warning below could never fire.
took = gettimeofdayInMillisecondsLocal() - startTime;
if ( took > 50 ) {
	log("threads: pthread_join took %i ms",
	    (int)took);
}
if ( status != 0 ) {
log("threads: pthread_join %"INT64" = %s (%"INT32")",
(int64_t)t->m_joinTid,mstrerror(status),

View File

@ -4577,6 +4577,22 @@ int32_t *XmlDoc::getIndexCode2 ( ) {
if ( m_recycleContent )
check = false;
// if &links was given in the diffbot api url then do not do
// spider time deduping because the pages are likely rendered using
// javascript, so they'd all seem to be dups of one another.
if ( cr->m_isCustomCrawl && check ) {
SafeBuf *au = getDiffbotApiUrl();
if ( ! au || au == (void *)-1 ) return (int32_t *)au;
char *linksParm = NULL;
if ( au->length() > 0 )
linksParm = strstr ( au->getBufStart() , "&links");
if ( linksParm && linksParm[6] && linksParm[6] != '&' )
linksParm = NULL;
if ( linksParm )
check = false;
}
if ( check ) {
// check inlinks now too!
LinkInfo *info1 = getLinkInfo1 ();