mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 04:07:13 +03:00
Merge branch 'diffbot' into diffbot-testing
This commit is contained in:
commit
a3a7635dcf
23
Loop.cpp
23
Loop.cpp
@ -1014,7 +1014,7 @@ void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {
|
||||
// right now only works for 32 bit
|
||||
//if ( arch != 32 ) return;
|
||||
|
||||
logf(LOG_DEBUG,"gb: seg fault. printing stack trace. use "
|
||||
logf(LOG_DEBUG,"gb: Printing stack trace. use "
|
||||
"'addr2line -e gb' to decode the hex below.");
|
||||
|
||||
if ( g_inMemFunction ) {
|
||||
@ -1035,6 +1035,16 @@ void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {
|
||||
//,ba
|
||||
//,g_profiler.getFnName(ba,0));
|
||||
);
|
||||
#ifdef INLINEDECODE
|
||||
char cmd[256];
|
||||
sprintf(cmd,"addr2line -e gb 0x%"XINT64" > ./tmpout"
|
||||
,(uint64_t)s_bt[i]);
|
||||
gbsystem ( cmd );
|
||||
char obuf[1024];
|
||||
SafeBuf fb (obuf,1024);
|
||||
fb.load("./tmpout");
|
||||
log("stack: %s",fb.getBufStart());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -1171,7 +1181,8 @@ void sigvtalrmHandler ( int x , siginfo_t *info , void *y ) {
|
||||
//g_inSigHandler = true;
|
||||
// NOT SAFE for pthreads cuz we're in sig handler
|
||||
#ifndef PTHREADS
|
||||
log("loop: missed quickpoll");
|
||||
log("loop: missed quickpoll. Dumping stack.");
|
||||
printStackTrace( x , info , y );
|
||||
#endif
|
||||
//g_inSigHandler = false;
|
||||
// seems to core a lot in gbcompress() we need to
|
||||
@ -1183,15 +1194,19 @@ void sigvtalrmHandler ( int x , siginfo_t *info , void *y ) {
|
||||
}
|
||||
|
||||
// if it has been a while since heartbeat (> 10000ms) dump core so
|
||||
// we can see where the process was... that is a missed quick poll?
|
||||
// we can see where the process was... we are in a long niceness 0
|
||||
// function or a niceness 1 function without a quickpoll, so that
|
||||
// heartbeatWrapper() function never gets called.
|
||||
if ( g_process.m_lastHeartbeatApprox == 0 ) return;
|
||||
if ( g_conf.m_maxHeartbeatDelay <= 0 ) return;
|
||||
if ( g_nowApprox - g_process.m_lastHeartbeatApprox >
|
||||
g_conf.m_maxHeartbeatDelay ) {
|
||||
#ifndef PTHREADS
|
||||
logf(LOG_DEBUG,"gb: CPU seems blocked. Forcing core.");
|
||||
logf(LOG_DEBUG,"gb: CPU seems blocked. Dumping stack.");
|
||||
printStackTrace( x , info , y );
|
||||
#endif
|
||||
//char *xx=NULL; *xx=0;
|
||||
|
||||
}
|
||||
|
||||
//logf(LOG_DEBUG, "xxx now: %"INT64"! approx: %"INT64"", g_now, g_nowApprox);
|
||||
|
2
Msg3.cpp
2
Msg3.cpp
@ -1115,6 +1115,8 @@ bool Msg3::doneScanning ( ) {
|
||||
m_lists[i].getListSize() ,
|
||||
0 ); // timestamp. 0 = now
|
||||
|
||||
QUICKPOLL(m_niceness);
|
||||
|
||||
// if from our 'page' cache, no need to constrain
|
||||
if ( ! m_lists[i].constrain ( m_startKey ,
|
||||
m_constrainKey , // m_endKey
|
||||
|
@ -10548,7 +10548,7 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&g_conf.m_maxHeartbeatDelay - g;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_flags = PF_CLONE; // PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
@ -1175,8 +1175,12 @@ void heartbeatWrapper ( int fd , void *state ) {
|
||||
// check the "cat /proc/<pid>/status | grep SigQ" output
|
||||
// to see if it's overflowed. hopefully i will fix this by
|
||||
// queueing the signals myself in Loop.cpp.
|
||||
log("db: missed heartbeat by %"INT64" ms. Num elapsed alarms = "
|
||||
"%"INT32"", elapsed-100,(int32_t)(g_numAlarms - s_lastNumAlarms));
|
||||
log("db: missed calling niceness 0 heartbeatWrapper "
|
||||
"function by %"INT64" ms. Either you need a quickpoll "
|
||||
"somewhere or a niceness 0 function is taking too long. "
|
||||
"Num elapsed alarms = "
|
||||
"%"INT32"", elapsed-100,(int32_t)(g_numAlarms -
|
||||
s_lastNumAlarms));
|
||||
s_last = now;
|
||||
s_lastNumAlarms = g_numAlarms;
|
||||
|
||||
|
2
Rdb.cpp
2
Rdb.cpp
@ -1813,6 +1813,8 @@ void attemptMergeAll2 ( ) {
|
||||
|
||||
tryLoop:
|
||||
|
||||
QUICKPOLL(niceness);
|
||||
|
||||
// if a collection got deleted, reset this to 0
|
||||
if ( s_lastCollnum >= g_collectiondb.m_numRecs ) {
|
||||
s_lastCollnum = 0;
|
||||
|
11
Spider.cpp
11
Spider.cpp
@ -13838,6 +13838,8 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
|
||||
// . TODO: do not update on error???
|
||||
for ( ; ptr < end ; ptr++ ) {
|
||||
|
||||
QUICKPOLL ( slot->m_niceness );
|
||||
|
||||
// get collnum
|
||||
collnum_t collnum = (collnum_t)(ptr->m_collnum);
|
||||
|
||||
@ -13903,6 +13905,12 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
|
||||
// loop over
|
||||
for ( int32_t x = 0 ; x < g_collectiondb.m_numRecs ; x++ ) {
|
||||
|
||||
QUICKPOLL ( slot->m_niceness );
|
||||
|
||||
// a niceness 0 routine could have nuked it?
|
||||
if ( x >= g_collectiondb.m_numRecs )
|
||||
break;
|
||||
|
||||
CollectionRec *cr = g_collectiondb.m_recs[x];
|
||||
if ( ! cr ) continue;
|
||||
|
||||
@ -13925,6 +13933,7 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
|
||||
if ( ! cia ) continue;
|
||||
|
||||
for ( int32_t k = 0 ; k < g_hostdb.m_numHosts; k++ ) {
|
||||
QUICKPOLL ( slot->m_niceness );
|
||||
// get the CrawlInfo for the ith host
|
||||
CrawlInfo *stats = &cia[k];
|
||||
// point to the stats for that host
|
||||
@ -14205,7 +14214,7 @@ void handleRequestc1 ( UdpSlot *slot , int32_t niceness ) {
|
||||
|
||||
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||
|
||||
QUICKPOLL(MAX_NICENESS);
|
||||
QUICKPOLL(slot->m_niceness);
|
||||
|
||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
|
@ -1300,7 +1300,15 @@ bool ThreadQueue::timedCleanUp ( int32_t maxNiceness ) {
|
||||
// . join up with that thread
|
||||
// . damn, sometimes he can block forever on his
|
||||
// call to sigqueue(),
|
||||
int64_t startTime = gettimeofdayInMillisecondsLocal();
|
||||
int64_t took;
|
||||
int32_t status = pthread_join ( t->m_joinTid , NULL );
|
||||
// elapsed ms spent in pthread_join: end time minus start time. the
// reversed subtraction was always <= 0, so the "took > 50" warning
// below could never fire.
took = gettimeofdayInMillisecondsLocal() - startTime;
|
||||
if ( took > 50 ) {
|
||||
log("threads: pthread_join took %i ms",
|
||||
(int)took);
|
||||
}
|
||||
|
||||
if ( status != 0 ) {
|
||||
log("threads: pthread_join %"INT64" = %s (%"INT32")",
|
||||
(int64_t)t->m_joinTid,mstrerror(status),
|
||||
|
16
XmlDoc.cpp
16
XmlDoc.cpp
@ -4577,6 +4577,22 @@ int32_t *XmlDoc::getIndexCode2 ( ) {
|
||||
if ( m_recycleContent )
|
||||
check = false;
|
||||
|
||||
// if &links was given in the diffbot api url then do not do
|
||||
// spider time deduping because the pages are likely rendered using
|
||||
// javascript, so they'd all seem to be dups of one another.
|
||||
if ( cr->m_isCustomCrawl && check ) {
|
||||
SafeBuf *au = getDiffbotApiUrl();
|
||||
if ( ! au || au == (void *)-1 ) return (int32_t *)au;
|
||||
char *linksParm = NULL;
|
||||
if ( au->length() > 0 )
|
||||
linksParm = strstr ( au->getBufStart() , "&links");
|
||||
if ( linksParm && linksParm[6] && linksParm[6] != '&' )
|
||||
linksParm = NULL;
|
||||
if ( linksParm )
|
||||
check = false;
|
||||
}
|
||||
|
||||
|
||||
if ( check ) {
|
||||
// check inlinks now too!
|
||||
LinkInfo *info1 = getLinkInfo1 ();
|
||||
|
Loading…
Reference in New Issue
Block a user