fix a couple of cores happening on crawlbot.

fix bug where a urls.csv or other streaming download
was being truncated because gb thought a shard was down.
even if it is down, wait for it to come back up.
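
The truncation worked roughly like this: when a shard was flagged dead, every docid living on it was skipped outright and counted as already answered, so a streamed urls.csv came back with rows missing. The change below keeps those lookups alive for custom crawls and streamed results so they wait for the shard instead. A minimal sketch of the decision, using a hypothetical helper (the real logic is inline in Msg40::launchMsg20s below):

// sketch only -- not actual gb code.
// shardDead     : g_hostdb.isShardDead(shardNum)
// isCustomCrawl : cr->m_isCustomCrawl (diffbot urls.csv jobs)
// streamResults : m_si->m_streamResults
static bool skipSummaryLookup ( bool shardDead ,
                                bool isCustomCrawl ,
                                bool streamResults ) {
    if ( ! shardDead   ) return false; // shard is up, do the lookup
    if ( isCustomCrawl ) return false; // wait for the shard to come back
    if ( streamResults ) return false; // a false "dead" reading would
                                       // truncate the streamed output
    return true; // regular query: skip it and fake the reply counters
}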
Matt Wells 2015-11-30 13:26:43 -08:00
parent ec5c38bab5
commit fc4731b11c
3 changed files with 41 additions and 21 deletions


@@ -1778,8 +1778,8 @@ bool HttpServer::sendSuccessReply ( TcpSocket *s , char format, char *addMsg) {
else now = getTimeLocal();
// . buffer for the MIME request and brief html err msg
// . NOTE: ctime appends a \n to the time, so we don't need to
char msg[1024];
SafeBuf sb(msg,1024,0,false);
char msg[1524];
SafeBuf sb(msg,1524,0,false);
char *tt = asctime(gmtime ( &now ));
tt [ gbstrlen(tt) - 1 ] = '\0';
@@ -1838,7 +1838,7 @@ bool HttpServer::sendSuccessReply ( TcpSocket *s , char format, char *addMsg) {
// use this new function that will compress the reply now if the
// request was a ZET instead of a GET
return sendReply2 ( msg , sb.length() , NULL , 0 , s );
return sendReply2 ( sb.getBufStart(), sb.length() , NULL , 0 , s );
}
bool HttpServer::sendErrorReply ( GigablastRequest *gr ) {
@@ -1851,8 +1851,8 @@ bool HttpServer::sendErrorReply ( GigablastRequest *gr ) {
else now = getTimeLocal();
int32_t format = gr->m_hr.getReplyFormat();
char msg[1024];
SafeBuf sb(msg,1024,0,false);
char msg[1524];
SafeBuf sb(msg,1524,0,false);
char *tt = asctime(gmtime ( &now ));
tt [ gbstrlen(tt) - 1 ] = '\0';
@@ -1904,7 +1904,7 @@ bool HttpServer::sendErrorReply ( GigablastRequest *gr ) {
// use this new function that will compress the reply now if the
// request was a ZET instead of a GET
return sendReply2 ( msg , sb.length() , NULL , 0 , gr->m_socket );
return sendReply2 ( sb.getBufStart(),sb.length(),NULL,0,gr->m_socket );
}
// . send an error reply, like "HTTP/1.1 404 Not Found"
@@ -1931,8 +1931,8 @@ bool HttpServer::sendErrorReply ( TcpSocket *s , int32_t error , char *errmsg ,
// . buffer for the MIME request and brief html err msg
// . NOTE: ctime appends a \n to the time, so we don't need to
char msg[1024];
SafeBuf sb(msg,1024,0,false);
char msg[1524];
SafeBuf sb(msg,1524,0,false);
// if it's a 404, redirect to home page
/*
if ( error == 404 )
@@ -2000,8 +2000,8 @@ bool HttpServer::sendErrorReply ( TcpSocket *s , int32_t error , char *errmsg ,
// record it
if ( bytesSent ) *bytesSent = sb.length();//sendBufSize;
// use this new function that will compress the reply now if the
// request was a ZET instead of a GET
return sendReply2 ( msg , sb.length() , NULL , 0 , s );
// request was a ZET instead of a GET mdw
return sendReply2 ( sb.getBufStart() , sb.length() , NULL , 0 , s );
/*
// . this returns false if blocked, true otherwise
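
The HttpServer changes above address how the reply buffer is handed off: SafeBuf is constructed on top of the 1024-byte msg array, and, assuming it behaves like a typical growable buffer (which the constructor arguments suggest), once the reply outgrows that array it reallocates elsewhere. msg then holds only the bytes written before the switch, and sendReply2 ( msg , sb.length() , ... ) reads past the end of a 1024-byte stack array. Passing sb.getBufStart() always hands over the buffer SafeBuf is actually using; enlarging the array to 1524 bytes just makes the reallocation rarer. A toy sketch of the hazard (this is not gb's SafeBuf):

#include <cstdlib>
#include <cstring>

class GrowBuf {
public:
    GrowBuf ( char *stackBuf , int cap )
        : m_buf(stackBuf), m_cap(cap), m_len(0), m_owned(false) {}
    ~GrowBuf ( ) { if ( m_owned ) free ( m_buf ); }
    void append ( const char *s , int n ) {
        if ( m_len + n > m_cap ) grow ( m_len + n );
        memcpy ( m_buf + m_len , s , n );
        m_len += n;
    }
    char *getBufStart ( ) { return m_buf; } // always the live buffer
    int   length      ( ) { return m_len; }
private:
    void grow ( int need ) {
        char *nb = (char *)malloc ( need * 2 );
        memcpy ( nb , m_buf , m_len );
        if ( m_owned ) free ( m_buf );
        m_buf = nb; m_cap = need * 2; m_owned = true; // stack array is now stale
    }
    char *m_buf; int m_cap; int m_len; bool m_owned;
};

// char msg[1024]; GrowBuf sb ( msg , 1024 );
// ... build a reply longer than 1024 bytes in sb ...
// sendReply2 ( msg              , sb.length() , ... ); // BUG: stale, reads past msg
// sendReply2 ( sb.getBufStart() , sb.length() , ... ); // OK: follows the realloc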


@@ -109,6 +109,8 @@ Msg40::Msg40() {
m_printCount = 0;
//m_numGigabitInfos = 0;
m_numCollsToSearch = 0;
m_numMsg20sIn = 0;
m_numMsg20sOut = 0;
}
#define MAX2 50
@@ -1500,14 +1502,27 @@ bool Msg40::launchMsg20s ( bool recalled ) {
int64_t docId = m_msg3a.m_docIds[i];
uint32_t shardNum = g_hostdb.getShardNumFromDocId ( docId );
if ( g_hostdb.isShardDead ( shardNum ) ) {
log("msg40: skipping summary lookup #%"INT32" of "
"docid %"INT64" for dead shard #%"INT32""
, i
, docId
, shardNum );
m_numRequests++;
m_numReplies++;
continue;
CollectionRec *cr ;
cr = g_collectiondb.getRec(m_firstCollnum);
if ( cr &&
// diffbot urls.csv downloads often encounter dead
// hosts that are not really dead, so wait for it
! cr->m_isCustomCrawl &&
// this is causing us to truncate streamed results
// too early when we have false positives that a
// host is dead because the server is locking up
// periodically
! m_si->m_streamResults ) {
log("msg40: skipping summary "
"lookup #%"INT32" of "
"docid %"INT64" for dead shard #%"INT32""
, i
, docId
, shardNum );
m_numRequests++;
m_numReplies++;
continue;
}
}
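
Note that even a skipped lookup bumps both m_numRequests and m_numReplies. The completion logic in gotSummary() (next hunk) waits until the reply count catches up with the request count, so a skipped docid has to look like a request that was answered instantly or the query would never finish. A rough sketch of that contract, with hypothetical stand-ins for the gb interfaces:

#include <cstdint>

// illustrative only; shouldSkip()/sendMsg20() are hypothetical stand-ins.
struct Counters { int32_t numRequests = 0; int32_t numReplies = 0; };

bool launchSketch ( Counters &c , int32_t numDocIds ,
                    bool (*shouldSkip)(int32_t) , void (*sendMsg20)(int32_t) ) {
    for ( int32_t i = 0 ; i < numDocIds ; i++ ) {
        c.numRequests++;
        if ( shouldSkip ( i ) ) { c.numReplies++; continue; } // answered "instantly"
        sendMsg20 ( i ); // its reply callback bumps numReplies later
    }
    // completion check, same shape as the gotSummary() hunk below:
    return c.numReplies >= c.numRequests; // false => still waiting on summaries
}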
@@ -2215,12 +2230,11 @@ bool Msg40::gotSummary ( ) {
complete:
// . ok, now i wait for everybody.
// . ok, now i wait for all msg20s (getsummary) to come back in.
// . TODO: evaluate if this hurts us
if ( m_numReplies < m_numRequests )
return false;
// if streaming results, we are done
if ( m_si && m_si->m_streamResults ) {
// unless waiting for last transmit to complete


@@ -8300,7 +8300,13 @@ bool SpiderLoop::spiderUrl2 ( ) {
// count it as a hit
//g_stats.m_spiderUrlsHit++;
// sanity check
if (m_sreq->m_priority <= -1 ) { char *xx=NULL;*xx=0; }
if (m_sreq->m_priority <= -1 ) {
log("spider: fixing bogus spider req priority of %i for "
"url %s",
(int)m_sreq->m_priority,m_sreq->m_url);
m_sreq->m_priority = 0;
//char *xx=NULL;*xx=0;
}
//if(m_sreq->m_priority >= MAX_SPIDER_PRIORITIES){char *xx=NULL;*xx=0;}
// update this
m_sc->m_outstandingSpiders[(unsigned char)m_sreq->m_priority]++;
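
The SpiderLoop::spiderUrl2 hunk above is presumably one of the crawlbot cores from the commit message: the old sanity check used the deliberate NULL write, char *xx=NULL;*xx=0;, as a hard assert, so any spider request arriving with a priority below zero dumped core on the spot. The new code logs the bogus value and clamps it to 0 instead. The same pattern in isolation (assumed semantics, not the exact gb types):

#include <cstdio>

// old style: force a segfault so the bad state leaves a core to inspect.
void checkPriorityOld ( int priority ) {
    if ( priority <= -1 ) { char *xx = NULL; *xx = 0; } // deliberate crash
}

// new style: log, clamp into range, keep the spider loop running.
void checkPriorityNew ( int &priority , const char *url ) {
    if ( priority <= -1 ) {
        fprintf ( stderr ,
                  "spider: fixing bogus spider req priority of %i for url %s\n" ,
                  priority , url );
        priority = 0;
    }
}

The trade-off is losing the immediate core that made the bad priority easy to debug, in exchange for keeping production crawlbot hosts up.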