compiles again now

This commit is contained in:
Matt 2015-04-30 18:23:46 -07:00
parent df7dec9c74
commit f1663402d9
2 changed files with 49 additions and 13 deletions

View File

@ -522,15 +522,21 @@ void handleRequest7 ( UdpSlot *slot , int32_t netnice ) {
sendReply ( slot ); sendReply ( slot );
} }
void gotWarcContentWrapper ( void *state ) { void gotWarcContentWrapper ( void *state , TcpSocket *ts ) {
Msg7 *THIS = (Msg7 *)state; Msg7 *THIS = (Msg7 *)state;
// set content to that // set content to that
GigablastRequest *gr = &THIS->m_gr; GigablastRequest *gr = &THIS->m_gr;
gr->m_contentBuf.stealBuf (ts->m_readBuf, ts->m_readBufSize ); gr->m_contentBuf.setBuf (ts->m_readBuf,
ts->m_readBuf = NULL; ts->m_readBufSize ,
ts->m_readOffset ,
true , // ownBuf?
0 ); // encoding
// just ref it
gr->m_content = ts->m_readBuf; gr->m_content = ts->m_readBuf;
// so tcpserver.cpp doesn't free the warc/arc file
ts->m_readBuf = NULL;
// continue with injection // continue with injection
inject ( THIS->m_state , THIS->m_callback ); THIS->inject ( THIS->m_state , THIS->m_callback );
} }
// . returns false if blocked and callback will be called, true otherwise // . returns false if blocked and callback will be called, true otherwise
@ -583,19 +589,48 @@ bool Msg7::inject ( void *state ,
// get the normalized url // get the normalized url
u.set ( gr->m_url ); u.set ( gr->m_url );
if ( u.isWarc() ) char *ustr = u.getUrl();
int32_t ulen = u.getUrlLen();
char *uend = ustr + ulen;
m_isWarc = false;
m_isArc = false;
if ( ulen>8 && strncmp(uend-8,".warc.gz",8)==0 )
m_isWarc = true; m_isWarc = true;
if ( u.isArc () ) if ( ulen>8 && strncmp(uend-5,".warc" ,5)==0 )
m_isArc = true; m_isWarc = true;
if ( ulen>8 && strncmp(uend-7,".arc.gz",7)==0 )
m_isArc = true;
if ( ulen>8 && strncmp(uend-4,".arc" ,4)==0 )
m_isArc = true;
// if warc/arc download it and make gr->m_content reference it... // if warc/arc download it and make gr->m_content reference it...
// we won't handle redirects though. // we won't handle redirects though.
if ( ! content && ( m_isWarc || m_isArc) ) { if ( ! content && ( m_isWarc || m_isArc) ) {
// download the warc/arc url // download the warc/arc url
if ( ! g_httpServer.getDoc ( &u , if ( ! g_httpServer.getDoc ( ustr ,
this , 0 , // urlip
gotWarcContentWrapper ) ) 0 , // offset
// we blocked -1 ,
0,//r->m_ifModifiedSince ,
this , // state
gotWarcContentWrapper ,// callback
30*1000 , // 30 sec timeout
0 , // r->m_proxyIp ,
0 , // r->m_proxyPort ,
-1,//r->m_maxTextDocLen ,
-1,//r->m_maxOtherDocLen ,
NULL,//agent ,
DEFAULT_HTTP_PROTO , // "HTTP/1.0"
false , // doPost?
NULL , // cookie
NULL , // additionalHeader
NULL , // our own mime!
NULL , // postContent
NULL))//proxyUsernamePwdAuth ) )
// return false if blocked
return false; return false;
// error? // error?
log("inject: %s",mstrerror(g_errno)); log("inject: %s",mstrerror(g_errno));
@ -640,7 +675,7 @@ bool Msg7::inject ( void *state ,
// contains a mime, as a mime a level above that whose // contains a mime, as a mime a level above that whose
// content-length: field includes the original http reply mime // content-length: field includes the original http reply mime
// as part of its content. // as part of its content.
if ( u.isWarc() ) { // gr->m_containerContentType == CT_WARC ) { if ( m_isWarc ) { // gr->m_containerContentType == CT_WARC ) {
// no setting delim for this! // no setting delim for this!
if ( delim ) { char *xx=NULL;*xx=0; } if ( delim ) { char *xx=NULL;*xx=0; }
// should have the url as well // should have the url as well
@ -650,7 +685,7 @@ bool Msg7::inject ( void *state ,
if ( ! mm || ! mmend ) { if ( ! mm || ! mmend ) {
log("inject: warc: all done"); log("inject: warc: all done");
// XmlDoc.cpp checks for this to stop calling us // XmlDoc.cpp checks for this to stop calling us
m_isDoneInjecting = true; //m_isDoneInjecting = true;
return true; return true;
} }
char c = *mmend; char c = *mmend;

View File

@ -145,6 +145,7 @@ class GigablastRequest {
// older ones // older ones
uint32_t m_firstIndexed; // firstindexed uint32_t m_firstIndexed; // firstindexed
uint32_t m_lastSpidered; // lastspidered; uint32_t m_lastSpidered; // lastspidered;
SafeBuf m_contentBuf; // for holding a warc/arc file