Fix memory leak of LinkInfo.

Fix JSON output from injecting a URL.
This commit is contained in:
Matt Wells 2013-10-16 17:17:28 -07:00
parent 70c4ef682d
commit 74c2742ced
5 changed files with 58 additions and 23 deletions

View File

@ -415,8 +415,9 @@ void Msg25::reset() {
m_numReplyPtrs = 0;
// . free the linkinfo if we are responsible for it
// . if someone "steals" it from us, they should set this to NULL
if ( m_linkInfo )
mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
//if ( m_linkInfo )
// mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
// this now points into m_linkInfoBuf safebuf, just NULL it
m_linkInfo = NULL;
m_table.reset();
@ -468,7 +469,8 @@ bool Msg25::getLinkInfo ( char *site ,
bool onlyNeedGoodInlinks ,
bool getLinkerTitles ,
long ourHostHash32 ,
long ourDomHash32 ) {
long ourDomHash32 ,
SafeBuf *linkInfoBuf ) {
// reset the ip table
reset();
@ -483,6 +485,9 @@ bool Msg25::getLinkInfo ( char *site ,
if ( ! coll ) { char *xx=NULL; *xx=0; }
m_onlyNeedGoodInlinks = onlyNeedGoodInlinks;
m_getLinkerTitles = getLinkerTitles;
// save safebuf ptr, where we store the link info
m_linkInfoBuf = linkInfoBuf;
if ( ! linkInfoBuf ) { char *xx=NULL;*xx=0; }
// sanity check
if ( m_mode == MODE_PAGELINKINFO && ! docId ) {char *xx=NULL; *xx=0; }
// must have a valid ip
@ -1903,7 +1908,8 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
m_lastUpdateTime ,
m_onlyNeedGoodInlinks ,
m_niceness ,
this );
this ,
m_linkInfoBuf );
// return true with g_errno set on error
if ( ! m_linkInfo ) {
log("build: msg25 linkinfo set: %s",mstrerror(g_errno));
@ -3182,7 +3188,8 @@ LinkInfo *makeLinkInfo ( char *coll ,
long lastUpdateTime ,
bool onlyNeedGoodInlinks ,
long niceness ,
Msg25 *msg25 ) {
Msg25 *msg25 ,
SafeBuf *linkInfoBuf ) {
// for parsing the link text
Words words;
@ -3449,10 +3456,11 @@ LinkInfo *makeLinkInfo ( char *coll ,
// we need space for our header
need += sizeof(LinkInfo);
// alloc the buffer
char *buf = (char *)mmalloc ( need,"LinkInfo");
if ( ! buf ) return NULL;
//char *buf = (char *)mmalloc ( need,"LinkInfo");
//if ( ! buf ) return NULL;
if ( ! linkInfoBuf->reserve ( need , "LinkInfo" ) ) return NULL;
// set ourselves to this new buffer
LinkInfo *info = (LinkInfo *)buf;
LinkInfo *info = (LinkInfo *)(linkInfoBuf->getBufStart());
// set our header
info->m_version = 0;
@ -3487,7 +3495,7 @@ LinkInfo *makeLinkInfo ( char *coll ,
// point to our buf
char *p = info->m_buf;
char *pend = buf + need;
char *pend = linkInfoBuf->getBufStart() + need;
// count the ones we store that are internal
long icount3 = 0;
// now set each inlink

View File

@ -341,7 +341,8 @@ class Msg25 {
// to not perform this algo in handleRequest20()'s
// call to XmlDoc::getMsg20Reply().
long ourHostHash32 , // = 0 ,
long ourDomHash32 ); // = 0 );
long ourDomHash32 , // = 0 );
SafeBuf *myLinkInfoBuf );
Msg25();
~Msg25();
void reset();
@ -370,6 +371,9 @@ class Msg25 {
class LinkInfo *getLinkInfo () { return m_linkInfo; };
// m_linkInfo ptr references into here. provided by caller.
SafeBuf *m_linkInfoBuf;
// private:
// these need to be public for wrappers to call:
bool gotTermFreq ( bool msg42Called ) ;
@ -886,7 +890,8 @@ LinkInfo *makeLinkInfo ( char *coll ,
long lastUpdateTime ,
bool onlyNeedGoodInlinks ,
long niceness ,
class Msg25 *msg25 ) ;
class Msg25 *msg25 ,
SafeBuf *linkInfoBuf ) ;
// . set from the Msg20 replies in MsgE
// . Msg20 uses this to set the LinkInfo class to the "outlinks"

View File

@ -2119,7 +2119,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
if ( ! name ) {
log("crawlbot: no crawl name given");
char *msg = "invalid or missing \"name\"";
char *msg = "invalid or missing name";
return sendErrorReply2 (socket,fmt,msg);
}
@ -2548,13 +2548,16 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
if ( fmt == FMT_HTML )
sb.safePrintf ( "</center><br/>" );
// the ROOT JSON {
if ( fmt == FMT_JSON )
sb.safePrintf("{\n");
if ( fmt == FMT_JSON && injectionResponse )
sb.safePrintf("{\"seedResponse\":\"%s\"},\n\n"
sb.safePrintf("\"seedResponse\":\"%s\",\n\n"
, injectionResponse->getBufStart() );
if ( fmt == FMT_JSON && urlUploadResponse )
sb.safePrintf("{\"addUrlsResponse\":\"%s\"},\n\n"
sb.safePrintf("\"addUrlsResponse\":\"%s\",\n\n"
, urlUploadResponse->getBufStart() );
@ -2565,7 +2568,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//////
if ( fmt == FMT_JSON )
sb.safePrintf("{\"crawls\":[");//\"collections\":");
sb.safePrintf("\"crawls\":[");//\"collections\":");
long summary = hr->getLong("summary",0);
// enter summary mode for json
@ -2692,7 +2695,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
if ( fmt == FMT_JSON )
// end the array of collection objects
sb.safePrintf("\n]}\n");
sb.safePrintf("\n]\n");
///////
//
@ -3363,6 +3366,9 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
}
// the ROOT JSON }
if ( fmt == FMT_JSON )
sb.safePrintf("}\n");
char *ct = "text/html";
if ( fmt == FMT_JSON ) ct = "application/json";

View File

@ -180,6 +180,10 @@ static long long s_lastTimeStart = 0LL;
void XmlDoc::reset ( ) {
m_mySiteLinkInfoBuf.purge();
m_myPageLinkInfoBuf.purge();
m_myTempLinkInfoBuf.purge();
// reset count for nukeJSONObjects() function
m_joc = 0;
@ -489,12 +493,14 @@ void XmlDoc::reset ( ) {
if ( m_ahrefsDocValid ) nukeDoc ( m_ahrefsDoc );
if ( m_linkInfo1Valid && ptr_linkInfo1 && m_freeLinkInfo1 ) {
mfree ( ptr_linkInfo1 , size_linkInfo1, "LinkInfo1");
// it now points into m_myPageLinkInfoBuf !
//mfree ( ptr_linkInfo1 , size_linkInfo1, "LinkInfo1");
ptr_linkInfo1 = NULL;
m_linkInfo1Valid = false;
}
if ( m_linkInfo2Valid && ptr_linkInfo2 && m_freeLinkInfo2 ) {
mfree ( ptr_linkInfo2 , size_linkInfo2, "LinkInfo2");
// should point into a safebuf as well
//mfree ( ptr_linkInfo2 , size_linkInfo2, "LinkInfo2");
ptr_linkInfo2 = NULL;
m_linkInfo1Valid = false;
}
@ -11302,7 +11308,9 @@ LinkInfo *XmlDoc::getSiteLinkInfo() {
setStatus ( "getting site link info" );
if ( m_siteLinkInfoValid ) return m_msg25.m_linkInfo;
if ( m_siteLinkInfoValid )
//return msg25.m_linkInfo;
return (LinkInfo *)m_mySiteLinkInfoBuf.getBufStart();
char *mysite = getSite();
if ( ! mysite || mysite == (void *)-1 ) return (LinkInfo *)mysite;
long *fip = getFirstIp();
@ -11366,7 +11374,9 @@ LinkInfo *XmlDoc::getSiteLinkInfo() {
onlyNeedGoodInlinks ,
false,
0,
0) )
0,
// it will store the linkinfo into this safebuf
&m_mySiteLinkInfoBuf) )
// return -1 if it blocked
return (LinkInfo *)-1;
// sanity check
@ -12171,7 +12181,8 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
onlyNeedGoodInlinks ,
false, // getlinkertitles
0, // ourhosthash32 (special)
0 // ourdomhash32 (special)
0, // ourdomhash32 (special)
&m_myPageLinkInfoBuf
) )
// blocked
return (LinkInfo *)-1;
@ -12185,7 +12196,8 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
// at this point assume its valid
m_linkInfo1Valid = true;
// get the link info we got set
// . get the link info we got set
// . this ptr references into m_myPageLinkInfoBuf safebuf
ptr_linkInfo1 = m_msg25.m_linkInfo;
size_linkInfo1 = m_msg25.m_linkInfo->getSize();
// we should free it
@ -41359,7 +41371,8 @@ Msg25 *XmlDoc::getAllInlinks ( bool forSite ) {
false ,//onlyneedgoodinlinks?
false,//getlinkertitles?
0, // ourhosthash32 (special)
0)) // ourdomhash32 (special)
0, // ourdomhash32 (special)
&m_myTempLinkInfoBuf ) )
// blocked?
return (Msg25 *)-1;
}

View File

@ -1423,6 +1423,9 @@ class XmlDoc {
uint8_t m_siteNumInlinks8;
//long m_siteNumInlinks;
LinkInfo m_siteLinkInfo;
SafeBuf m_mySiteLinkInfoBuf;
SafeBuf m_myPageLinkInfoBuf;
SafeBuf m_myTempLinkInfoBuf;
char m_isInjecting;
char m_useFakeMime;
char m_useSiteLinkBuf;