mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Fix memory leak of LinkInfo.
Fix JSON output from injecting a URL.
This commit is contained in:
parent
70c4ef682d
commit
74c2742ced
26
Linkdb.cpp
26
Linkdb.cpp
@ -415,8 +415,9 @@ void Msg25::reset() {
|
||||
m_numReplyPtrs = 0;
|
||||
// . free the linkinfo if we are responsible for it
|
||||
// . if someone "steals" it from us, they should set this to NULL
|
||||
if ( m_linkInfo )
|
||||
mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
|
||||
//if ( m_linkInfo )
|
||||
// mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
|
||||
// this now points into m_linkInfoBuf safebuf, just NULL it
|
||||
m_linkInfo = NULL;
|
||||
|
||||
m_table.reset();
|
||||
@ -468,7 +469,8 @@ bool Msg25::getLinkInfo ( char *site ,
|
||||
bool onlyNeedGoodInlinks ,
|
||||
bool getLinkerTitles ,
|
||||
long ourHostHash32 ,
|
||||
long ourDomHash32 ) {
|
||||
long ourDomHash32 ,
|
||||
SafeBuf *linkInfoBuf ) {
|
||||
|
||||
// reset the ip table
|
||||
reset();
|
||||
@ -483,6 +485,9 @@ bool Msg25::getLinkInfo ( char *site ,
|
||||
if ( ! coll ) { char *xx=NULL; *xx=0; }
|
||||
m_onlyNeedGoodInlinks = onlyNeedGoodInlinks;
|
||||
m_getLinkerTitles = getLinkerTitles;
|
||||
// save safebuf ptr, where we store the link info
|
||||
m_linkInfoBuf = linkInfoBuf;
|
||||
if ( ! linkInfoBuf ) { char *xx=NULL;*xx=0; }
|
||||
// sanity check
|
||||
if ( m_mode == MODE_PAGELINKINFO && ! docId ) {char *xx=NULL; *xx=0; }
|
||||
// must have a valid ip
|
||||
@ -1903,7 +1908,8 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
|
||||
m_lastUpdateTime ,
|
||||
m_onlyNeedGoodInlinks ,
|
||||
m_niceness ,
|
||||
this );
|
||||
this ,
|
||||
m_linkInfoBuf );
|
||||
// return true with g_errno set on error
|
||||
if ( ! m_linkInfo ) {
|
||||
log("build: msg25 linkinfo set: %s",mstrerror(g_errno));
|
||||
@ -3182,7 +3188,8 @@ LinkInfo *makeLinkInfo ( char *coll ,
|
||||
long lastUpdateTime ,
|
||||
bool onlyNeedGoodInlinks ,
|
||||
long niceness ,
|
||||
Msg25 *msg25 ) {
|
||||
Msg25 *msg25 ,
|
||||
SafeBuf *linkInfoBuf ) {
|
||||
|
||||
// for parsing the link text
|
||||
Words words;
|
||||
@ -3449,10 +3456,11 @@ LinkInfo *makeLinkInfo ( char *coll ,
|
||||
// we need space for our header
|
||||
need += sizeof(LinkInfo);
|
||||
// alloc the buffer
|
||||
char *buf = (char *)mmalloc ( need,"LinkInfo");
|
||||
if ( ! buf ) return NULL;
|
||||
//char *buf = (char *)mmalloc ( need,"LinkInfo");
|
||||
//if ( ! buf ) return NULL;
|
||||
if ( ! linkInfoBuf->reserve ( need , "LinkInfo" ) ) return NULL;
|
||||
// set ourselves to this new buffer
|
||||
LinkInfo *info = (LinkInfo *)buf;
|
||||
LinkInfo *info = (LinkInfo *)(linkInfoBuf->getBufStart());
|
||||
|
||||
// set our header
|
||||
info->m_version = 0;
|
||||
@ -3487,7 +3495,7 @@ LinkInfo *makeLinkInfo ( char *coll ,
|
||||
|
||||
// point to our buf
|
||||
char *p = info->m_buf;
|
||||
char *pend = buf + need;
|
||||
char *pend = linkInfoBuf->getBufStart() + need;
|
||||
// count the ones we store that are internal
|
||||
long icount3 = 0;
|
||||
// now set each inlink
|
||||
|
9
Linkdb.h
9
Linkdb.h
@ -341,7 +341,8 @@ class Msg25 {
|
||||
// to not perform this algo in handleRequest20()'s
|
||||
// call to XmlDoc::getMsg20Reply().
|
||||
long ourHostHash32 , // = 0 ,
|
||||
long ourDomHash32 ); // = 0 );
|
||||
long ourDomHash32 , // = 0 );
|
||||
SafeBuf *myLinkInfoBuf );
|
||||
Msg25();
|
||||
~Msg25();
|
||||
void reset();
|
||||
@ -370,6 +371,9 @@ class Msg25 {
|
||||
|
||||
class LinkInfo *getLinkInfo () { return m_linkInfo; };
|
||||
|
||||
// m_linkInfo ptr references into here. provided by caller.
|
||||
SafeBuf *m_linkInfoBuf;
|
||||
|
||||
// private:
|
||||
// these need to be public for wrappers to call:
|
||||
bool gotTermFreq ( bool msg42Called ) ;
|
||||
@ -886,7 +890,8 @@ LinkInfo *makeLinkInfo ( char *coll ,
|
||||
long lastUpdateTime ,
|
||||
bool onlyNeedGoodInlinks ,
|
||||
long niceness ,
|
||||
class Msg25 *msg25 ) ;
|
||||
class Msg25 *msg25 ,
|
||||
SafeBuf *linkInfoBuf ) ;
|
||||
|
||||
// . set from the Msg20 replies in MsgE
|
||||
// . Msg20 uses this to set the LinkInfo class to the "outlinks"
|
||||
|
@ -2119,7 +2119,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
|
||||
if ( ! name ) {
|
||||
log("crawlbot: no crawl name given");
|
||||
char *msg = "invalid or missing \"name\"";
|
||||
char *msg = "invalid or missing name";
|
||||
return sendErrorReply2 (socket,fmt,msg);
|
||||
}
|
||||
|
||||
@ -2548,13 +2548,16 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
if ( fmt == FMT_HTML )
|
||||
sb.safePrintf ( "</center><br/>" );
|
||||
|
||||
// the ROOT JSON {
|
||||
if ( fmt == FMT_JSON )
|
||||
sb.safePrintf("{\n");
|
||||
|
||||
if ( fmt == FMT_JSON && injectionResponse )
|
||||
sb.safePrintf("{\"seedResponse\":\"%s\"},\n\n"
|
||||
sb.safePrintf("\"seedResponse\":\"%s\",\n\n"
|
||||
, injectionResponse->getBufStart() );
|
||||
|
||||
if ( fmt == FMT_JSON && urlUploadResponse )
|
||||
sb.safePrintf("{\"addUrlsResponse\":\"%s\"},\n\n"
|
||||
sb.safePrintf("\"addUrlsResponse\":\"%s\",\n\n"
|
||||
, urlUploadResponse->getBufStart() );
|
||||
|
||||
|
||||
@ -2565,7 +2568,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
//////
|
||||
|
||||
if ( fmt == FMT_JSON )
|
||||
sb.safePrintf("{\"crawls\":[");//\"collections\":");
|
||||
sb.safePrintf("\"crawls\":[");//\"collections\":");
|
||||
|
||||
long summary = hr->getLong("summary",0);
|
||||
// enter summary mode for json
|
||||
@ -2692,7 +2695,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
|
||||
if ( fmt == FMT_JSON )
|
||||
// end the array of collection objects
|
||||
sb.safePrintf("\n]}\n");
|
||||
sb.safePrintf("\n]\n");
|
||||
|
||||
///////
|
||||
//
|
||||
@ -3363,6 +3366,9 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
}
|
||||
|
||||
|
||||
// the ROOT JSON }
|
||||
if ( fmt == FMT_JSON )
|
||||
sb.safePrintf("}\n");
|
||||
|
||||
char *ct = "text/html";
|
||||
if ( fmt == FMT_JSON ) ct = "application/json";
|
||||
|
27
XmlDoc.cpp
27
XmlDoc.cpp
@ -180,6 +180,10 @@ static long long s_lastTimeStart = 0LL;
|
||||
|
||||
void XmlDoc::reset ( ) {
|
||||
|
||||
m_mySiteLinkInfoBuf.purge();
|
||||
m_myPageLinkInfoBuf.purge();
|
||||
m_myTempLinkInfoBuf.purge();
|
||||
|
||||
// reset count for nukeJSONObjects() function
|
||||
m_joc = 0;
|
||||
|
||||
@ -489,12 +493,14 @@ void XmlDoc::reset ( ) {
|
||||
if ( m_ahrefsDocValid ) nukeDoc ( m_ahrefsDoc );
|
||||
|
||||
if ( m_linkInfo1Valid && ptr_linkInfo1 && m_freeLinkInfo1 ) {
|
||||
mfree ( ptr_linkInfo1 , size_linkInfo1, "LinkInfo1");
|
||||
// it now points into m_myPageLinkInfoBuf !
|
||||
//mfree ( ptr_linkInfo1 , size_linkInfo1, "LinkInfo1");
|
||||
ptr_linkInfo1 = NULL;
|
||||
m_linkInfo1Valid = false;
|
||||
}
|
||||
if ( m_linkInfo2Valid && ptr_linkInfo2 && m_freeLinkInfo2 ) {
|
||||
mfree ( ptr_linkInfo2 , size_linkInfo2, "LinkInfo2");
|
||||
// should point into a safebuf as well
|
||||
//mfree ( ptr_linkInfo2 , size_linkInfo2, "LinkInfo2");
|
||||
ptr_linkInfo2 = NULL;
|
||||
m_linkInfo1Valid = false;
|
||||
}
|
||||
@ -11302,7 +11308,9 @@ LinkInfo *XmlDoc::getSiteLinkInfo() {
|
||||
|
||||
setStatus ( "getting site link info" );
|
||||
|
||||
if ( m_siteLinkInfoValid ) return m_msg25.m_linkInfo;
|
||||
if ( m_siteLinkInfoValid )
|
||||
//return msg25.m_linkInfo;
|
||||
return (LinkInfo *)m_mySiteLinkInfoBuf.getBufStart();
|
||||
char *mysite = getSite();
|
||||
if ( ! mysite || mysite == (void *)-1 ) return (LinkInfo *)mysite;
|
||||
long *fip = getFirstIp();
|
||||
@ -11366,7 +11374,9 @@ LinkInfo *XmlDoc::getSiteLinkInfo() {
|
||||
onlyNeedGoodInlinks ,
|
||||
false,
|
||||
0,
|
||||
0) )
|
||||
0,
|
||||
// it will store the linkinfo into this safebuf
|
||||
&m_mySiteLinkInfoBuf) )
|
||||
// return -1 if it blocked
|
||||
return (LinkInfo *)-1;
|
||||
// sanity check
|
||||
@ -12171,7 +12181,8 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
|
||||
onlyNeedGoodInlinks ,
|
||||
false, // getlinkertitles
|
||||
0, // ourhosthash32 (special)
|
||||
0 // ourdomhash32 (special)
|
||||
0, // ourdomhash32 (special)
|
||||
&m_myPageLinkInfoBuf
|
||||
) )
|
||||
// blocked
|
||||
return (LinkInfo *)-1;
|
||||
@ -12185,7 +12196,8 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
|
||||
|
||||
// at this point assume its valid
|
||||
m_linkInfo1Valid = true;
|
||||
// get the link info we got set
|
||||
// . get the link info we got set
|
||||
// . this ptr references into m_myPageLinkInfoBuf safebuf
|
||||
ptr_linkInfo1 = m_msg25.m_linkInfo;
|
||||
size_linkInfo1 = m_msg25.m_linkInfo->getSize();
|
||||
// we should free it
|
||||
@ -41359,7 +41371,8 @@ Msg25 *XmlDoc::getAllInlinks ( bool forSite ) {
|
||||
false ,//onlyneedgoodinlinks?
|
||||
false,//getlinkertitles?
|
||||
0, // ourhosthash32 (special)
|
||||
0)) // ourdomhash32 (special)
|
||||
0, // ourdomhash32 (special)
|
||||
&m_myTempLinkInfoBuf ) )
|
||||
// blocked?
|
||||
return (Msg25 *)-1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user