mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 04:07:13 +03:00
Fix some memory leaks caused by allowing really big queries.
Added a "max query terms" control to the search controls to limit users from running really big queries. Its default is set very high (999999, i.e. roughly 1M).
This commit is contained in:
parent
fc4b4db425
commit
c8cf0e5440
25
Msg39.cpp
25
Msg39.cpp
@ -34,6 +34,10 @@ Msg39::Msg39 () {
|
||||
reset();
|
||||
}
|
||||
|
||||
Msg39::~Msg39 () {
|
||||
reset();
|
||||
}
|
||||
|
||||
void Msg39::reset() {
|
||||
if ( m_inUse ) { char *xx=NULL;*xx=0; }
|
||||
m_allocedTree = false;
|
||||
@ -46,8 +50,14 @@ void Msg39::reset() {
|
||||
|
||||
void Msg39::reset2() {
|
||||
// reset lists
|
||||
for ( int32_t j = 0 ; j < m_msg2.m_numLists && m_lists ; j++ )
|
||||
m_lists[j].freeList();
|
||||
int32_t nqt = m_stackBuf.getLength() / sizeof(RdbList);
|
||||
//for ( int32_t j = 0 ; j < m_msg2.m_numLists && m_lists ; j++ ) {
|
||||
for ( int32_t j = 0 ; j < nqt && m_lists ; j++ ) {
|
||||
//m_lists[j].freeList();
|
||||
//log("msg39: destroy list @ 0x%"PTRFMT,(PTRTYPE)&m_lists[j]);
|
||||
// same thing but more generic
|
||||
m_lists[j].destructor();
|
||||
}
|
||||
m_stackBuf.purge();
|
||||
m_lists = NULL;
|
||||
m_msg2.reset();
|
||||
@ -207,7 +217,8 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
|
||||
if ( ! m_tmpq.set2 ( m_r->ptr_query ,
|
||||
m_r->m_language ,
|
||||
m_r->m_queryExpansion ,
|
||||
m_r->m_useQueryStopWords ) ) {
|
||||
m_r->m_useQueryStopWords ,
|
||||
m_r->m_maxQueryTerms ) ) {
|
||||
log("query: msg39: setQuery: %s." ,
|
||||
mstrerror(g_errno) );
|
||||
sendReply ( m_slot , this , NULL , 0 , 0 , true );
|
||||
@ -767,11 +778,15 @@ bool Msg39::getLists () {
|
||||
|
||||
|
||||
int32_t nqt = m_tmpq.getNumTerms();
|
||||
if ( ! m_stackBuf.reserve ( sizeof(RdbList) * nqt ) ) return true;
|
||||
int32_t need = sizeof(RdbList) * nqt ;
|
||||
m_stackBuf.setLabel("stkbuf2");
|
||||
if ( ! m_stackBuf.reserve ( need ) ) return true;
|
||||
m_lists = (IndexList *)m_stackBuf.getBufStart();
|
||||
for ( int32_t i = 0 ; i < nqt ; i++ )
|
||||
m_stackBuf.setLength ( need );
|
||||
for ( int32_t i = 0 ; i < nqt ; i++ ) {
|
||||
m_lists[i].constructor();
|
||||
//log("msg39: constructlist @ 0x%"PTRFMT,(PTRTYPE)&m_lists[i]);
|
||||
}
|
||||
|
||||
// call msg2
|
||||
if ( ! m_msg2.getLists ( rdbId ,
|
||||
|
1
Msg39.h
1
Msg39.h
@ -216,6 +216,7 @@ class Msg39 {
|
||||
public:
|
||||
|
||||
Msg39();
|
||||
~Msg39();
|
||||
void reset();
|
||||
void reset2();
|
||||
// register our request handler for Msg39's
|
||||
|
@ -666,7 +666,7 @@ bool Msg40::federatedLoop ( ) {
|
||||
mr.size_whiteList = slen;
|
||||
mr.m_timeout = -1; // auto-determine based on #terms
|
||||
// make sure query term counts match in msg39
|
||||
mr.m_maxQueryTerms = m_si->m_maxQueryTerms;
|
||||
//mr.m_maxQueryTerms = m_si->m_maxQueryTerms;
|
||||
mr.m_realMaxTop = m_si->m_realMaxTop;
|
||||
|
||||
mr.m_minSerpDocId = m_si->m_minSerpDocId;
|
||||
@ -699,6 +699,9 @@ bool Msg40::federatedLoop ( ) {
|
||||
//if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
|
||||
//}
|
||||
|
||||
if ( cr ) mr.m_maxQueryTerms = cr->m_maxQueryTerms;
|
||||
else mr.m_maxQueryTerms = 100;
|
||||
|
||||
// special oom hack fix
|
||||
if ( cr && cr->m_isCustomCrawl && numDocIdSplits < 4 )
|
||||
numDocIdSplits = 4;
|
||||
|
22
Parms.cpp
22
Parms.cpp
@ -7879,17 +7879,19 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
//m->m_title = "max query terms";
|
||||
//m->m_desc = "Do not allow more than this many query terms. Will "
|
||||
// "return error in XML feed error tag if breeched.";
|
||||
//m->m_cgi = "mqt";
|
||||
//m->m_off = (char *)&cr.m_maxQueryTerms - x;
|
||||
m->m_title = "max query terms";
|
||||
m->m_desc = "Do not allow more than this many query terms. Helps "
|
||||
"prevent big queries from resource hogging.";
|
||||
m->m_cgi = "mqt";
|
||||
m->m_off = (char *)&cr.m_maxQueryTerms - x;
|
||||
//m->m_soff = (char *)&si.m_maxQueryTerms - y;
|
||||
//m->m_type = TYPE_LONG;
|
||||
//m->m_def = "20"; // 20 for testing, normally 16
|
||||
//m->m_sparm = 1;
|
||||
//m->m_spriv = 1;
|
||||
//m++;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "999999"; // now we got synonyms... etc
|
||||
m->m_group = 0;
|
||||
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "dictionary site";
|
||||
|
20
Query.cpp
20
Query.cpp
@ -143,14 +143,16 @@ bool Query::set2 ( char *query ,
|
||||
// need language for doing synonyms
|
||||
uint8_t langId ,
|
||||
char queryExpansion ,
|
||||
bool useQueryStopWords ) {
|
||||
//int32_t maxQueryTerms ) {
|
||||
bool useQueryStopWords ,
|
||||
int32_t maxQueryTerms ) {
|
||||
|
||||
m_langId = langId;
|
||||
m_useQueryStopWords = useQueryStopWords;
|
||||
// fix summary rerank and highlighting.
|
||||
bool keepAllSingles = true;
|
||||
|
||||
m_maxQueryTerms = maxQueryTerms;
|
||||
|
||||
// assume boolean auto-detect.
|
||||
char boolFlag = 2;
|
||||
|
||||
@ -162,7 +164,7 @@ bool Query::set2 ( char *query ,
|
||||
if ( ! query ) return true;
|
||||
|
||||
// set to 256 for synonyms?
|
||||
m_maxQueryTerms = 256;
|
||||
//m_maxQueryTerms = 256;
|
||||
m_queryExpansion = queryExpansion;
|
||||
|
||||
int32_t queryLen = gbstrlen(query);
|
||||
@ -676,7 +678,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
nqt += naids;
|
||||
}
|
||||
|
||||
|
||||
if ( nqt > m_maxQueryTerms ) nqt = m_maxQueryTerms;
|
||||
|
||||
// allocate the stack buf
|
||||
if ( nqt ) {
|
||||
@ -722,6 +724,11 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
"limit of %"INT32"",(int32_t)ABS_MAX_QUERY_TERMS );
|
||||
break;
|
||||
}
|
||||
if ( n >= m_maxQueryTerms ) {
|
||||
log("query: lost query phrase terms to max term cr "
|
||||
"limit of %"INT32"",(int32_t)m_maxQueryTerms);
|
||||
break;
|
||||
}
|
||||
|
||||
QueryTerm *qt = &m_qterms[n];
|
||||
qt->m_qword = qw ;
|
||||
@ -880,6 +887,11 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
"limit of %"INT32"",(int32_t)ABS_MAX_QUERY_TERMS );
|
||||
break;
|
||||
}
|
||||
if ( n >= m_maxQueryTerms ) {
|
||||
log("query: lost query terms to max term cr "
|
||||
"limit of %"INT32"",(int32_t)m_maxQueryTerms);
|
||||
break;
|
||||
}
|
||||
|
||||
QueryTerm *qt = &m_qterms[n];
|
||||
qt->m_qword = qw ;
|
||||
|
8
Query.h
8
Query.h
@ -635,10 +635,10 @@ class Query {
|
||||
//int32_t collLen ,
|
||||
uint8_t langId ,
|
||||
char queryExpansion ,
|
||||
bool useQueryStopWords = true );
|
||||
//char boolFlag = 2 , // auto-detect if boolean query
|
||||
//bool keepAllSingles = false ,
|
||||
//int32_t maxQueryTerms = 0x7fffffff );
|
||||
bool useQueryStopWords = true ,
|
||||
//char boolFlag = 2 , // auto-detect if boolean query
|
||||
//bool keepAllSingles = false ,
|
||||
int32_t maxQueryTerms = 0x7fffffff );
|
||||
|
||||
// serialize/deserialize ourselves so we don't have to pass the
|
||||
// unmodified string around and reparse it every time
|
||||
|
@ -624,7 +624,8 @@ bool RdbList::growList ( int32_t newSize ) {
|
||||
// don't shrink list
|
||||
if ( newSize <= m_allocSize ) return true;
|
||||
// debug msg
|
||||
//log("RdbList::growList from %"INT32" to %"INT32"",m_allocSize , newSize );
|
||||
// log("RdbList::growList 0x%"PTRFMT "from %"INT32" to %"INT32"",
|
||||
// (PTRTYPE)this,m_allocSize , newSize );
|
||||
// make a new buffer
|
||||
char *tmp =(char *) mrealloc ( m_alloc,m_allocSize,newSize,"RdbList");
|
||||
//if ( (int32_t)tmp == 0x904dbd0 )
|
||||
|
@ -13,6 +13,7 @@ Summary::Summary()
|
||||
m_bitScoresBuf = NULL;
|
||||
m_bitScoresBufSize = 0;
|
||||
m_wordWeights = NULL;
|
||||
m_buf4 = NULL;
|
||||
reset();
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user