mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
limit posdb merging to 8 files max.
added some more url filters documentation.
This commit is contained in:
parent
78a4cfe6da
commit
65e75167e3
36
Parms.cpp
36
Parms.cpp
@ -702,6 +702,19 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
|
||||
"This is true if we have tried to spider "
|
||||
"this url, even if we got an error while trying."
|
||||
"</td></tr>"
|
||||
|
||||
"<tr><td>lastspidertime >= <b>{roundstart}</b></td>"
|
||||
"<td>"
|
||||
"This is true if the url's last spidered time "
|
||||
"indicates it was spidered already for this "
|
||||
"current round of spidering. When no more urls "
|
||||
"are available for spidering, then gigablast "
|
||||
"automatically sets {roundstart} to the current "
|
||||
"time so all the urls can be spidered again. This "
|
||||
"is how you do round-based spidering. "
|
||||
"You have to use the respider frequency as well "
|
||||
"to adjust how often you want things respidered."
|
||||
"</td></tr>"
|
||||
|
||||
|
||||
//"<tr><td>!newoutlink</td>"
|
||||
@ -725,7 +738,6 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
|
||||
"</td></tr>"
|
||||
|
||||
|
||||
|
||||
"<tr><td>isaddurl | !isaddurl</td>"
|
||||
"<td>"
|
||||
"This is true if the url was added from the add "
|
||||
@ -740,6 +752,28 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
|
||||
"/inject page or API."
|
||||
"</td></tr>"
|
||||
|
||||
"<tr><td>isdocidbased | !isdocidbased</td>"
|
||||
"<td>"
|
||||
"This is true if the url was added from the "
|
||||
"reindex interface. The request does not contain "
|
||||
"a url, but only a docid, that way we can add "
|
||||
"millions of search results very quickly without "
|
||||
"having to lookup each of their urls. You should "
|
||||
"definitely have this if you use the reindexing "
|
||||
"feature. You can temporarily disable the "
|
||||
"spidering enabled checkbox for non "
|
||||
"docidbased requests while you reindex or delete "
|
||||
"the results of a query for extra speed."
|
||||
"</td></tr>"
|
||||
|
||||
"<tr><td>ismanualadd | !ismanualadd</td>"
|
||||
"<td>"
|
||||
"This is true if the url was added manually. "
|
||||
"Which means it matches isaddurl, isinjected, "
|
||||
" or isdocidbased, as opposed to only "
|
||||
"being discovered from the spider. "
|
||||
"</td></tr>"
|
||||
|
||||
"<tr><td><nobr>inpingserver | !inpingserver"
|
||||
"</nobr></td>"
|
||||
"<td>"
|
||||
|
@ -1672,6 +1672,9 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
// . files must be consecutive, however
|
||||
// . but ALWAYS make sure file i-1 is bigger than file i
|
||||
n = numFiles - minToMerge + 2 ;
|
||||
// limit for posdb since more than about 8 gets abnormally slow
|
||||
if ( m_rdb && m_rdb->m_rdbId == RDB_POSDB && n > 8 )
|
||||
n = 8;
|
||||
// titledb should always merge at least 50 files no matter what though
|
||||
// cuz i don't want it merging its huge root file and just one
|
||||
// other file... i've seen that happen... but don't know why it didn't
|
||||
|
Loading…
Reference in New Issue
Block a user