limit posdb merging to 8 files max.

added some more url filters documentation.
This commit is contained in:
Matt Wells 2013-12-08 09:41:05 -07:00
parent 78a4cfe6da
commit 65e75167e3
2 changed files with 38 additions and 1 deletions

View File

@ -703,6 +703,19 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
"this url, even if we got an error while trying."
"</td></tr>"
"<tr><td>lastspidertime >= <b>{roundstart}</b></td>"
"<td>"
"This is true if the url's last spidered time "
"indicates it was spidered already for this "
"current round of spidering. When no more urls "
"are available for spidering, then gigablast "
"automatically sets {roundstart} to the current "
"time so all the urls can be spidered again. This "
"is how you do round-based spidering. "
"You have to use the respider frequency as well "
"to adjust how often you want things respidered."
"</td></tr>"
//"<tr><td>!newoutlink</td>"
//"<td>Matches if document is NOT a new outlink."
@ -725,7 +738,6 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
"</td></tr>"
"<tr><td>isaddurl | !isaddurl</td>"
"<td>"
"This is true if the url was added from the add "
@ -740,6 +752,28 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r , long page ,
"/inject page or API."
"</td></tr>"
"<tr><td>isdocidbased | !isdocidbased</td>"
"<td>"
"This is true if the url was added from the "
"reindex interface. The request does not contain "
"a url, but only a docid, that way we can add "
"millions of search results very quickly without "
"having to look up each of their urls. You should "
"definitely have this if you use the reindexing "
"feature. You can temporarily disabled the "
"spidering enabled checkbox for non "
"docidbased requests while you reindex or delete "
"the results of a query for extra speed."
"</td></tr>"
"<tr><td>ismanualadd | !ismanualadd</td>"
"<td>"
"This is true if the url was added manually. "
"Which means it matches isaddurl, isinjected, "
"or isdocidbased, as opposed to only "
"being discovered from the spider. "
"</td></tr>"
"<tr><td><nobr>inpingserver | !inpingserver"
"</nobr></td>"
"<td>"

View File

@ -1672,6 +1672,9 @@ void RdbBase::gotTokenForMerge ( ) {
// . files must be consecutive, however
// . but ALWAYS make sure file i-1 is bigger than file i
n = numFiles - minToMerge + 2 ;
// limit for posdb since more than about 8 gets abnormally slow
if ( m_rdb && m_rdb->m_rdbId == RDB_POSDB && n > 8 )
n = 8;
// titledb should always merge at least 50 files no matter what though
// cuz i don't want it merging its huge root file and just one
// other file... i've seen that happen... but don't know why it didn't