Merge branch 'diffbot' into diffbot-testing

This commit is contained in:
Matt Wells 2014-03-17 17:27:28 -07:00
commit 6e23d37e47
35 changed files with 336 additions and 110 deletions

View File

@ -859,7 +859,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
&msecs);
sb.safePrintf("<tr><td colspan=18 bgcolor=#%s>"
"<center><b>Code Usage "
"(<a href=\"/master/"
"(<a href=\"/admin/"
"autoban?c=%s&resetcodes=1\">reset</a> "
"%li days %li hours %li "
"minutes %li sec ago)"
@ -1271,15 +1271,15 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
// "%li days %li hrs %li min ago"
// "</center></td>"
"<td><center><a href=\"/master/"
"<td><center><a href=\"/admin/"
"autoban?c=%s&allow=%s&showAllIps=%li\">"
"allow/</a>"
"<a href=\"/master/"
"<a href=\"/admin/"
"autoban?c=%s&deny=%s&showAllIps=%li\">"
"deny/</a>"
"<a href=\"/master/"
"<a href=\"/admin/"
"autoban?c=%s&clear=%s&showAllIps=%li\">"
"clear</a></center>"
"</td>",color,
@ -1320,22 +1320,22 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
"<td bgcolor=#%s><center><b>Show Ips by Number of Queries"
"</b></center></td>",
LIGHT_BLUE);
sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
"autoban?c=%s&showAllIps=0\">"
"0 Queries</a></b>"
"</font></center></td>",
coll);
sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
"autoban?c=%s&showAllIps=1\">"
"1 Query</a></b>"
"</font></center></td>",
coll);
sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
"autoban?c=%s&showAllIps=10\">"
"10 Queries</a></b>"
"</font></center></td>",
coll);
sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
"autoban?c=%s&showAllIps=100\">"
"100 Queries</a></b>"
"</font></center></td></tr>",
@ -1469,10 +1469,10 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
m_detectVals[i].m_timesBanned);
}
sb.safePrintf("<td><center>"
"<a href=\"/master/"
"<a href=\"/admin/"
"autoban?c=%s&allow=%s&showAllIps=%li\">"
"allow/</a>"
"<a href=\"/master/"
"<a href=\"/admin/"
"autoban?c=%s&deny=%s&showAllIps=%li\">"
"deny</a></center>"
"</td>",

View File

@ -468,6 +468,9 @@ bool BigFile::readwrite ( void *buf ,
fstate->m_callback = callback;
fstate->m_niceness = niceness;
fstate->m_flags = m_flags;
// sanity
if ( fstate->m_bytesToGo > 150000000 )
log("file: huge read of %lli bytes",(long long)size);
// . set our fd's before entering the thread in case RdbMerge
// calls our unlinkPart()
// . it's thread-UNsafe to call getfd() from within the thread
@ -563,10 +566,12 @@ bool BigFile::readwrite ( void *buf ,
// request originated through Multicast, then multicast will sleep
// and retry. Msg3 could retry, the multicast thing should be more
// for running out of udp slots though...
if ( g_errno && ! doWrite && g_errno != ENOTHREADSLOTS ) {
log (LOG_INFO,"disk: May retry later.");
return true;
}
// crap, call to clone() now fails a lot since we use pthreads
// library ... so assume that is it i guess (MDW 3/15/2014)
//if ( g_errno && ! doWrite && g_errno != ENOTHREADSLOTS ) {
// log (LOG_INFO,"disk: May retry later.");
// return true;
//}
// otherwise, thread spawn failed, do it blocking then
g_errno = 0;
// if threads are manually disabled don't print these msgs because
@ -577,7 +582,8 @@ bool BigFile::readwrite ( void *buf ,
if ( now - s_lastTime >= 1 ) {
s_lastTime = now;
log (LOG_INFO,
"disk: Doing blocking disk access. This will hurt "
"disk: Doing blocking disk access. "
"This will hurt "
"performance. isWrite=%li.",(long)doWrite);
}
}

View File

@ -395,6 +395,8 @@ bool Clusterdb::verify ( char *coll ) {
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
//unsigned long groupId = getGroupId ( RDB_CLUSTERDB , &k );
//if ( groupId == g_hostdb.m_groupId ) got++;

View File

@ -108,7 +108,10 @@ bool DiskPageCache::init ( const char *dbname ,
// void (*rmVfd2)(DiskPageCache*, long) ) {
reset();
// fix cores while rebalancing
// seems like we lose data when it prints "Caught add breach"
// so let's stop using until we fix that... happens while we are
// dumping i think and somehow the data seems to get lost that
// we were dumping.
//maxMem = 0;
m_rdbId = rdbId;

View File

@ -1903,7 +1903,7 @@ long getMsgSize ( char *buf, long bufSize, TcpSocket *s ) {
max = 0x7fffffff; // maxOtherDocLen not available
// if post is a /cgi/12.cgi (tagdb) allow 10 megs
//if ( pp + 11 < ppend && strncmp ( pp ,"/cgi/12.cgi",11)==0)
if ( pp + 11 < ppend && strncmp ( pp ,"/master/tagdb",13)==0)
if ( pp + 12 < ppend && strncmp ( pp ,"/admin/tagdb",12)==0)
max = 10*1024*1024;
if ( pp + 4 < ppend && strncmp ( pp ,"/vec",4)==0)
max = 0x7fffffff;

View File

@ -233,6 +233,8 @@ bool Linkdb::verify ( char *coll ) {
list.skipCurrentRecord() ) {
key224_t k;
list.getCurrentKey((char*)&k);
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
//uint32_t shardNum = getShardNum ( RDB_LINKDB , &k );
//if ( groupId == g_hostdb.m_groupId ) got++;
@ -2588,7 +2590,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
for ( long j = 0 ; j < MAX_ENTRY_DOCIDS ; j++ ) {
if ( e->m_docIds[j] == -1LL ) break;
if ( ! m_printInXml )
m_pbuf->safePrintf ("<a href=\"/master/titledb"
m_pbuf->safePrintf ("<a href=\"/admin/titledb"
"?c=%s&d=%lli\">"
"%li</a> ",
coll,e->m_docIds[j],j);
@ -4608,7 +4610,7 @@ bool LinkInfo::print ( SafeBuf *sb , char *coll ) {
"<tr><td colspan=2>link #%04li "
"("
//"baseScore=%010li, "
"d=<a href=\"/master/titledb?c=%s&"
"d=<a href=\"/admin/titledb?c=%s&"
"d=%lli\">%016lli</a>, "
"siterank=%li, "
"hopcount=%03li "

View File

@ -2156,7 +2156,9 @@ bool Msg40::gotSummary ( ) {
m_docsToGet , m_msg3aRecallCnt);
// if we do not have enough visible, try to get more
if ( visible < m_docsToGetVisible && m_msg3a.m_moreDocIdsAvail ) {
if ( visible < m_docsToGetVisible && m_msg3a.m_moreDocIdsAvail &&
// doesn't work on multi-coll just yet, it cores
m_numCollsToSearch == 1 ) {
// can it cover us?
long need = m_msg3a.m_docsToGet + 20;
// note it

View File

@ -722,6 +722,11 @@ bool Msg5::readList ( ) {
}
}
// limit to 20MB so we don't go OOM!
if ( m_newMinRecSizes > 2 * m_minRecSizes &&
m_newMinRecSizes > 20000000 )
m_newMinRecSizes = 20000000;
QUICKPOLL((m_niceness));
// debug msg
@ -849,6 +854,9 @@ bool Msg5::needsRecall ( ) {
// seems to be very common for doledb, so don't log unless extreme
//if ( m_rdbId == RDB_DOLEDB && m_round < 15 ) logIt = false;
if ( m_round > 100 && (m_round % 1000) != 0 ) logIt = false;
// seems very common when doing rebalancing then merging to have
// to do at least one round of re-reading, so note that
if ( m_round == 0 ) logIt = false;
if ( logIt )
logf(LOG_DEBUG,"db: Reading %li again from %s (need %li total "
"got %li) this=0x%lx round=%li.",

View File

@ -181,14 +181,14 @@ bool sendReply ( void *state ) {
// print the generate Catdb link
sb.safePrintf ( "<tr class=poo><td>Update Catdb from DMOZ data.</td>"
"<td><center>"
"<a href=\"/master/catdb?c=%s&gencatdb=2\">"
"<a href=\"/admin/catdb?c=%s&gencatdb=2\">"
"Update Catdb</a> "
"</center></td></tr>",
st->m_coll );
sb.safePrintf ( "<tr class=poo>"
"<td>Generate New Catdb from DMOZ data.</td>"
"<td><center>"
"<a href=\"/master/catdb?c=%s&gencatdb=1\">"
"<a href=\"/admin/catdb?c=%s&gencatdb=1\">"
"Generate Catdb</a> "
"</center></td></tr>",
st->m_coll );

View File

@ -7610,8 +7610,8 @@ bool printAdminLinks ( SafeBuf &sb , State7 *st ) {
// get the filename directly
sb.safePrintf (" &nbsp; "
"<font color=red><b>"
//"<a href=\"/master/tagdb?f=%li&c=%s&u=%s\">"
"<a href=\"/master/tagdb?"
//"<a href=\"/admin/tagdb?f=%li&c=%s&u=%s\">"
"<a href=\"/admin/tagdb?"
//"tagid0=%li&"
"tagtype0=manualban&"
"tagdata0=1&"
@ -7631,7 +7631,7 @@ bool printAdminLinks ( SafeBuf &sb , State7 *st ) {
//long bannedTagId = getTagTypeFromStr("manualban",9);
sb.safePrintf (" &nbsp; "
"<font color=red><b>"
"<a href=\"/master/tagdb?"
"<a href=\"/admin/tagdb?"
//"tagid0=%li&"
"tagtype0=manualban&"
"tagdata0=1&"
@ -7876,7 +7876,7 @@ void printAdminEventOptions ( SafeBuf* sb,
sb->safePrintf("Ban By Domain: ");
//long bannedTagId = getTagTypeFromStr("manualban",9);
sb->safePrintf("<a href=\"/master/tagdb?"
sb->safePrintf("<a href=\"/admin/tagdb?"
"tagtype0=manualban&"
"tagdata0=1&"
"u=%s&c=%s\">"
@ -8561,13 +8561,13 @@ static bool printResult ( CollectionRec *cr,
// . if it's local, don't put the hostname/port in
// there cuz it will mess up Global Spec's machine
//if ( h->m_groupId == g_hostdb.m_groupId )
sb.safePrintf(" - <a href=\"/master/titledb?c=%s&"
sb.safePrintf(" - <a href=\"/admin/titledb?c=%s&"
"d=%lli",coll,mr->m_docId);
// then the [info] link to show the TitleRec
sb.safePrintf ( "\">[info]</a>" );
// now the analyze link
sb.safePrintf (" - <a href=\"/master/parser?c=%s&"
sb.safePrintf (" - <a href=\"/admin/parser?c=%s&"
"old=1&hc=%li&u=",
coll,
(long)mr->m_hopcount);
@ -8629,7 +8629,7 @@ static bool printResult ( CollectionRec *cr,
dbuf ,
coll , dbuf );
sb.safePrintf(" - "
" <a href=\"/master/tagdb?"
" <a href=\"/admin/tagdb?"
"tagtype0=manualban&"
"tagdata0=1&"
"u=%s&c=%s\">"
@ -8641,7 +8641,7 @@ static bool printResult ( CollectionRec *cr,
memcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
sb.safePrintf(" - "
" <a href=\"/master/tagdb?"
" <a href=\"/admin/tagdb?"
"tagtype0=manualban&"
"tagdata0=1&"
"u=%s&c=%s\">"
@ -17616,7 +17616,7 @@ bool gotCaptchaReply ( State9 *st9 , TcpSocket *s ) {
if ( st9->m_isAdmin && 1 == 2) {
SafeBuf ttt;
ttt.safePrintf("<br>"
"<a href=/master/parser?"
"<a href=/admin/parser?"
//"user=mwells&pwd=mwell62&"
"c=%s&u=%s&content=",
st9->m_coll,

View File

@ -131,7 +131,7 @@ skipReplaceHost:
if ( g_conf.m_useShotgun ) {
colspan = "31";
//shotcol = "<td><b>ip2</b></td>";
sprintf ( shotcol, "<td><a href=\"/master/hosts?c=%s"
sprintf ( shotcol, "<td><a href=\"/admin/hosts?c=%s"
"&sort=2\">"
"<b>ping2</b></td></a>",
coll);
@ -143,12 +143,12 @@ skipReplaceHost:
"<tr><td colspan=%s><center>"
//"<font size=+1>"
"<b>Hosts "
"(<a href=\"/master/hosts?c=%s&sort=%li&reset=1\">"
"(<a href=\"/admin/hosts?c=%s&sort=%li&reset=1\">"
"reset)</b>"
//"</font>"
"</td></tr>"
"<tr bgcolor=#%s>"
"<td><a href=\"/master/hosts?c=%s&sort=0\">"
"<td><a href=\"/admin/hosts?c=%s&sort=0\">"
"<b>hostId</b></td>"
"<td><b>host ip</b></td>"
@ -188,52 +188,52 @@ skipReplaceHost:
//"<td><b>resends sent</td>"
//"<td><b>errors recvd</td>"
//"<td><b>ETRYAGAINS recvd</td>"
"<td><a href=\"/master/hosts?c=%s&sort=3\">"
"<td><a href=\"/admin/hosts?c=%s&sort=3\">"
"<b>dgrams resent</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=4\">"
"<td><a href=\"/admin/hosts?c=%s&sort=4\">"
"<b>errors recvd</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=5\">"
"<td><a href=\"/admin/hosts?c=%s&sort=5\">"
"<b>ETRY AGAINS recvd</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=6\">"
"<td><a href=\"/admin/hosts?c=%s&sort=6\">"
"<b>dgrams to</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=7\">"
"<td><a href=\"/admin/hosts?c=%s&sort=7\">"
"<b>dgrams from</a></td>"
//"<td><a href=\"/master/hosts?c=%s&sort=8\">"
//"<td><a href=\"/admin/hosts?c=%s&sort=8\">"
//"<b>loadavg</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=13\">"
"<td><a href=\"/admin/hosts?c=%s&sort=13\">"
"<b>avg split time</a></td>"
"<td><b>splits done</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=12\">"
"<td><a href=\"/admin/hosts?c=%s&sort=12\">"
"<b>status</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=15\">"
"<td><a href=\"/admin/hosts?c=%s&sort=15\">"
"<b>slow reads</a></td>"
"<td><b>docs indexed</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=9\">"
"<td><a href=\"/admin/hosts?c=%s&sort=9\">"
"<b>mem used</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=10\">"
"<td><a href=\"/admin/hosts?c=%s&sort=10\">"
"<b>cpu</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=17\">"
"<td><a href=\"/admin/hosts?c=%s&sort=17\">"
"<b>disk</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=14\">"
"<td><a href=\"/admin/hosts?c=%s&sort=14\">"
"<b>max ping1</a></td>"
"<td><a href=\"/master/hosts?c=%s&sort=11\">"
"<td><a href=\"/admin/hosts?c=%s&sort=11\">"
"<b>ping1 age</a></td>"
//"<td><b>ip1</td>"
"<td><a href=\"/master/hosts?c=%s&sort=1\">"
"<td><a href=\"/admin/hosts?c=%s&sort=1\">"
"<b>ping1</a></td>"
"%s"// "<td><b>ip2</td>"
@ -452,7 +452,7 @@ skipReplaceHost:
// print it
sb.safePrintf (
"<tr bgcolor=#%s>"
"<td><a href=\"http://%s:%hi/master/hosts?"
"<td><a href=\"http://%s:%hi/admin/hosts?"
""
"c=%s"
"&sort=%li\">%li</a></td>"
@ -711,7 +711,7 @@ skipReplaceHost:
sb.safePrintf (
"<tr bgcolor=#%s>"
"<td><a href=\"http://%s:%hi/master/hosts?"
"<td><a href=\"http://%s:%hi/admin/hosts?"
""
"c=%s\">"
"%li</a></td>"

View File

@ -535,8 +535,8 @@ bool gotIndexList2 ( void *state , RdbList *list ) {
"<tr><td>%li.</td>"
"<td>%s%i</td>"
"<td>"
//"<a href=http://%s:%hu/master/titledb?d=%llu>"
"<a href=/master/titledb?c=%s&d=%llu>"
//"<a href=http://%s:%hu/admin/titledb?d=%llu>"
"<a href=/admin/titledb?c=%s&d=%llu>"
"%llu"
//"<td><a href=/cgi/4.cgi?d=%llu>%llu"
"</td>"
@ -602,8 +602,8 @@ bool gotIndexList2 ( void *state , RdbList *list ) {
"<td>%llu</td>"
"<td>%lu</td><td>%i</td>"
"<td>"
//"<a href=http://%s:%hu/master/titledb?d=%llu>"
"<a href=/master/titledb?c=%s&d=%llu>"
//"<a href=http://%s:%hu/admin/titledb?d=%llu>"
"<a href=/admin/titledb?c=%s&d=%llu>"
"%llu"
//"<td><a href=/cgi/4.cgi?d=%llu>%llu"
"</td></tr>\n" ,

View File

@ -1451,8 +1451,8 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) {
"You can specify different indexing and spider parameters on a per URL basis by one or more of the following methods:\n"
"<br><br>\n"
"<ul>\n"
"<li>Using the <a href=\"/master/tagdb\">tagdb interface</a>, you can assign a <a href=#ruleset>ruleset</a> to a set of sites. All you do is provide Gigablast with a list of sites and the ruleset to use for those sites.\n"
"You can enter the sites via the <a href=\"/master/tagdb\">HTML form</a> or you can provide Gigablast with a file of the sites. Each file must be limited to 1 Megabyte, but you can add hundreds of millions of sites. \n"
"<li>Using the <a href=\"/admin/tagdb\">tagdb interface</a>, you can assign a <a href=#ruleset>ruleset</a> to a set of sites. All you do is provide Gigablast with a list of sites and the ruleset to use for those sites.\n"
"You can enter the sites via the <a href=\"/admin/tagdb\">HTML form</a> or you can provide Gigablast with a file of the sites. Each file must be limited to 1 Megabyte, but you can add hundreds of millions of sites. \n"
"Sites can be full URLs, hostnames, domain names or IP addresses.\n"
"If you add a site which is just a canonical domain name with no explicit host name, like gigablast.com, then any URL with the same domain name, regardless of its host name will match that site. That is, \"hostname.gigablast.com\" will match the site \"gigablast.com\" and therefore be assigned the associated ruleset.\n"
"Sites may also use IP addresses instead of domain names. If the least significant byte of an IP address that you submit to tagdb is 0 then any URL with the same top 3 IP bytes as that IP will be considered a match.\n"
@ -1917,7 +1917,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) {
"<br>\n"
"After the base score is computed, it is multiplied by the number of occurences of the word or phrase in the portion of the document being indexed as specified by the index rule. This score may then be reduced if spam detection occurred and the word or phrase was deemed repetitious. Spam detection is triggered when the quality of the document is at or below the value specified in the &lt;minQualityForSpamDetect&gt; tag in the index rule. Finally, the score is mapped into an 8 bit value, from 1 to 255, and stored in the index."
"<br><br>\n"
"To see the scoring algorithm in action you can use the <b><a href=\"/master/parser\">Parser Tool</a></b>. It will show each indexed word and phrase and its associated score, as well as some attributes associated with the indexed document."
"To see the scoring algorithm in action you can use the <b><a href=\"/admin/parser\">Parser Tool</a></b>. It will show each indexed word and phrase and its associated score, as well as some attributes associated with the indexed document."
""
"<br>\n"
"<br>\n"

View File

@ -537,7 +537,7 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
long dlen;
char *dbuf = ::getDomFast ( hostname,&dlen,false);
p->safePrintf(
" <a href=\"/master/tagdb?"
" <a href=\"/admin/tagdb?"
"user=admin&"
"tagtype0=manualban&"
"tagdata0=1&"

View File

@ -620,7 +620,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
"<td colspan=50>"
"<center><b>Spider Compression Proxy Stats</b> "
" &nbsp; [<a href=\"/master/stats?reset=2\">"
" &nbsp; [<a href=\"/admin/stats?reset=2\">"
"reset</a>]</td></tr>\n"
"<tr class=poo>"
@ -828,7 +828,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
"<td colspan=50>"
"<center><b>Message Stats</b> "
" &nbsp; [<a href=\"/master/stats?reset=1\">"
" &nbsp; [<a href=\"/admin/stats?reset=1\">"
"reset</a>]</td></tr>\n"
"<tr class=poo>"

View File

@ -284,8 +284,8 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</font>"
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b><a href=\"/master/thesaurus?rebuild=1&%s\">"
"rebuild all data</a> <a href=\"/master/thesaurus?"
"<center><b><a href=\"/admin/thesaurus?rebuild=1&%s\">"
"rebuild all data</a> <a href=\"/admin/thesaurus?"
"rebuild=1&full=1&%s\">(full)</a></b></center>"
"</td>"
"</tr>\n", getBuf, getBuf);
@ -300,7 +300,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</font>"
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b><a href=\"/master/thesaurus?distribute=1&%s\">"
"<center><b><a href=\"/admin/thesaurus?distribute=1&%s\">"
"distribute data</a></b></center>"
"</td>"
"</tr>\n", getBuf);
@ -314,7 +314,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b>"
"<a href=\"/master/thesaurus?reload=1&cast=0&%s\">"
"<a href=\"/admin/thesaurus?reload=1&cast=0&%s\">"
"reload data</a></b></center>"
"</td>"
"</tr>\n", getBuf);
@ -328,7 +328,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b>"
"<a href=\"/master/thesaurus?reload=1&cast=1&%s\">"
"<a href=\"/admin/thesaurus?reload=1&cast=1&%s\">"
"reload data (all hosts)</a></b></center>"
"</td>"
"</tr>\n", getBuf);
@ -342,7 +342,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</font>"
"</td>"
"<td width=12%%>"
"<form action=\"/master/thesaurus>\">"
"<form action=\"/admin/thesaurus>\">"
"<input type=text name=synonym size=20>"
"<input type=submit value=Submit>"
"%s"
@ -365,7 +365,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</font>"
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b><a href=\"/master/thesaurus?cancel=1&%s\">"
"<center><b><a href=\"/admin/thesaurus?cancel=1&%s\">"
"cancel running rebuild</a></b></center>"
"</td>"
"</tr>\n", getBuf);
@ -380,8 +380,8 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"</font>"
"</td>"
"<td width=12%% bgcolor=#0000ff>"
"<center><b><a href=\"/master/thesaurus?rebuildaff=1&%s\">"
"rebuild affinity</a> <a href=\"/master/thesaurus?"
"<center><b><a href=\"/admin/thesaurus?rebuildaff=1&%s\">"
"rebuild affinity</a> <a href=\"/admin/thesaurus?"
"rebuildaff=1&full=1&%s\">(full)</a></b></center>"
"</td>"
"</tr>\n", getBuf, getBuf);
@ -405,7 +405,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"character, optionally followed by another pipe and a type "
"designation; any badly formatted lines will be silently "
"ignored</font><br>\n"
"<form action=\"/master/thesaurus\" method=post>"
"<form action=\"/admin/thesaurus\" method=post>"
"<textarea name=\"manualadd\" rows=20 cols=80>");
if (manualAdd && manualAddLen) {
@ -434,7 +434,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
"that these pairs will only work if the thesaurus otherwise "
"has an entry for them, so add them to the manual add file "
"above if need be</font><br>\n"
"<form action=\"/master/thesaurus\" method=post>"
"<form action=\"/admin/thesaurus\" method=post>"
"<textarea name=\"affinityadd\" rows=20 cols=80>");
if (affinityAdd && affinityAddLen) {

View File

@ -16794,14 +16794,15 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
if ( strncmp(path,"/crawlbot",9) == 0 ) customCrawl = 1;
if ( strncmp(path,"/v2/crawl",9) == 0 ) customCrawl = 1;
if ( strncmp(path,"/v2/bulk" ,8) == 0 ) customCrawl = 2;
if (cr) {
// throw error if collection record custom crawl type doesn't equal the crawl type of current request
if (customCrawl != cr->m_isCustomCrawl) {
g_errno = ECUSTOMCRAWLMISMATCH;
return false;
// throw error if collection record custom crawl type doesn't equal
// the crawl type of current request
if (cr && customCrawl && customCrawl != cr->m_isCustomCrawl ) {
g_errno = ECUSTOMCRAWLMISMATCH;
return false;
}
}
bool hasAddCrawl = hr->hasField("addCrawl");
bool hasAddCrawl = hr->hasField("addCrawl");
bool hasAddBulk = hr->hasField("addBulk");
bool hasAddColl = hr->hasField("addColl");
// sometimes they try to delete a collection that is not there so do

View File

@ -289,6 +289,8 @@ bool Posdb::verify ( char *coll ) {
list.skipCurrentRecord() ) {
key144_t k;
list.getCurrentKey(&k);
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
//unsigned long groupId = getGroupId ( RDB_POSDB , &k );

View File

@ -403,6 +403,7 @@ Process::Process ( ) {
bool Process::init ( ) {
// -1 means unknown
m_diskUsage = -1.0;
m_diskAvail = -1LL;
// we do not know if the fans are turned off or on
m_currentFanState = -1;
m_threadOut = false;
@ -877,12 +878,14 @@ void hdtempDoneWrapper ( void *state , ThreadEntry *t ) {
// set Process::m_diskUsage
float getDiskUsage ( ) {
float getDiskUsage ( long long *diskAvail ) {
// first get disk usage now
char cmd[10048];
char *out = "/tmp/diskusage";
snprintf(cmd,10000,"df -ka %s | tail -1 | awk '{print $5}' > %s",
char out[1024];
sprintf(out,"%sdiskusage",g_hostdb.m_dir);
snprintf(cmd,10000,"df -ka %s | tail -1 | "
"awk '{print $4\" \"$5}' > %s",
g_hostdb.m_dir,
out);
int err = system ( cmd );
@ -897,7 +900,7 @@ float getDiskUsage ( ) {
}
// read in temperatures from file
int fd = open ( "/tmp/diskusage" , O_RDONLY );
int fd = open ( out , O_RDONLY );
if ( fd < 0 ) {
//m_errno = errno;
log("build: Could not open %s for reading: %s.",
@ -917,17 +920,19 @@ float getDiskUsage ( ) {
close ( fd );
float usage;
sscanf(buf,"%f",&usage);
long long avail;
sscanf(buf,"%lli %f",&avail,&usage);
// it is in KB so make it into bytes
if ( diskAvail ) *diskAvail = avail * 1000LL;
return usage;
}
// . sets m_errno on error
// . taken from Msg16.cpp
void *hdtempStartWrapper_r ( void *state , ThreadEntry *t ) {
// run the df -ka cmd
g_process.m_diskUsage = getDiskUsage();
g_process.m_diskUsage = getDiskUsage( &g_process.m_diskAvail );
// ignore temps now. ssds don't have it

View File

@ -93,6 +93,7 @@ class Process {
long m_currentFanState;
long m_desiredFanState;
float m_diskUsage;
long long m_diskAvail;
};
extern Process g_process;

View File

@ -933,13 +933,16 @@ bool RdbBase::incorporateMerge ( ) {
long b = m_mergeStartFileNum + m_numFilesToMerge;
// shouldn't be called if no files merged
if ( a == b ) {
// unless resuming after a merge completed and we exited
// but forgot to finish renaming the final file!!!!
log("merge: renaming final file");
// decrement this count
if ( m_isMerging ) m_rdb->m_numMergesOut--;
// exit merge mode
m_isMerging = false;
// return the merge token, no need for a callback
g_msg35.releaseToken ( );
return true;
//return true;
}
// file #x is the merge file
long x = a - 1;
@ -1033,6 +1036,9 @@ bool RdbBase::incorporateMerge ( ) {
// on success unlink the files we merged and free them
for ( long i = a ; i < b ; i++ ) {
// incase we are starting with just the
// linkdb0001.003.dat file and not the stuff we merged
if ( ! m_files[i] ) continue;
// debug msg
log(LOG_INFO,"merge: Unlinking merged file %s (#%li).",
m_files[i]->getFilename(),i);
@ -1413,6 +1419,15 @@ void RdbBase::attemptMerge ( long niceness, bool forceMergeAll, bool doLog ,
m_minToMerge = 4;
if ( cr && m_rdb == g_tagdb.getRdb() )
m_minToMerge = 2;//cr->m_tagdbMinFilesToMerge;
// if we are rebalancing this coll then keep merges tight so all
// the negative recs annihilate with the positive recs to free
// up disk space since we could be short on disk space.
//if ( g_rebalance.m_isScanning &&
// // if might have moved on if not able to merge because
// // another was merging... so do this anyway...
// g_rebalance.m_collnum == m_collnum )
// m_minToMerge = 2;
// secondary rdbs are used for rebuilding, so keep their limits high
@ -1467,6 +1482,13 @@ void RdbBase::attemptMerge ( long niceness, bool forceMergeAll, bool doLog ,
m_dbname);
g_numUrgentMerges++;
}
// tfndb has his own merge class since titledb merges write tfndb recs
RdbMerge *m = &g_merge;
if ( m->isMerging() )
return;
// if we are tfndb and someone else is merging, do not merge unless
// we have 3 or more files
long minToMerge = m_minToMerge;
@ -1486,6 +1508,31 @@ void RdbBase::attemptMerge ( long niceness, bool forceMergeAll, bool doLog ,
resuming = true;
break;
}
// what percent of recs in the collections' rdb are negative?
// the rdbmaps hold this info
float percentNegativeRecs = getPercentNegativeRecsOnDisk ( );
// 1. if disk space is tight and >20% negative recs, force it
if ( g_process.m_diskAvail >= 0 &&
g_process.m_diskAvail < 10000000000LL && // 10GB
percentNegativeRecs > .20 ) {
m_nextMergeForced = true;
forceMergeAll = true;
log("rdb: hit negative rec concentration of %.01f for "
"collnum %li on db %s when diskAvail=%lli bytes",
percentNegativeRecs,(long)m_collnum,m_rdb->m_dbname,
g_process.m_diskAvail);
}
// 2. if >40% negative recs force it
if ( percentNegativeRecs > .40 ) {
m_nextMergeForced = true;
forceMergeAll = true;
log("rdb: hit negative rec concentration of %.01f for "
"collnum %li on db %s",
percentNegativeRecs,(long)m_collnum,m_rdb->m_dbname);
}
// . don't merge if we don't have the min # of files
// . but skip this check if there is a merge to be resumed from b4
if ( ! resuming && ! forceMergeAll && numFiles < minToMerge ) return;
@ -1719,11 +1766,49 @@ void RdbBase::gotTokenForMerge ( ) {
"original %li files.",mm,n);
// how many files to merge?
n = mm;
// allow a single file to continue merging if the other
// file got merged out already
if ( mm > 0 ) overide = true;
// if we've already merged and already unlinked, then the
// process exited, now we restart with just the final
// merge file and we need to do the rename
if ( mm == 0 ) {
m_isMerging = false;
// make a fake file before us that we were merging
// since it got nuked on disk
//incorporateMerge();
char fbuf[256];
sprintf(fbuf,"%s%04li.dat",m_dbname,mergeFileId-1);
if ( m_isTitledb )
sprintf(fbuf,"%s%04li-%03li.dat",
m_dbname,mergeFileId-1,id2);
log("merge: renaming final merged file %s",fbuf);
m_files[j]->rename(fbuf);
sprintf(fbuf,"%s%04li.map",m_dbname,mergeFileId-1);
//File *mf = m_maps[j]->getFile();
m_maps[j]->rename(fbuf);
log("merge: renaming final merged file %s",fbuf);
return;
}
// resume the merging
goto startMerge;
}
minToMerge = m_minToMerge;
// if we are reblancing this coll then keep merges tight so all
// the negative recs annihilate with the positive recs to free
// up disk space since we could be short on disk space.
//if ( g_rebalance.m_isScanning &&
// // if might have moved on if not able to merge because
// // another was merging... so do this anyway...
// g_rebalance.m_collnum == m_collnum )
// minToMerge = 2;
//if (m_rdb==g_tfndb.getRdb()&& g_merge.isMerging() && minToMerge <=2 )
// minToMerge = 3;
@ -1772,6 +1857,9 @@ void RdbBase::gotTokenForMerge ( ) {
//smini = -1;
// but if we are forcing then merge ALL, except one being dumped
if ( m_nextMergeForced ) n = numFiles;
// or if doing rebalancing, merge them all. tight merge
//if ( g_rebalance.m_isScanning && g_rebalance.m_collnum == m_collnum)
// n = numFiles;
//else if ( m_isTitledb ) {
// RdbBase *base = g_tfndb.getRdb()->m_bases[m_collnum];
// tfndbSize = base->getDiskSpaceUsed();
@ -2305,6 +2393,10 @@ bool RdbBase::verifyFileSharding ( ) {
list.skipCurrentRecord() ) {
//key144_t k;
list.getCurrentKey(k);
// skip negative keys
if ( (k[0] & 0x01) == 0x00 ) continue;
count++;
//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
//unsigned long groupId = getGroupId ( RDB_POSDB , &k );
@ -2349,4 +2441,15 @@ bool RdbBase::verifyFileSharding ( ) {
//return true;
}
// . returns the fraction (0.0 to 1.0) of records on disk that are
//   negative (delete) keys, summed over all of this rdb's files
// . the per-file counts come from each file's RdbMap
// . used by attemptMerge() to force a tight merge when the negative
//   rec concentration is high, so annihilations can free disk space
float RdbBase::getPercentNegativeRecsOnDisk ( ) {
	// scan the maps
	long long numPos = 0LL;
	long long numNeg = 0LL;
	for ( long i = 0 ; i < m_numFiles ; i++ ) {
		numPos += m_maps[i]->getNumPositiveRecs();
		numNeg += m_maps[i]->getNumNegativeRecs();
	}
	long long total = numPos + numNeg;
	// guard: with no files (or all-empty maps) total is 0 and the
	// division below would yield NaN, which silently fails the
	// percentNegativeRecs > .20/.40 checks in attemptMerge()
	if ( total <= 0 ) return 0.0;
	float percent = (float)numNeg / (float)total;
	return percent;
}

View File

@ -168,6 +168,8 @@ class RdbBase {
//RdbMem *getRdbMem () { return &m_mem; };
float getPercentNegativeRecsOnDisk ( ) ;
// how much mem is alloced for our maps?
long long getMapMemAlloced ();

View File

@ -3039,7 +3039,7 @@ bool RdbList::posdbMerge_r ( RdbList **lists ,
if ( maxPtr > m_alloc + m_allocSize ) maxPtr = m_alloc + m_allocSize;
// debug note
if ( m_listSize )
if ( m_listSize && g_conf.m_logDebugBuild )
log(LOG_LOGIC,"db: storing recs in a non-empty list for merge"
" probably from recall from negative key loss");

View File

@ -21,8 +21,9 @@
Rebalance g_rebalance;
Rebalance::Rebalance ( ) {
m_registered = false;
m_allowSave = false;
m_inRebalanceLoop = false;
//m_inRebalanceLoop = false;
m_numForeignRecs = 0;
m_rebalanceCount = 0LL;
m_scannedCount = 0LL;
@ -225,6 +226,15 @@ void Rebalance::scanLoop ( ) {
m_lastRdb = rdb;
// reset key cursor as well!!!
KEYMIN ( m_nextKey , MAX_KEY_BYTES );
// This logic now in RdbBase.cpp.
// let's keep posdb and titledb tight-merged so
// we do not run out of disk space because we
// will be dumping tons of negative recs
//RdbBase *base = rdb->getBase(m_collnum);
//base->m_savedMin = base->m_minFilesToMerge;
//base->m_minFilesToMerge = 2;
}
// percent update?
long percent = (unsigned char)m_nextKey[rdb->m_ks-1];
@ -245,6 +255,12 @@ void Rebalance::scanLoop ( ) {
m_rebalanceCount = 0;
m_scannedCount = 0;
m_lastPercent = -1;
// This logic now in RdbBase.cpp.
// go back to normal merge threshold
//RdbBase *base = rdb->getBase(m_collnum);
//base->m_minFilesToMerge = base->m_savedMin;
}
// reset it for next colls
m_rdbNum = 0;
@ -310,6 +326,11 @@ static void gotListWrapper ( void *state , RdbList *list, Msg5 *msg5 ) {
g_rebalance.scanLoop();
}
// . sleep-callback entry point registered by Rebalance::scanRdb() when it
//   found a merge in progress and had to park itself
// . fires once per registered interval and simply re-enters the rebalance
//   scan loop so the scan can resume once merging has finished
// . fd and state are required by the sleep-callback signature but unused
void sleepWrapper ( int fd , void *state ) {
// try a re-call since we were merging last time
g_rebalance.scanLoop();
}
bool Rebalance::scanRdb ( ) {
// get collrec i guess
@ -317,6 +338,40 @@ bool Rebalance::scanRdb ( ) {
Rdb *rdb = g_process.m_rdbs[m_rdbNum];
// unregister it if it was registered
if ( m_registered ) {
g_loop.unregisterSleepCallback ( NULL,sleepWrapper );
m_registered = false;
}
if ( g_process.m_mode == EXIT_MODE ) return false;
// . if this rdb is merging wait until merge is done
// . we will be dumping out a lot of negative recs and if we are
// short on disk space we need to merge them in immediately with
// all our data so that they annihilate quickly with the positive
// keys in there to free up more disk
RdbBase *base = rdb->getBase ( m_collnum );
// base is NULL for like monitordb...
if ( base && base->isMerging() ) {
log("rebal: waiting for merge on %s for coll #%li to complete",
rdb->m_dbname,(long)m_collnum);
g_loop.registerSleepCallback ( 1000,NULL,sleepWrapper,1);
m_registered = true;
// we blocked, return false
return false;
}
// or really if any merging is going on way for it to save disk space
if ( rdb->isMerging() ) {
log("rebal: waiting for merge on %s for coll ??? to complete",
rdb->m_dbname);
g_loop.registerSleepCallback ( 1000,NULL,sleepWrapper,1);
m_registered = true;
// we blocked, return false
return false;
}
// skip empty collrecs, unless like statsdb or something
//if ( ! cr && ! rdb->m_isCollectionLess ) return true;

View File

@ -23,7 +23,7 @@ class Rebalance {
bool gotList ( ) ;
bool saveRebalanceFile ( ) ;
bool m_inRebalanceLoop;
//bool m_inRebalanceLoop;
long m_numForeignRecs;
long long m_rebalanceCount;
long long m_scannedCount;
@ -43,6 +43,7 @@ class Rebalance {
long m_blocked;
bool m_allowSave;
bool m_registered;
RdbList m_list;
SafeBuf m_posMetaList;
SafeBuf m_negMetaList;

View File

@ -2289,7 +2289,7 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
"<tr bgcolor=#%s>"
"<td width=50%%><b>host ID with min repair mode"
"</b></td>"
"<td><a href=\"http://%s:%hu/master/repair\">"
"<td><a href=\"http://%s:%hu/admin/repair\">"
"%li</a></td></tr>\n"
"<tr bgcolor=#%s><td><b>old collection</b></td>"

View File

@ -4124,6 +4124,14 @@ bool SpiderColl::scanListForWinners ( ) {
continue;
if ( sreq->m_hopCount < m_tailHopCount )
goto gotNewWinner;
// if hopcounts tied prefer the unindexed doc
// i don't think we need this b/c spidertimems
// for new docs should be less than old docs...
// TODO: verify that
//if ( sreq->m_isIndexed && ! m_tailIsIndexed )
// continue;
//if ( ! sreq->m_isIndexed && m_tailIsIndexed )
// goto gotNewWinner;
// if tied, use actual times. assuming both<nowGlobalMS
if ( spiderTimeMS > m_tailTimeMS )
continue;

View File

@ -1921,6 +1921,8 @@ bool Tagdb::verify ( char *coll ) {
//key128_t k = list.getCurrentKey();
key128_t k;
list.getCurrentKey ( &k );
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
// see if it is the "old" school tagdb rec
//char *data = list.getCurrentData();

View File

@ -586,7 +586,7 @@ void Test::stopIt ( ) {
// link to page parser
char ubuf[2000];
urlEncode(ubuf,2000,u,gbstrlen(u),true);
tmp.safePrintf(" <a href=\"/master/parser?c=test&"
tmp.safePrintf(" <a href=\"/admin/parser?c=test&"
"u=%s\">parser</a> ",ubuf);
//tmp.safePrintf(" (%llu)",h);
tmp.safePrintf("<br>\n");

View File

@ -464,7 +464,12 @@ bool Threads::call ( char type ,
// . try to launch as many threads as we can
// . this sets g_errno on error
// . if it has an error, just ignore it, our thread is queued
m_threadQueues[i].launchThread ( t ) ;
m_threadQueues[i].launchThread2 ( NULL );
//if ( ! m_threadQueues[i].launchThread2 ( t ) && g_errno ) {
// log("thread: failed thread launch: %s",mstrerror(g_errno));
// return false;
//}
// return false if there was an error launching the thread
//if ( g_errno ) return false;
// clear g_errno
@ -512,7 +517,7 @@ long Threads::launchThreads ( ) {
// clear g_errno
g_errno = 0;
// launch as many threads as we can from queue #i
while ( m_threadQueues[i].launchThread ( ) ) numLaunched++;
while ( m_threadQueues[i].launchThread2(NULL) ) numLaunched++;
// continue if no g_errno set
if ( ! g_errno ) continue;
// otherwise bitch about it
@ -1596,7 +1601,7 @@ long Threads::getNumActiveHighPriorityThreads() {
// . sets g_errno on error
// . don't launch a low priority thread if a high priority thread is running
// . i.e. don't launch a high niceness thread if a low niceness is running
bool ThreadQueue::launchThread ( ThreadEntry *te ) {
bool ThreadQueue::launchThread2 ( ThreadEntry *te ) {
// debug msg
//log("trying to launch for type=%li",(long)m_threadType);
// clean up any threads that have exited
@ -2151,13 +2156,23 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
mfree ( fs->m_allocBuf , fs->m_allocSize , "ThreadReadBuf" );
fs->m_buf = NULL;
}
// i'm not sure return value matters at this point? the thread
// is queued and hopefully will launch at some point
return false;
// if this is the direct thread request do not call callback, just
// return false
// return false, otherwise we get into an unexpected loop thingy
if ( t == te )
return log("thread: Returning false.");
// do it blocking
log("thread: Calling without thread. This will crash many times. "
"Please fix it.");
// return false so caller will re-do without thread!
// so BigFile::readwrite() will retry without thread and we won't
// get into a wierd loop thingy
if ( te ) return false;
// unsigned long long profilerStart,profilerEnd;
// unsigned long long statStart,statEnd;

View File

@ -139,7 +139,7 @@ class ThreadQueue {
// . launch a thread from our queue
// . returns false and sets errno on error
bool launchThread ( ThreadEntry *te = NULL );
bool launchThread2 ( ThreadEntry *te );
void print ( ) ;

View File

@ -182,6 +182,8 @@ bool Titledb::verify ( char *coll ) {
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
//unsigned long groupId = getGroupId ( RDB_TITLEDB , &k );
//if ( groupId == g_hostdb.m_groupId ) got++;

View File

@ -12,7 +12,7 @@
# must be represented as &lt;, &gt;, &#34; and &#035; respectively.
# Controls just the spiders for this collection.
<spideringEnabled>1</>
<spideringEnabled>0</>
# What is the maximum number of web pages the spider is allowed to download
# simultaneously PER HOST for THIS collection?
@ -289,7 +289,6 @@
# spidered, or if it has already been indexed, it will be deleted when it is
# respidered.<br><br>
<filterExpression><![CDATA[isdocidbased]]></>
<filterExpression><![CDATA[!insitelist &amp;&amp; !ismanualadd]]></>
<filterExpression><![CDATA[ismedia]]></>
<filterExpression><![CDATA[errorcount&gt;=3 &amp;&amp; hastmperror]]></>
<filterExpression><![CDATA[errorcount&gt;=1 &amp;&amp; hastmperror]]></>
@ -307,7 +306,6 @@
<filterExpression><![CDATA[isnew]]></>
<filterExpression><![CDATA[default]]></>
<harvestLinks>1</>
<harvestLinks>0</>
<harvestLinks>1</>
<harvestLinks>1</>
<harvestLinks>1</>
@ -325,7 +323,6 @@
<harvestLinks>1</>
<harvestLinks>1</>
<filterFrequency>0.000000</>
<filterFrequency>30.000000</>
<filterFrequency>0.000000</>
<filterFrequency>1.000000</>
<filterFrequency>1.000000</>
@ -346,7 +343,6 @@
# Do not allow more than this many outstanding spiders for all urls in this
# priority.
<maxSpidersPerRule>99</>
<maxSpidersPerRule>0</>
<maxSpidersPerRule>99</>
<maxSpidersPerRule>1</>
<maxSpidersPerRule>1</>
@ -366,7 +362,6 @@
# Allow this many spiders per IP.
<maxSpidersPerIp>1</>
<maxSpidersPerIp>7</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
<maxSpidersPerIp>1</>
@ -402,10 +397,8 @@
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<spiderIpWait>1000</>
<filterPriority>80</>
<filterPriority>-3</>
<filterPriority>-3</>
<filterPriority>3</>
<filterPriority>45</>
<filterPriority>85</>

View File

@ -51,7 +51,7 @@
<readOnlyMode>0</>
# Controls all spidering for all collections
<spideringEnabled>1</>
<spideringEnabled>0</>
# What is the maximum number of web pages the spider is allowed to download
# simultaneously for ALL collections PER HOST?

View File

@ -4298,6 +4298,7 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
"%slocalhosts.conf "
//"%shosts2.conf "
"%sgb.conf "
"%slocalgb.conf "
"%stmpgb "
//"%scollections.dat "
"%sgb.pem "
@ -4351,6 +4352,7 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
dir,
dir,
dir,
dir,
dir,
dir,
@ -5756,11 +5758,12 @@ void dumpTitledb (char *coll,long startFileNum,long numFiles,bool includeTree,
lastKey.n1,lastKey.n0,
k.n1,k.n0);
lastKey = k;
long shard = g_hostdb.getShardNum ( RDB_TITLEDB , &k );
// print deletes
if ( (k.n0 & 0x01) == 0) {
fprintf(stdout,"n1=%08lx n0=%016llx docId=%012lli "
"(del)\n",
k.n1 , k.n0 , docId );
"shard=%li (del)\n",
k.n1 , k.n0 , docId , shard );
continue;
}
// free the mem
@ -5830,6 +5833,7 @@ void dumpTitledb (char *coll,long startFileNum,long numFiles,bool includeTree,
"redir=%s "
"url=%s "
"firstdup=1 "
"shard=%li "
"\n",
k.n1 , k.n0 ,
//rec[0] ,
@ -5852,7 +5856,8 @@ void dumpTitledb (char *coll,long startFileNum,long numFiles,bool includeTree,
//ms,
(long)xd->m_hopCount,
ru,
u->getUrl() );
u->getUrl() ,
shard );
prevId = docId;
count = 0;
continue;
@ -5952,6 +5957,7 @@ void dumpTitledb (char *coll,long startFileNum,long numFiles,bool includeTree,
"version=%02li "
//"maxLinkTextWeight=%06lu%% "
"hc=%li "
"shard=%li "
//"diffbot=%li "
"redir=%s "
"url=%s\n",
@ -5975,6 +5981,7 @@ void dumpTitledb (char *coll,long startFileNum,long numFiles,bool includeTree,
(long)xd->m_version,
//ms,
(long)xd->m_hopCount,
shard,
//(long)xd->m_isDiffbotJSONObject,
ru,
u->getUrl() );
@ -14438,6 +14445,8 @@ bool checkDataParity ( ) {
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
uint32_t shardNum = getShardNum ( RDB_INDEXDB, &k );
@ -14485,6 +14494,8 @@ bool checkDataParity ( ) {
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
uint32_t shardNum = getShardNum ( RDB_TITLEDB , &k );
//long groupId = k.n1 & g_hostdb.m_groupMask;
@ -14527,6 +14538,8 @@ bool checkDataParity ( ) {
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
// skip negative keys
if ( (k.n0 & 0x01) == 0x00 ) continue;
count++;
// verify the group
uint32_t shardNum = getShardNum ( RDB_TFNDB , &k );