mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 20:27:43 +03:00
5884951190
on a machine in matt wells datacenter. like fan switching based on temps, or printing seo links. made seo functions weak overridable placeholder stubs so if seo.o is linked in it will override. include seo.o object if seo.cpp file exists for automatic seo module building and linking.
1654 lines
52 KiB
C++
1654 lines
52 KiB
C++
#include "gb-include.h"
|
|
|
|
#include "Indexdb.h" // makeKey(long long docId)
|
|
#include "Titledb.h"
|
|
#include "Spider.h"
|
|
#include "Tagdb.h"
|
|
#include "Dns.h"
|
|
//#include "PageResults.h" // for query buf, g_qbuf
|
|
#include "Collectiondb.h"
|
|
#include "CollectionRec.h"
|
|
#include "Clusterdb.h" // for getting # of docs indexed
|
|
//#include "Checksumdb.h" // should migrate to this one, though
|
|
#include "Pages.h"
|
|
#include "Query.h" // MAX_QUERY_LEN
|
|
#include "SafeBuf.h"
|
|
#include "LanguageIdentifier.h"
|
|
#include "LanguagePages.h"
|
|
#include "Users.h"
|
|
#include "Address.h" // getIPLocation
|
|
#include "Proxy.h"
|
|
|
|
//char *printNumResultsDropDown ( char *p, long n, bool *printedDropDown);
|
|
bool printNumResultsDropDown ( SafeBuf& sb, long n, bool *printedDropDown);
|
|
//static char *printTopDirectory ( char *p, char *pend );
|
|
static bool printTopDirectory ( SafeBuf& sb );
|
|
|
|
// this prints the last five queries
|
|
//static long printLastQueries ( char *p , char *pend ) ;
|
|
|
|
//static char *expandRootHtml ( char *p , long plen ,
|
|
/*
|
|
static bool expandRootHtml ( SafeBuf& sb,
|
|
uint8_t *html , long htmlLen ,
|
|
char *q , long qlen ,
|
|
HttpRequest *r ,
|
|
TcpSocket *s ,
|
|
long long docsInColl ,
|
|
CollectionRec *cr ) ;
|
|
*/
|
|
|
|
// . convenience overload for callers that have no cookie to hand over
// . forwards to the three-argument sendPageRoot() with a NULL cookie and
//   returns whatever that call returns
bool sendPageRoot ( TcpSocket *s, HttpRequest *r ){
	char *noCookie = NULL;
	return sendPageRoot ( s , r , noCookie );
}
|
|
|
|
// . prints the navigation footer shared by the home pages into "sb"
// . the footer also emits the closing </body> and </html> tags, so it has
//   to be the last thing printed for a page
// . the Admin link is only printed when r->isLocal() is true
// . always returns true
bool printNav ( SafeBuf &sb , HttpRequest *r ) {
	// links shown to every visitor
	sb.safePrintf ( "<center><b><p class=nav>"
			"<a href=\"/about.html\">About</a>"
			" <a href=\"/contact.html\">Contact</a>"
			" <a href=\"/help.html\">Help</a>"
			" <a href=/privacy.html>Privacy Policy</a>"
			" <a href=\"/searchfeed.html\">Search API</a>"
			" <a href=/seoapi.html>SEO API</a>"
			" <a href=/account>My Account</a> " );
	// extra link for local requests only
	const bool showAdminLink = r->isLocal();
	if ( showAdminLink ) {
		sb.safePrintf ( " [<a href=\"/master?\">Admin</a>]" );
	}
	// close the footer and the whole document
	sb.safePrintf ( "</p></b></center></body></html>" );
	return true;
}
|
|
|
|
// . prints the complete html of the default (web search) home page into "sb"
// . order of output: head with inline css, a script defining x() which
//   focuses the query box on load, the logo, the link bar, the search form,
//   a table of feature blurbs and finally the footer from printNav(),
//   which also closes the body and html tags
// . "r" is only handed through to printNav()
// . rows for the xml search feed, the private search engine and the
//   "no tax dodging" pitch are intentionally not printed
// . always returns true
bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {

	// document head: meta tags, title and inline style sheet
	sb.safePrintf ( "<html>\n"
			"<head>\n"
			"<meta name=\"description\" content=\"A powerful, new "
			"search engine that does real-time indexing!\">\n"
			"<meta name=\"keywords\" content=\"search, "
			"search engine, search engines, search the web, "
			"fresh index, green search engine, green search, "
			"clean search engine, clean search\">\n"
			"<title>Gigablast - An Alternative Open Source "
			"Search Engine</title>\n"
			"<style><!--\n"
			"body {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"margin: 20px 5px;\n"
			"letter-spacing: 0.04em;\n"
			"}\n"
			"a:link {color:#00c}\n"
			"a:visited {color:#551a8b}\n"
			"a:active {color:#f00}\n"
			".bold {font-weight: bold;}\n"
			".bluetable {background:#d1e1ff;margin-bottom:15px;"
			"font-size:12px;}\n"
			".url {color:#008000;}\n"
			".cached, .cached a {font-size: 10px;"
			"color: #666666;\n"
			"}\n"
			"table {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"}\n"
			".directory {font-size: 16px;}\n"
			"-->\n"
			"</style>\n"
			"\n"
			"</head>\n" );

	// x() puts the cursor into the query box; the body tag calls it
	sb.safePrintf ( "<script>\n"
			"<!--\n"
			"function x(){document.f.q.focus();}\n"
			"// --></script>\n"
			"<body onload=\"x()\">\n"
			"<br><br>\n" );

	// logo. try to avoid using https for images. it is like 10ms slower.
	const char *logoSrc = "/logo-med.jpg";
	if ( g_conf.m_isMattWells )
		logoSrc = "http://www.gigablast.com/logo-med.jpg";
	sb.safePrintf ( "<center><a href=/><img border=0 width=500 "
			"height=122 src=%s></a>\n" , logoSrc );

	// link bar ("web" is the current page so it is bold, not a link),
	// the search form, and the opening tag of the feature table
	sb.safePrintf ( "<br><br>\n"
			"<br><br><br>\n"
			"<b>web</b> <a href=/seo>seo</a> "
			"<a href=\"http://www.gigablast.com/?c=dmoz3\">"
			"directory</a> \n"
			"<a href=/adv.html>advanced search</a>"
			" "
			"<a href=/addurl title=\"Instantly add your url to "
			"Gigablast's index\">add url</a>"
			"\n"
			"<br><br>\n"
			"<form method=get action=/search name=f>\n"
			"<input name=q type=text size=60 value=\"\"> "
			"<input type=\"submit\" value=\"Search Green\">\n"
			"\n"
			"</form>\n"
			"<br>\n"
			"\n"
			"<table cellpadding=3>\n"
			"\n" );

	// row 1: open source
	sb.safePrintf ( "<tr valign=top>\n"
			"<td><div style=width:50px;height:50px;"
			"display:inline-block;background-color:red;></td>\n"
			"<td><font size=+1><b>Open Source!</b></font><br>\n" );
	sb.brify2 ( "Gigablast is now available as an "
		    "<a href=https://github.com/gigablast/"
		    "open-source-search-engine>"
		    "open source search engine</a> on github.com. "
		    "Download it today. Finally a robust, scalable search "
		    "solution in C/C++ that has been in development and used "
		    "commercially since 2000. "
		    "<a href=/admin.html#features>Features.</a> "
		    "Limited support available for free."
		    , 80 );

	// end of row 1, row 2: green search engine
	sb.safePrintf ( "<br><br>"
			"</td></tr>\n"
			"<tr valign=top>\n"
			"<td><div style=width:50px;height:50px;"
			"display:inline-block;background-color:green;></td>\n"
			"<td><font size=+1><b>The Green Search Engine</b>"
			"</font><br>\n" );
	sb.brify2 ( "Gigablast is the only clean-powered web search engine. "
		    "90% of its power usage comes from wind energy. "
		    "Astoundingly, Gigablast is one of ONLY four search "
		    "engines in the United States indexing over a billion "
		    "pages."
		    , 80 );

	// end of row 2, row 3: transparent search engine
	sb.safePrintf ( "<br><br></td></tr>\n"
			"\n"
			"\n"
			"<tr valign=top>\n"
			"<td><div style=width:50px;height:50px;"
			"display:inline-block;background-color:0040fe;></td>\n"
			"<td><font size=+1><b>The Transparent Search Engine"
			"</b></font><br>\n" );
	sb.brify2 ( "Gigablast is the first truly transparent search engine. "
		    "It tells you exactly why the search results are ranked "
		    "the way they are. There is nothing left to the "
		    "imagination."
		    , 85 );

	// end of row 3, row 4: seo search engine
	sb.safePrintf ( "<br><br>"
			"</td></tr>\n"
			"\n"
			"\n"
			"<tr valign=top>\n"
			"<td><div style=width:50px;height:50px;"
			"display:inline-block;background-color:f2b629;></td>\n"
			"<td><font size=+1><b>The SEO Search Engine</b>"
			"</font><br>\n" );
	sb.brify2 ( "When it comes to search-engine based SEO, Gigablast is "
		    "the place to be. With a frothy set of unique and "
		    "effective <a href=/seo>SEO tools</a>, you will find all "
		    "you need to execute a simple yet effective SEO "
		    "strategy. Stop the guesswork, and let a search engine "
		    "tell you how to SEO it."
		    , 85 );

	// end of row 4 and of the table
	sb.safePrintf ( "</td></tr>\n"
			"\n"
			"\n"
			"</table>\n"
			"<br><br>\n" );

	// footer; this also closes body and html
	printNav ( sb , r );
	return true;
}
|
|
|
|
// . prints the complete html of the "add url" home page into "sb"
// . "url" is the url to add, or NULL when the page is merely displayed:
//   - NULL: the text box is pre-filled with "http://" and, if adding urls
//     is disabled, the "disabled" message is printed under the form
//   - non-NULL: the text box shows the url and an inline script requests
//     /addurl?u=<url>&id=<hash>&rand=<time> and puts the reply text into
//     the "msgbox" div (so the disabled message is not printed here too)
// . "r" is only handed through to printNav(), which also closes the
//   body and html tags
// . always returns true
bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {

	// document head: title and inline style sheet
	sb.safePrintf ( "<html>\n"
			"<head>\n"
			"<title>Gigablast - Add Url</title>\n"
			"<style><!--\n"
			"body {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"margin: 20px 5px;\n"
			"letter-spacing: 0.04em;\n"
			"}\n"
			"a:link {color:#00c}\n"
			"a:visited {color:#551a8b}\n"
			"a:active {color:#f00}\n"
			".bold {font-weight: bold;}\n"
			".bluetable {background:#d1e1ff;margin-bottom:15px;"
			"font-size:12px;}\n"
			".url {color:#008000;}\n"
			".cached, .cached a {font-size: 10px;"
			"color: #666666;\n"
			"}\n"
			"table {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"}\n"
			".directory {font-size: 16px;}\n"
			"-->\n"
			"</style>\n"
			"\n"
			"</head>\n" );

	// x() is defined for parity with the other home pages but nothing
	// on this page calls it: the body tag below has no onload handler
	sb.safePrintf ( "<script>\n"
			"<!--\n"
			"function x(){document.f.q.focus();}\n"
			"// --></script>\n" );

	sb.safePrintf ( "<body>" );

	// callback for the XMLHttpRequest issued further down: once the
	// request has completed, show the reply text in the msgbox div
	sb.safePrintf ( "<script type=\"text/javascript\">\n"
			"function handler() {\n"
			"if(this.readyState == 4 ) {\n"
			"document.getElementById('msgbox').innerHTML="
			"this.responseText;\n"
			"}}\n"
			"</script>\n" );

	sb.safePrintf ( "\n" );
	sb.safePrintf ( "<br><br>\n" );

	// logo
	if ( g_conf.m_isMattWells )
		sb.safePrintf ( "<center><a href=/><img border=0 width=500 "
				"height=122 src=http://www.gigablast.com/logo-"
				"med.jpg></a>\n" );
	else
		sb.safePrintf ( "<center><a href=/><img border=0 width=500 "
				"height=122 src=/logo-med.jpg></a>\n" );

	// link bar ("add url" is the current page so it is bold, not a link)
	sb.safePrintf ( "<br><br>\n" );
	sb.safePrintf ( "<br><br><br>\n" );
	sb.safePrintf ( "<a href=/>web</a> <a href=/seo>seo</a> "
			"<a href=\"http://www.gigablast.com/?c=dmoz3\">"
			"directory</a> \n" );
	sb.safePrintf ( "<a href=/adv.html>advanced search</a>" );
	sb.safePrintf ( " " );
	sb.safePrintf ( "<b title=\"Instantly add your url to Gigablast's "
			"index\">"
			"add url</b>" );
	sb.safePrintf ( "\n" );
	sb.safePrintf ( "<br><br>\n" );

	// the form with the url text box
	sb.safePrintf ( "<form method=get action=/addurl name=f>\n" );
	sb.safePrintf ( "<input name=u type=text size=60 value=\"" );
	if ( url ) {
		SafeBuf tmp;
		tmp.safePrintf ( "%s" , url );
		// don't let double quotes in the url close our val attribute
		tmp.replace ( "\"" , "%22" );
		sb.safeMemcpy ( &tmp );
	}
	else
		sb.safePrintf ( "http://" );
	sb.safePrintf ( "\"> <input type=\"submit\" value=\"Add Url\">\n" );
	sb.safePrintf ( "\n" );

	// if addurl is turned off, just print "disabled" msg
	const char *msg = NULL;
	if ( ! g_conf.m_addUrlEnabled )
		msg = "Add url is temporarily disabled";
	// . can also be turned off in the collection rec
	// . getRec() result is checked for NULL so a missing "main"
	//   collection no longer crashes this page
	CollectionRec *cr = g_collectiondb.getRec ( "main" );
	if ( cr && ! cr->m_addUrlEnabled )
		msg = "Add url is temporarily disabled";
	// or if in read-only mode
	if ( g_conf.m_readOnlyMode )
		msg = "Add url is temporarily disabled";
	// if url is non-empty the ajax will receive this identical msg
	// and display it in the div, so do not duplicate the msg!
	if ( msg && ! url )
		sb.safePrintf ( "<br><br>%s" , msg );

	// . the ajax msgbox div
	// . when loaded with the main page for the first time it will
	//   immediately replace its content...
	if ( url ) {
		sb.safePrintf ( "<br>"
				"<br>"
				"<div id=msgbox>"
				"<center>"
				"<img src=/gears.gif width=50 height=50>"
				"</center>"
				"<script type=text/javascript>"
				"var client = new XMLHttpRequest();\n"
				"client.onreadystatechange = handler;\n"
				"var url='/addurl?u=" );
		sb.urlEncode ( url );
		// . "id" carries a hash of the url, never 0
		// . NOTE(review): presumably the /addurl handler checks
		//   it so clients can't pass in a bogus id; that check is
		//   not visible here
		unsigned long h32 = hash32n ( url );
		if ( h32 == 0 ) h32 = 1;
		// "rand" is just the current local time in milliseconds
		unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
		sb.safePrintf ( "&id=%lu&rand=%llu';\n"
				"client.open('GET', url );\n"
				"client.send();\n"
				"</script>\n"
				, h32
				, rand64 );
		sb.safePrintf ( "</div>\n" );
	}

	sb.safePrintf ( "</form>\n" );
	sb.safePrintf ( "<br>\n" );
	sb.safePrintf ( "\n" );
	sb.safePrintf ( "<br><br>\n" );

	// footer; this also closes body and html
	printNav ( sb , r );
	return true;
}
|
|
|
|
|
|
// . prints the complete html of the directory home page into "sb"
// . same layout as the web home page (head, inline css, focus script,
//   logo, link bar, search form) but the feature table is replaced by
//   the listing from printTopDirectory()
// . exactly one <body> tag is printed, the one carrying the onload
//   handler; a second, bare <body> used to follow it
// . "r" is only handed through to printNav(), which also closes the
//   body and html tags
// . always returns true
bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {

	// document head: meta tags, title and inline style sheet
	sb.safePrintf ( "<html>\n"
			"<head>\n"
			"<meta name=\"description\" content=\"A powerful, new "
			"search engine that does real-time indexing!\">\n"
			"<meta name=\"keywords\" content=\"search, "
			"search engine, search engines, search the web, "
			"fresh index, green search engine, green search, "
			"clean search engine, clean search\">\n"
			"<title>Gigablast</title>\n"
			"<style><!--\n"
			"body {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"margin: 20px 5px;\n"
			"letter-spacing: 0.04em;\n"
			"}\n"
			"a:link {color:#00c}\n"
			"a:visited {color:#551a8b}\n"
			"a:active {color:#f00}\n"
			".bold {font-weight: bold;}\n"
			".bluetable {background:#d1e1ff;margin-bottom:15px;"
			"font-size:12px;}\n"
			".url {color:#008000;}\n"
			".cached, .cached a {font-size: 10px;"
			"color: #666666;\n"
			"}\n"
			"table {\n"
			"font-family:Arial, Helvetica, sans-serif;\n"
			"color: #000000;\n"
			"font-size: 12px;\n"
			"}\n"
			".directory {font-size: 16px;}\n"
			"-->\n"
			"</style>\n"
			"\n"
			"</head>\n" );

	// x() puts the cursor into the query box; the body tag calls it
	sb.safePrintf ( "<script>\n"
			"<!--\n"
			"function x(){document.f.q.focus();}\n"
			"// --></script>\n" );
	sb.safePrintf ( "<body onload=\"x()\">\n" );
	sb.safePrintf ( "<br><br>\n" );

	// logo. try to avoid using https for images. it is like 10ms slower.
	if ( g_conf.m_isMattWells )
		sb.safePrintf ( "<center><a href=/><img border=0 width=500 "
				"height=122 src=http://www.gigablast.com/logo-"
				"med.jpg></a>\n" );
	else
		sb.safePrintf ( "<center><a href=/><img border=0 width=500 "
				"height=122 src=/logo-med.jpg></a>\n" );

	// link bar ("directory" is the current page so it is bold)
	sb.safePrintf ( "<br><br>\n" );
	sb.safePrintf ( "<br><br><br>\n" );
	sb.safePrintf ( "<a href=/>web</a> <a href=/seo>seo</a> "
			"<b>directory</b> \n" );
	sb.safePrintf ( "<a href=/adv.html>advanced search</a>" );
	sb.safePrintf ( " " );
	sb.safePrintf ( "<a href=/addurl title=\"Instantly add your url to "
			"Gigablast's index\">add url</a>" );
	sb.safePrintf ( "\n" );
	sb.safePrintf ( "<br><br>\n" );

	// the search form
	sb.safePrintf ( "<form method=get "
			"action=/search name=f>\n" );
	sb.safePrintf ( "<input name=q type=text size=60 value=\"\"> "
			"<input type=\"submit\" value=\"Search Green\">\n" );
	sb.safePrintf ( "\n" );
	sb.safePrintf ( "</form>\n" );
	sb.safePrintf ( "<br>\n" );
	sb.safePrintf ( "\n" );

	// the top-level directory categories
	printTopDirectory ( sb );

	sb.safePrintf ( "<br><br>\n" );

	// footer; this also closes body and html
	printNav ( sb , r );

	return true;
}
|
|
|
|
|
|
// . returns false if blocked, true otherwise
|
|
// . sets errno on error
|
|
// . make a web page displaying the config of this host
|
|
// . call g_httpServer.sendDynamicPage() to send it
|
|
// . returns false if blocked, true otherwise
// . sets errno on error
// . builds the home page for the requested collection and sends it with
//   g_httpServer.sendDynamicPage()
// . the collection is the "c" cgi parm; when that is missing or empty
//   the default collection for the request's host is used
// . collection "dmoz3" gets the directory home page, every other
//   collection gets the web home page
// . NOTE: "cookie" is accepted but never used; NULL is always passed on
bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
	// stack space handed to the SafeBuf that receives the page
	char buf [ 10*1024 + MAX_QUERY_LEN ];
	SafeBuf sb ( buf , (long)sizeof(buf) );

	// which collection is being asked for?
	long  collLen;
	char *coll = r->getString ( "c" , &collLen );
	const bool haveColl = ( coll && coll[0] );
	if ( ! haveColl ) {
		// fall back to the default for this host name
		coll    = g_conf.getDefaultColl ( r->getHost() ,
						  r->getHostLen() );
		collLen = gbstrlen ( coll );
	}

	// ensure collection not too big
	if ( collLen >= MAX_COLL_LEN ) {
		g_errno = ECOLLTOOBIG;
		return g_httpServer.sendErrorReply ( s , 500 ,
						     mstrerror(g_errno) );
	}

	// print the page out
	const bool wantDirectory = ( strcmp ( coll , "dmoz3" ) == 0 );
	if ( wantDirectory ) {
		printDirHomePage ( sb , r );
	}
	else {
		printWebHomePage ( sb , r );
	}

	// . now send it off
	// . cachetime is 0; it used to be 120 seconds
	return g_httpServer.sendDynamicPage ( s ,
					      (char*) sb.getBufStart() ,
					      sb.length() ,
					      0 ,           // cachetime
					      false ,       // post?
					      "text/html" ,
					      200 ,
					      NULL ,        // cookie
					      "UTF-8" ,
					      r );
}
|
|
|
|
/*
|
|
//char *expandRootHtml ( char *p , long plen ,
|
|
bool expandRootHtml ( SafeBuf& sb,
|
|
uint8_t *head , long hlen ,
|
|
char *q , long qlen ,
|
|
HttpRequest *r ,
|
|
TcpSocket *s ,
|
|
long long docsInColl ,
|
|
CollectionRec *cr ) {
|
|
//char *pend = p + plen;
|
|
// store custom header into buf now
|
|
//for ( long i = 0 ; i < hlen && p+10 < pend ; i++ ) {
|
|
for ( long i = 0 ; i < hlen; i++ ) {
|
|
if ( head[i] != '%' ) {
|
|
// *p++ = head[i];
|
|
sb.safeMemcpy((char*)&head[i], 1);
|
|
continue;
|
|
}
|
|
if ( i + 1 >= hlen ) {
|
|
// *p++ = head[i];
|
|
sb.safeMemcpy((char*)&head[i], 1);
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'S' ) {
|
|
// now we got the %S, insert "spiders are [on/off]"
|
|
bool spidersOn = true;
|
|
if ( ! g_conf.m_spideringEnabled ) spidersOn = false;
|
|
if ( ! cr->m_spideringEnabled ) spidersOn = false;
|
|
if ( spidersOn )
|
|
sb.safePrintf("Spiders are on");
|
|
else
|
|
sb.safePrintf("Spiders are off");
|
|
// skip over %S
|
|
i += 1;
|
|
continue;
|
|
}
|
|
|
|
if ( head[i+1] == 'q' ) {
|
|
// now we got the %q, insert the query
|
|
char *p = (char*) sb.getBuf();
|
|
char *pend = (char*) sb.getBufEnd();
|
|
long eqlen = dequote ( p , pend , q , qlen );
|
|
//p += eqlen;
|
|
sb.incrementLength(eqlen);
|
|
// skip over %q
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'w' &&
|
|
head[i+2] == 'h' &&
|
|
head[i+3] == 'e' &&
|
|
head[i+4] == 'r' &&
|
|
head[i+5] == 'e' ) {
|
|
// insert the location
|
|
long whereLen;
|
|
char *where = r->getString("where",&whereLen);
|
|
// get it from cookie as well!
|
|
if ( ! where )
|
|
where = r->getStringFromCookie("where",
|
|
&whereLen);
|
|
// fix for getStringFromCookie
|
|
if ( where && ! where[0] ) where = NULL;
|
|
// skip over the %where
|
|
i += 5;
|
|
// if empty, base it on IP
|
|
if ( ! where ) {
|
|
double lat;
|
|
double lon;
|
|
double radius;
|
|
char *city,*state,*ctry;
|
|
// use this by default
|
|
long ip = r->m_userIP;
|
|
// ip for testing?
|
|
long iplen;
|
|
char *ips = r->getString("uip",&iplen);
|
|
if ( ips ) ip = atoip(ips);
|
|
// returns true if found in db
|
|
char buf[128];
|
|
getIPLocation ( ip ,
|
|
&lat ,
|
|
&lon ,
|
|
&radius,
|
|
&city ,
|
|
&state ,
|
|
&ctry ,
|
|
buf ,
|
|
128 ) ;
|
|
if ( city && state )
|
|
sb.safePrintf("%s, %s",city,state);
|
|
}
|
|
else
|
|
sb.dequote (where,whereLen);
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'w' &&
|
|
head[i+2] == 'h' &&
|
|
head[i+3] == 'e' &&
|
|
head[i+4] == 'n' ) {
|
|
// insert the location
|
|
long whenLen;
|
|
char *when = r->getString("when",&whenLen);
|
|
// skip over the %when
|
|
i += 4;
|
|
if ( ! when ) continue;
|
|
sb.dequote (when,whenLen);
|
|
continue;
|
|
}
|
|
// %sortby
|
|
if ( head[i+1] == 's' &&
|
|
head[i+2] == 'o' &&
|
|
head[i+3] == 'r' &&
|
|
head[i+4] == 't' &&
|
|
head[i+5] == 'b' &&
|
|
head[i+6] == 'y' ) {
|
|
// insert the location
|
|
long sortBy = r->getLong("sortby",1);
|
|
// print the radio buttons
|
|
char *cs[5];
|
|
cs[0]="";
|
|
cs[1]="";
|
|
cs[2]="";
|
|
cs[3]="";
|
|
cs[4]="";
|
|
if ( sortBy >=1 && sortBy <=4 )
|
|
cs[sortBy] = " checked";
|
|
sb.safePrintf(
|
|
"<input type=radio name=sortby value=1%s>date "
|
|
"<input type=radio name=sortby value=2%s>distance "
|
|
"<input type=radio name=sortby value=3%s>relevancy "
|
|
"<input type=radio name=sortby value=4%s>popularity",
|
|
cs[1],cs[2],cs[3],cs[4]);
|
|
// skip over the %sortby
|
|
i += 6;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'e' ) {
|
|
// now we got the %e, insert the query
|
|
char *p = (char*) sb.getBuf();
|
|
long plen = sb.getAvail();
|
|
long eqlen = urlEncode ( p , plen , q , qlen );
|
|
//p += eqlen;
|
|
sb.incrementLength(eqlen);
|
|
// skip over %e
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'N' ) {
|
|
// now we got the %N, insert the global doc count
|
|
//long long c=g_checksumdb.getRdb()->getNumGlobalRecs();
|
|
//now each host tells us how many docs it has in its ping
|
|
long long c = g_hostdb.getNumGlobalRecs();
|
|
c += g_conf.m_docCountAdjustment;
|
|
// never allow to go negative
|
|
if ( c < 0 ) c = 0;
|
|
//p+=ulltoa(p,c);
|
|
char *p = (char*) sb.getBuf();
|
|
sb.reserve2x(16);
|
|
long len = ulltoa(p, c);
|
|
sb.incrementLength(len);
|
|
// skip over %N
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'E' ) {
|
|
// now each host tells us how many docs it has in its
|
|
// ping request
|
|
long long c = g_hostdb.getNumGlobalEvents();
|
|
char *p = (char*) sb.getBuf();
|
|
sb.reserve2x(16);
|
|
long len = ulltoa(p, c);
|
|
sb.incrementLength(len);
|
|
// skip over %E
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'n' ) {
|
|
// now we got the %n, insert the collection doc count
|
|
//p+=ulltoa(p,docsInColl);
|
|
char *p = (char*) sb.getBuf();
|
|
sb.reserve2x(16);
|
|
long len = ulltoa(p, docsInColl);
|
|
sb.incrementLength(len);
|
|
// skip over %n
|
|
i += 1;
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'T' ) {
|
|
// . print the final tail
|
|
// . only print admin link if we're local
|
|
//long user = g_pages.getUserType ( s , r );
|
|
//char *username = g_users.getUsername(r);
|
|
//char *pwd = r->getString ( "pwd" );
|
|
char *p = (char*) sb.getBuf();
|
|
long plen = sb.getAvail();
|
|
//p = g_pages.printTail ( p , p + plen , user , pwd );
|
|
char *n = g_pages.printTail(p , p + plen ,
|
|
r->isLocal());
|
|
sb.incrementLength(n - p);
|
|
// skip over %T
|
|
i += 1;
|
|
continue;
|
|
}
|
|
// print the drop down menu for selecting the # of results
|
|
if ( head[i+1] == 'D' ) {
|
|
// skip over %D
|
|
i += 1;
|
|
// skip if not enough buffer
|
|
//if ( p + 1000 >= pend ) continue;
|
|
// # results
|
|
//long n = r->getLong("n",10);
|
|
//bool printedDropDown;
|
|
//p = printNumResultsDropDown(p,n,&printedDropDown);
|
|
//printNumResultsDropDown(sb,n,&printedDropDown);
|
|
continue;
|
|
}
|
|
if ( head[i+1] == 'H' ) {
|
|
// . insert the secret key here, to stop seo bots
|
|
// . TODO: randomize its position to make parsing more
|
|
// difficult
|
|
// . this secret key is for submitting a new query
|
|
long key;
|
|
char kname[4];
|
|
g_httpServer.getKey (&key,kname,NULL,0,time(NULL),0,
|
|
10);
|
|
//sprintf ( p , "<input type=hidden name=%s value=%li>",
|
|
// kname,key);
|
|
//p += gbstrlen ( p );
|
|
sb.safePrintf( "<input type=hidden name=%s value=%li>",
|
|
kname,key);
|
|
|
|
//adds param for default screen size
|
|
//if(cr)
|
|
// sb.safePrintf("<input type=hidden id='screenWidth' name='ws' value=%li>", cr->m_screenWidth);
|
|
|
|
// insert collection name too
|
|
long collLen;
|
|
char *coll = r->getString ( "c" , &collLen );
|
|
if ( collLen > 0 && collLen < MAX_COLL_LEN ) {
|
|
//sprintf (p,"<input type=hidden name=c "
|
|
// "value=\"");
|
|
//p += gbstrlen ( p );
|
|
sb.safePrintf("<input type=hidden name=c "
|
|
"value=\"");
|
|
//memcpy ( p , coll , collLen );
|
|
//p += collLen;
|
|
sb.safeMemcpy(coll, collLen);
|
|
//sprintf ( p , "\">\n");
|
|
//p += gbstrlen ( p );
|
|
sb.safePrintf("\">\n");
|
|
}
|
|
|
|
// pass this crap on so zak can do searches
|
|
char *username = g_users.getUsername(r);
|
|
// this is null because not in the cookie and we are
|
|
// logged in
|
|
//char *pwd = r->getString ( "pwd" );
|
|
//sb.safePrintf("<input type=hidden name=pwd value=\"%s\">\n",
|
|
//pwd);
|
|
sb.safePrintf("<input type=hidden name=username "
|
|
"value=\"%s\">\n",username);
|
|
|
|
// skip over %H
|
|
i += 1;
|
|
continue;
|
|
}
|
|
// %t, print Top Directory section
|
|
if ( head[i+1] == 't' ) {
|
|
i += 1;
|
|
//p = printTopDirectory ( p, pend );
|
|
printTopDirectory ( sb );
|
|
continue;
|
|
}
|
|
|
|
// *p++ = head[i];
|
|
sb.safeMemcpy((char*)&head[i], 1);
|
|
continue;
|
|
}
|
|
//return p;
|
|
return true;
|
|
}
|
|
*/
|
|
|
|
// . store into "p"
|
|
// . returns bytes stored into "p"
|
|
// . used for entertainment purposes
|
|
/*
|
|
long printLastQueries ( char *p , char *pend ) {
|
|
// if not 512 bytes left, bail
|
|
if ( pend - p < 512 ) return 0;
|
|
// return w/ no table if no queries have been added to g_qbuf yet
|
|
if ( ! g_nextq == -1 ) return 0;
|
|
// remember start for returning # of bytes stored
|
|
char *start = p;
|
|
// begin table (no border)
|
|
sprintf (p,"<br><table border=0><tr><td><center>Last %li queries:"
|
|
"</td></tr>", (long)QBUF_NUMQUERIES );
|
|
p += gbstrlen ( p );
|
|
// point to last query added
|
|
long n = g_nextq - 1;
|
|
// . wrap it if we need to
|
|
// . QBUF_NUMQUERIES is defined to be 5 in PageResults.h
|
|
if ( n < 0 ) n = QBUF_NUMQUERIES - 1;
|
|
// . print up to five queries
|
|
// . queries are stored by advancing g_nextq, so "i" should go backward
|
|
long count = 0;
|
|
for ( long i = n ; count < QBUF_NUMQUERIES ; count++ , i-- ) {
|
|
// wrap i if we need to
|
|
if ( i == -1 ) i = QBUF_NUMQUERIES - 1;
|
|
// if this query is empty, skip it (might be uninitialized)
|
|
if ( g_qbuf[i][0] == '\0' ) continue;
|
|
// point to the query (these are NULL terminated)
|
|
char *q = g_qbuf[i];
|
|
long qlen = gbstrlen(q);
|
|
// bail if too big
|
|
if ( p + qlen + 32 + 1024 >= pend ) return p - start;
|
|
// otherwise, print this query to the page
|
|
sprintf ( p , "<tr><td><a href=/cgi/0.cgi?q=" );
|
|
p += gbstrlen ( p );
|
|
// store encoded query as cgi parm
|
|
p += urlEncode ( p , q , qlen );
|
|
// end a href tag
|
|
*p++ = '>';
|
|
// . then print the actual query to the page
|
|
// . use htmlEncode so nobody can abuse it
|
|
p += saftenTags ( p , pend - p , q , qlen );
|
|
// wrap it up
|
|
sprintf ( p , "</a></td></tr>" );
|
|
p += gbstrlen ( p );
|
|
}
|
|
// end the table
|
|
sprintf ( p , "</table>");
|
|
p += gbstrlen ( p );
|
|
// return bytes written
|
|
return p - start;
|
|
}
|
|
*/
|
|
|
|
|
|
//char *printTopDirectory ( char *p, char *pend ) {
|
|
// . appends the hard-coded "top directory" table of categories to "sb"
// . each table cell holds one top-level category link plus a few sample
//   sub-category links
// . returns the result of SafeBuf::safePrintf()
bool printTopDirectory ( SafeBuf& sb ) {
	// . the visible labels "Espanol" and "Francais" need an n-tilde and
	//   a c-cedilla, spliced in below as two raw bytes each via %c%c
	// . 195,177 is the UTF-8 encoding of n-tilde and 195,167 that of
	//   c-cedilla; the hrefs carry the same bytes percent-encoded, with
	//   each '%' doubled so safePrintf() emits it literally
	const int utf8Lead     = 195;
	const int ntildeTrail  = 177;
	const int ccedilTrail  = 167;

	static const char *s_topDirFmt =
		"<center>"
		"<table cellspacing=\"4\" cellpadding=\"4\"><tr><td valign=top>\n"

		// row 1: Arts | Business | Computers
		"<b><a href=\"/Arts/\">Arts</a></b><br>"
		"<small>"
		"<a href=\"/Arts/Movies/\">Movies</a>, "
		"<a href=\"/Arts/Television/\">Television</a>, "
		"<a href=\"/Arts/Music/\">Music</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Business/\">Business</a></b><br>"
		"<small>"
		"<a href=\"/Business/Employment/\">Jobs</a>, "
		"<a href=\"/Business/Real_Estate/\">Real Estate</a>, "
		"<a href=\"/Business/Investing/\">Investing</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Computers/\">Computers</a></b><br>"
		"<small>"
		"<a href=\"/Computers/Internet/\">Internet</a>, "
		"<a href=\"/Computers/Software/\">Software</a>, "
		"<a href=\"/Computers/Hardware/\">Hardware</a>..."
		"</small>\n"
		"</td></tr><tr><td valign=top>"

		// row 2: Games | Health | Home
		"<b><a href=\"/Games/\">Games</a></b><br>"
		"<small>"
		"<a href=\"/Games/Video_Games/\">Video Games</a>, "
		"<a href=\"/Games/Roleplaying/\">RPGs</a>, "
		"<a href=\"/Games/Gambling/\">Gambling</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Health/\">Health</a></b><br>"
		"<small>"
		"<a href=\"/Health/Fitness/\">Fitness</a>, "
		"<a href=\"/Health/Medicine/\">Medicine</a>, "
		"<a href=\"/Health/Alternative/\">Alternative</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Home/\">Home</a></b><br>"
		"<small>"
		"<a href=\"/Home/Family/\">Family</a>, "
		"<a href=\"/Home/Consumer_Information/\">Consumers</a>, "
		"<a href=\"/Home/Cooking/\">Cooking</a>..."
		"</small>\n"
		"</td></tr><tr><td valign=top>"

		// row 3: Kids and Teens | News | Recreation
		"<b><a href=\"/Kids_and_Teens/\">Kids and Teens</a></b><br>"
		"<small>"
		"<a href=\"/Kids_and_Teens/Arts/\">Arts</a>, "
		"<a href=\"/Kids_and_Teens/School_Time/\">School Time</a>, "
		"<a href=\"/Kids_and_Teens/Teen_Life/\">Teen Life</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/News/\">News</a></b><br>"
		"<small>"
		"<a href=\"/News/Media/\">Media</a>, "
		"<a href=\"/News/Newspapers/\">Newspapers</a>, "
		"<a href=\"/News/Weather/\">Weather</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Recreation/\">Recreation</a></b><br>"
		"<small>"
		"<a href=\"/Recreation/Travel/\">Travel</a>, "
		"<a href=\"/Recreation/Food/\">Food</a>, "
		"<a href=\"/Recreation/Outdoors/\">Outdoors</a>, "
		"<a href=\"/Recreation/Humor/\">Humor</a>..."
		"</small>\n"
		"</td></tr><tr><td valign=top>"

		// row 4: Reference | Regional | Science
		"<b><a href=\"/Reference/\">Reference</a></b><br>"
		"<small>"
		"<a href=\"/Reference/Maps/\">Maps</a>, "
		"<a href=\"/Reference/Education/\">Education</a>, "
		"<a href=\"/Reference/Libraries/\">Libraries</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Regional/\">Regional</a></b><br>"
		"<small>"
		"<a href=\"/Regional/North_America/United_States/\">US</a>, "
		"<a href=\"/Regional/North_America/Canada/\">Canada</a>, "
		"<a href=\"/Regional/Europe/United_Kingdom/\">UK</a>, "
		"<a href=\"/Regional/Europe/\">Europe</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Science/\">Science</a></b><br>"
		"<small>"
		"<a href=\"/Science/Biology/\">Biology</a>, "
		"<a href=\"/Science/Social_Sciences/Psychology/\">Psychology</a>, "
		"<a href=\"/Science/Physics/\">Physics</a>..."
		"</small>\n"
		"</td></tr><tr><td valign=top>"

		// row 5: Shopping | Society | Sports
		"<b><a href=\"/Shopping/\">Shopping</a></b><br>"
		"<small>"
		"<a href=\"/Shopping/Vehicles/Autos/\">Autos</a>, "
		"<a href=\"/Shopping/Clothing/\">Clothing</a>, "
		"<a href=\"/Shopping/Gifts/\">Gifts</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Society/\">Society</a></b><br>"
		"<small>"
		"<a href=\"/Society/People/\">People</a>, "
		"<a href=\"/Society/Religion_and_Spirituality/\">Religion</a>, "
		"<a href=\"/Society/Issues/\">Issues</a>..."
		"</small>\n"
		"</td><td valign=top>"

		"<b><a href=\"/Sports/\">Sports</a></b><br>"
		"<small>"
		"<a href=\"/Sports/Baseball/\">Baseball</a>, "
		"<a href=\"/Sports/Soccer/\">Soccer</a>, "
		"<a href=\"/Sports/Basketball/\">Basketball</a>..."
		"</small>\n"
		"</td></tr>"

		// row 6: World, spanning all three columns
		"<tr><td colspan=3 valign=top>"
		"<b><a href=\"/World/\">World</a></b><br>"
		"<small>"
		"<a href=\"/World/Deutsch/\">Deutsch</a>, "
		"<a href=\"/World/Espa%%c3%%b1ol/\">Espa%c%col</a>, "
		"<a href=\"/World/Fran%%c3%%a7ais/\">Fran%c%cais</a>, "
		"<a href=\"/World/Italiano/\">Italiano</a>, "
		"<a href=\"/World/Japanese/\">Japanese</a>, "
		"<a href=\"/World/Nederlands/\">Nederlands</a>, "
		"<a href=\"/World/Polska/\">Polska</a>, "
		"<a href=\"/World/Dansk/\">Dansk</a>, "
		"<a href=\"/World/Svenska/\">Svenska</a>..."
		"</small>\n"
		"</td></tr></table></center>\n";

	return sb.safePrintf ( s_topDirFmt ,
			       utf8Lead , ntildeTrail ,
			       utf8Lead , ccedilTrail );
}
|
|
|
|
/////////////////
|
|
//
|
|
// ADD URL PAGE
|
|
//
|
|
/////////////////
|
|
|
|
#include "PageInject.h"
|
|
#include "TuringTest.h"
|
|
#include "AutoBan.h"
|
|
#include "CollectionRec.h"
|
|
#include "Users.h"
|
|
#include "Spider.h"
|
|
|
|
//static bool sendReply ( void *state , bool addUrlEnabled );
|
|
static bool canSubmit (unsigned long h, long now, long maxUrlsPerIpDom);
|
|
|
|
//static void addedStuff ( void *state );
|
|
|
|
void resetPageAddUrl ( ) ;
|
|
|
|
/*
|
|
class State2 {
|
|
public:
|
|
Url m_url;
|
|
//char *m_buf;
|
|
//long m_bufLen;
|
|
//long m_bufMaxLen;
|
|
};
|
|
*/
|
|
|
|
class State1 {
|
|
public:
|
|
//Msg4 m_msg4;
|
|
Msg7 m_msg7;
|
|
TcpSocket *m_socket;
|
|
bool m_isAdmin;
|
|
char m_coll[MAX_COLL_LEN+1];
|
|
bool m_goodAnswer;
|
|
bool m_doTuringTest;
|
|
long m_ufuLen;
|
|
char m_ufu[MAX_URL_LEN];
|
|
|
|
//long m_urlLen;
|
|
//char m_url[MAX_URL_LEN];
|
|
|
|
//char m_username[MAX_USER_SIZE];
|
|
bool m_strip;
|
|
bool m_spiderLinks;
|
|
bool m_forceRespider;
|
|
// buf filled by the links coming from google, msn, yahoo, etc
|
|
//State2 m_state2[5]; // gb, goog, yahoo, msn, ask
|
|
long m_numSent;
|
|
long m_numReceived;
|
|
//long m_raw;
|
|
//SpiderRequest m_sreq;
|
|
};
|
|
|
|
static void doneInjectingWrapper3 ( void *st1 ) ;
|
|
|
|
// only allow up to 1 Msg10's to be in progress at a time
|
|
static bool s_inprogress = false;
|
|
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on error
|
|
bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
|
|
// . get fields from cgi field of the requested url
|
|
// . get the search query
|
|
long urlLen = 0;
|
|
char *url = r->getString ( "u" , &urlLen , NULL /*default*/);
|
|
|
|
// see if they provided a url of a file of urls if they did not
|
|
// provide a url to add directly
|
|
bool isAdmin = g_collectiondb.isAdmin ( r , s );
|
|
long ufuLen = 0;
|
|
char *ufu = NULL;
|
|
if ( isAdmin )
|
|
// get the url of a file of urls (ufu)
|
|
ufu = r->getString ( "ufu" , &ufuLen , NULL );
|
|
|
|
// can't be too long, that's obnoxious
|
|
if ( urlLen > MAX_URL_LEN || ufuLen > MAX_URL_LEN ) {
|
|
g_errno = EBUFTOOSMALL;
|
|
g_msg = " (error: url too long)";
|
|
return g_httpServer.sendErrorReply(s,500,"url too long");
|
|
}
|
|
// get the collection
|
|
long collLen = 0;
|
|
char *coll = r->getString("c",&collLen);
|
|
if ( ! coll || ! coll[0] ) {
|
|
//coll = g_conf.m_defaultColl;
|
|
coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
|
|
collLen = gbstrlen(coll);
|
|
}
|
|
// get collection rec
|
|
CollectionRec *cr = g_collectiondb.getRec ( coll );
|
|
// bitch if no collection rec found
|
|
if ( ! cr ) {
|
|
g_errno = ENOCOLLREC;
|
|
g_msg = " (error: no collection)";
|
|
return g_httpServer.sendErrorReply(s,500,"no coll rec");
|
|
}
|
|
// . make sure the ip is not banned
|
|
// . we may also have an exclusive list of IPs for private collections
|
|
if ( ! cr->hasSearchPermission ( s ) ) {
|
|
g_errno = ENOPERM;
|
|
g_msg = " (error: permission denied)";
|
|
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
|
|
}
|
|
|
|
|
|
//
|
|
// if no url, print the main homepage page
|
|
//
|
|
if ( ! url ) {
|
|
SafeBuf sb;
|
|
printAddUrlHomePage ( sb , NULL , r );
|
|
return g_httpServer.sendDynamicPage(s,
|
|
sb.getBufStart(),
|
|
sb.length(),
|
|
// 120 secs cachetime
|
|
// don't cache any more
|
|
// since we have the
|
|
// login bar at top of page
|
|
0,//120 ,// cachetime
|
|
false,// post?
|
|
"text/html",
|
|
200,
|
|
NULL, // cookie
|
|
"UTF-8",
|
|
r);
|
|
}
|
|
|
|
//
|
|
// run the ajax script on load to submit the url now
|
|
//
|
|
long id = r->getLong("id",0);
|
|
// if we are not being called by the ajax loader, the put the
|
|
// ajax loader script into the html now
|
|
if ( id == 0 ) {
|
|
SafeBuf sb;
|
|
printAddUrlHomePage ( sb , url , r );
|
|
return g_httpServer.sendDynamicPage ( s,
|
|
sb.getBufStart(),
|
|
sb.length(),
|
|
// don't cache any more
|
|
// since we have the
|
|
// login bar at top of
|
|
//page
|
|
0,//3600,// cachetime
|
|
false,// post?
|
|
"text/html",
|
|
200,
|
|
NULL, // cookie
|
|
"UTF-8",
|
|
r);
|
|
}
|
|
|
|
//
|
|
// ok, inject the provided url!!
|
|
//
|
|
|
|
//
|
|
// check for errors first
|
|
//
|
|
|
|
// if addurl is turned off, just print "disabled" msg
|
|
char *msg = NULL;
|
|
if ( ! g_conf.m_addUrlEnabled )
|
|
msg = "Add url is temporarily disabled";
|
|
// can also be turned off in the collection rec
|
|
if ( ! cr->m_addUrlEnabled )
|
|
msg = "Add url is temporarily disabled";
|
|
// or if in read-only mode
|
|
if ( g_conf.m_readOnlyMode )
|
|
msg = "Add url is temporarily disabled";
|
|
// cannot add if another Msg10 from here is still in progress
|
|
if ( s_inprogress )
|
|
msg = "Add url is currently busy! Try again in a second.";
|
|
|
|
// . send msg back to the ajax request
|
|
// . use cachetime of 3600 so it does not re-inject if you hit the
|
|
// back button!
|
|
if ( msg ) {
|
|
SafeBuf sb;
|
|
sb.safePrintf("%s",msg);
|
|
g_httpServer.sendDynamicPage (s,
|
|
sb.getBufStart(),
|
|
sb.length(),
|
|
3600,//-1, // cachetime
|
|
false,// post?
|
|
"text/html",
|
|
200, // http status
|
|
NULL, // cookie
|
|
"UTF-8");
|
|
return true;
|
|
}
|
|
|
|
|
|
|
|
|
|
// make a new state
|
|
State1 *st1 ;
|
|
try { st1 = new (State1); }
|
|
catch ( ... ) {
|
|
g_errno = ENOMEM;
|
|
log("PageAddUrl: new(%i): %s",
|
|
sizeof(State1),mstrerror(g_errno));
|
|
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); }
|
|
mnew ( st1 , sizeof(State1) , "PageAddUrl" );
|
|
// save socket and isAdmin
|
|
st1->m_socket = s;
|
|
st1->m_isAdmin = isAdmin;
|
|
|
|
/*
|
|
// save the url
|
|
st1->m_url[0] = '\0';
|
|
if ( url ) {
|
|
// normalize and add www. if it needs it
|
|
Url uu;
|
|
uu.set ( url , gbstrlen(url) , true );
|
|
// remove >'s i guess and store in st1->m_url[] buffer
|
|
st1->m_urlLen=cleanInput ( st1->m_url,
|
|
MAX_URL_LEN,
|
|
uu.getUrl(),
|
|
uu.getUrlLen() );
|
|
}
|
|
*/
|
|
|
|
// save the "ufu" (url of file of urls)
|
|
st1->m_ufu[0] = '\0';
|
|
st1->m_ufuLen = ufuLen;
|
|
memcpy ( st1->m_ufu , ufu , ufuLen );
|
|
st1->m_ufu[ufuLen] = '\0';
|
|
|
|
st1->m_doTuringTest = cr->m_doTuringTest;
|
|
st1->m_spiderLinks = true;
|
|
st1->m_strip = true;
|
|
|
|
// save the collection name in the State1 class
|
|
if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
|
|
strncpy ( st1->m_coll , coll , collLen );
|
|
st1->m_coll [ collLen ] = '\0';
|
|
|
|
// assume they answered turing test correctly
|
|
st1->m_goodAnswer = true;
|
|
|
|
// get ip of submitter
|
|
//unsigned long h = ipdom ( s->m_ip );
|
|
// . use top 2 bytes now, some isps have large blocks
|
|
// . if this causes problems, then they can do pay for inclusion
|
|
unsigned long h = iptop ( s->m_ip );
|
|
long codeLen;
|
|
char* code = r->getString("code", &codeLen);
|
|
if(g_autoBan.hasCode(code, codeLen, s->m_ip)) {
|
|
long uipLen = 0;
|
|
char* uip = r->getString("uip",&uipLen);
|
|
long hip = 0;
|
|
//use the uip when we have a raw query to test if
|
|
//we can submit
|
|
if(uip) {
|
|
hip = atoip(uip, uipLen);
|
|
h = iptop( hip );
|
|
}
|
|
}
|
|
|
|
|
|
st1->m_strip = r->getLong("strip",0);
|
|
// Remember, for cgi, if the box is not checked, then it is not
|
|
// reported in the request, so set default return value to 0
|
|
st1->m_spiderLinks = r->getLong("spiderLinks",0);
|
|
|
|
// . should we force it into spiderdb even if already in there
|
|
// . use to manually update spider times for a url
|
|
// . however, will not remove old scheduled spider times
|
|
// . mdw: made force on the default
|
|
st1->m_forceRespider = r->getLong("force",1); // 0);
|
|
|
|
long now = getTimeGlobal();
|
|
// . allow 1 submit every 1 hour
|
|
// . restrict by submitter domain ip
|
|
if ( ! st1->m_isAdmin &&
|
|
! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
|
|
// return error page
|
|
//g_errno = ETOOEARLY;
|
|
SafeBuf sb;
|
|
sb.safePrintf("You breached your add url quota.");
|
|
mdelete ( st1 , sizeof(State1) , "PageAddUrl" );
|
|
delete (st1);
|
|
// use cachetime of 3600 so it does not re-inject if you hit
|
|
// the back button!
|
|
g_httpServer.sendDynamicPage (s,
|
|
sb.getBufStart(),
|
|
sb.length(),
|
|
3600,//-1, // cachetime
|
|
false,// post?
|
|
"text/html",
|
|
200, // http status
|
|
NULL, // cookie
|
|
"UTF-8");
|
|
return true;
|
|
}
|
|
|
|
//st1->m_query = r->getString( "qts", &st1->m_queryLen );
|
|
|
|
// check it, if turing test is enabled for this collection
|
|
/*
|
|
if ( ! st1->m_isAdmin && cr->m_doTuringTest &&
|
|
! g_turingTest.isHuman(r) ) {
|
|
// log note so we know it didn't make it
|
|
g_msg = " (error: bad answer)";
|
|
//log("PageAddUrl:: addurl failed for %s : bad answer",
|
|
// iptoa(s->m_ip));
|
|
st1->m_goodAnswer = false;
|
|
return sendReply ( st1 , true ); // addUrl enabled?
|
|
}
|
|
*/
|
|
|
|
|
|
//
|
|
// inject using msg7
|
|
//
|
|
|
|
// . pass in the cleaned url
|
|
// . returns false if blocked, true otherwise
|
|
if ( ! st1->m_msg7.inject ( s ,
|
|
r ,
|
|
st1 ,
|
|
doneInjectingWrapper3 ) )
|
|
return false;
|
|
|
|
// some kinda error, g_errno should be set i guess
|
|
doneInjectingWrapper3 ( st1 );
|
|
// we did not block
|
|
return true;
|
|
}
|
|
|
|
|
|
void doneInjectingWrapper3 ( void *st ) {
|
|
State1 *st1 = (State1 *)st;
|
|
// allow others to add now
|
|
s_inprogress = false;
|
|
// get the state properly
|
|
//State1 *st1 = (State1 *) state;
|
|
// in order to see what sites are being added log it, then we can
|
|
// more easily remove sites from sitesearch.gigablast.com that are
|
|
// being added but not being searched
|
|
char *url = st1->m_msg7.m_xd.m_firstUrl.m_url;
|
|
log(LOG_INFO,"http: add url %s (%s)",url ,mstrerror(g_errno));
|
|
// extract info from state
|
|
TcpSocket *s = st1->m_socket;
|
|
//bool isAdmin = st1->m_isAdmin;
|
|
//char *url = NULL;
|
|
//if ( st1->m_urlLen ) url = st1->m_url;
|
|
// re-null it out if just http://
|
|
//bool printUrl = true;
|
|
//if ( st1->m_urlLen == 0 ) printUrl = false;
|
|
//if ( ! st1->m_url ) printUrl = false;
|
|
//if(st1->m_urlLen==7&&st1->m_url&&!strncasecmp(st1->m_url,"http://",7)
|
|
// printUrl = false;
|
|
|
|
// page is not more than 32k
|
|
char buf[1024*32+MAX_URL_LEN*2];
|
|
SafeBuf sb(buf, 1024*32+MAX_URL_LEN*2);
|
|
|
|
//char rawbuf[1024*8];
|
|
//SafeBuf rb(rawbuf, 1024*8);
|
|
//rb.safePrintf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
|
|
//rb.safePrintf("<status>\n");
|
|
//CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
|
|
|
|
// collection name
|
|
|
|
//char tt [ 128 ];
|
|
//tt[0] = '\0';
|
|
//if ( st1->m_coll[0] != '\0' && ! isAdmin )
|
|
// sprintf ( tt , " for %s", st1->m_coll );
|
|
|
|
|
|
//
|
|
// what we print here will just be the error msg, because the
|
|
// ajax will fill the text we print here into the div below
|
|
// the add url box
|
|
//
|
|
|
|
// if there was an error let them know
|
|
//char msg[MAX_URL_LEN + 1024];
|
|
char *pm = "";
|
|
if ( g_errno ) {
|
|
if ( g_errno == ETOOEARLY ) {
|
|
pm = "Error. 100 urls have "
|
|
"already been submitted by "
|
|
"this IP address for the last 24 hours. "
|
|
"<a href=/addurlerror.html>Explanation</a>.";
|
|
log("addurls: Failed for user at %s: "
|
|
"quota breeched.", iptoa(s->m_ip));
|
|
|
|
//rb.safePrintf("Error. %li urls have "
|
|
// "already been submitted by "
|
|
// "this IP address for the "
|
|
// "last 24 hours. ",
|
|
// cr->m_maxAddUrlsPerIpDomPerDay);
|
|
sb.safePrintf("%s",pm);
|
|
}
|
|
else {
|
|
sb.safePrintf("Error adding url(s): <b>%s[%i]</b>",
|
|
mstrerror(g_errno) , g_errno);
|
|
//pm = msg;
|
|
//rb.safePrintf("Error adding url(s): %s[%i]",
|
|
// mstrerror(g_errno) , g_errno);
|
|
//sb.safePrintf("%s",pm);
|
|
}
|
|
}
|
|
else {
|
|
if ( ! g_conf.m_addUrlEnabled ) {
|
|
pm = "<font color=#ff0000>"
|
|
"Sorry, this feature is temporarily disabled. "
|
|
"Please try again later.</font>";
|
|
if ( url )
|
|
log("addurls: failed for user at %s: "
|
|
"add url is disabled. "
|
|
"Enable add url on the "
|
|
"Master Controls page and "
|
|
"on the Spider Controls page for "
|
|
"this collection.",
|
|
iptoa(s->m_ip));
|
|
|
|
sb.safePrintf("%s",pm);
|
|
//rb.safePrintf("Sorry, this feature is temporarily "
|
|
// "disabled. Please try again later.");
|
|
}
|
|
else if ( s_inprogress ) {
|
|
pm = "Add url busy. Try again later.";
|
|
log("addurls: Failed for user at %s: "
|
|
"busy adding another.", iptoa(s->m_ip));
|
|
//rb.safePrintf("Add url busy. Try again later.");
|
|
sb.safePrintf("%s",pm);
|
|
}
|
|
// did they fail the turing test?
|
|
else if ( ! st1->m_goodAnswer ) {
|
|
pm = "<font color=#ff0000>"
|
|
"Oops, you did not enter the 4 large letters "
|
|
"you see below. Please try again.</font>";
|
|
//rb.safePrintf("could not add the url"
|
|
// " because the turing test"
|
|
// " is enabled.");
|
|
sb.safePrintf("%s",pm);
|
|
}
|
|
else if ( st1->m_msg7.m_xd.m_indexCodeValid &&
|
|
st1->m_msg7.m_xd.m_indexCode ) {
|
|
long ic = st1->m_msg7.m_xd.m_indexCode;
|
|
sb.safePrintf("<b>Had error injecting url: %s</b>",
|
|
mstrerror(ic));
|
|
}
|
|
/*
|
|
if ( url && ! st1->m_ufu[0] && url[0] && printUrl ) {
|
|
sprintf ( msg ,"<u>%s</u> added to spider "
|
|
"queue "
|
|
"successfully", url );
|
|
//rb.safePrintf("%s added to spider "
|
|
// "queue successfully", url );
|
|
}
|
|
else if ( st1->m_ufu[0] ) {
|
|
sprintf ( msg ,"urls in <u>%s</u> "
|
|
"added to spider queue "
|
|
"successfully", st1->m_ufu );
|
|
|
|
//rb.safePrintf("urls in %s added to spider "
|
|
// "queue successfully", url );
|
|
|
|
}
|
|
*/
|
|
else {
|
|
//rb.safePrintf("Add the url you want:");
|
|
// avoid hitting browser page cache
|
|
unsigned long rand32 = rand();
|
|
// in the mime to 0 seconds!
|
|
sb.safePrintf("<b>Url successfully added. "
|
|
"<a href=/search?rand=%lu&q=url%%3A",
|
|
rand32);
|
|
sb.urlEncode(url);
|
|
sb.safePrintf(">Check it</a> or "
|
|
"<a href=/seo?u=");
|
|
sb.urlEncode(url);
|
|
sb.safePrintf(">SEO it</a>"
|
|
".</b>");
|
|
}
|
|
|
|
//pm = msg;
|
|
//url = "http://";
|
|
//else
|
|
// pm = "Don't forget to <a href=/gigaboost.html>"
|
|
// "Gigaboost</a> your URL.";
|
|
}
|
|
|
|
// store it
|
|
sb.safePrintf("<b>%s</b>",pm );
|
|
|
|
// clear g_errno, if any, so our reply send goes through
|
|
g_errno = 0;
|
|
|
|
|
|
// nuke state
|
|
mdelete ( st1 , sizeof(State1) , "PageAddUrl" );
|
|
delete (st1);
|
|
|
|
// this reply should be loaded from the ajax loader so use a cache
|
|
// time of 1 hour so it does not re-inject the url if you hit the
|
|
// back button
|
|
g_httpServer.sendDynamicPage (s,
|
|
sb.getBufStart(),
|
|
sb.length(),
|
|
3600, // cachetime
|
|
false,// post?
|
|
"text/html",
|
|
200, // http status
|
|
NULL, // cookie
|
|
"UTF-8");
|
|
}
|
|
|
|
|
|
// we get like 100k submissions a day!!!
|
|
static HashTable s_htable;
|
|
static bool s_init = false;
|
|
static long s_lastTime = 0;
|
|
bool canSubmit ( unsigned long h , long now , long maxAddUrlsPerIpDomPerDay ) {
|
|
// . sometimes no limit
|
|
// . 0 means no limit because if they don't want any submission they
|
|
// can just turn off add url and we want to avoid excess
|
|
// troubleshooting for why a url can't be added
|
|
if ( maxAddUrlsPerIpDomPerDay <= 0 ) return true;
|
|
// init the table
|
|
if ( ! s_init ) {
|
|
s_htable.set ( 50000 );
|
|
s_init = true;
|
|
}
|
|
// clean out table every 24 hours
|
|
if ( now - s_lastTime > 24*60*60 ) {
|
|
s_lastTime = now;
|
|
s_htable.clear();
|
|
}
|
|
// . if table almost full clean out ALL slots
|
|
// . TODO: just clean out oldest slots
|
|
if ( s_htable.getNumSlotsUsed() > 47000 ) s_htable.clear ();
|
|
// . how many times has this IP domain submitted?
|
|
// . allow 10 times per day
|
|
long n = s_htable.getValue ( h );
|
|
// if over 24hr limit then bail
|
|
if ( n >= maxAddUrlsPerIpDomPerDay ) return false;
|
|
// otherwise, inc it
|
|
n++;
|
|
// add to table, will replace old values
|
|
s_htable.addKey ( h , n );
|
|
return true;
|
|
}
|
|
|
|
|
|
// resets the table of add url submission counts used by canSubmit()
void resetPageAddUrl ( ) {
	s_htable.reset ( );
}
|
|
|