2013-08-03 00:12:24 +04:00
# include "gb-include.h"
# include "Indexdb.h" // makeKey(long long docId)
# include "Titledb.h"
# include "Spider.h"
# include "Tagdb.h"
# include "Dns.h"
//#include "PageResults.h" // for query buf, g_qbuf
# include "Collectiondb.h"
2013-12-11 03:28:04 +04:00
//#include "CollectionRec.h"
2013-08-03 00:12:24 +04:00
# include "Clusterdb.h" // for getting # of docs indexed
//#include "Checksumdb.h" // should migrate to this one, though
# include "Pages.h"
# include "Query.h" // MAX_QUERY_LEN
# include "SafeBuf.h"
# include "LanguageIdentifier.h"
# include "LanguagePages.h"
# include "Users.h"
# include "Address.h" // getIPLocation
# include "Proxy.h"
//char *printNumResultsDropDown ( char *p, long n, bool *printedDropDown);
bool printNumResultsDropDown ( SafeBuf & sb , long n , bool * printedDropDown ) ;
//static char *printTopDirectory ( char *p, char *pend );
2014-07-06 09:20:15 +04:00
static bool printTopDirectory ( SafeBuf & sb , char format ) ;
2013-08-03 00:12:24 +04:00
// this prints the last five queries
//static long printLastQueries ( char *p , char *pend ) ;
//static char *expandRootHtml ( char *p , long plen ,
/*
static bool expandRootHtml ( SafeBuf & sb ,
uint8_t * html , long htmlLen ,
char * q , long qlen ,
HttpRequest * r ,
TcpSocket * s ,
long long docsInColl ,
CollectionRec * cr ) ;
*/
bool sendPageRoot ( TcpSocket * s , HttpRequest * r ) {
return sendPageRoot ( s , r , NULL ) ;
}
bool printNav ( SafeBuf & sb , HttpRequest * r ) {
2013-11-16 01:47:05 +04:00
2014-08-03 04:58:11 +04:00
/*
2013-11-16 01:47:05 +04:00
char * root = " " ;
char * rootSecure = " " ;
if ( g_conf . m_isMattWells ) {
root = " http://www.gigablast.com " ;
rootSecure = " https://www.gigablast.com " ;
}
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <center><b><p class=nav> "
2013-11-16 01:47:05 +04:00
" <a href=%s/about.html>About</a> "
" "
" <a href=%s/contact.html>Contact</a> "
" "
2014-07-30 18:39:36 +04:00
" <a href=%s/help.html>Help</a> "
2013-11-16 01:47:05 +04:00
" "
" <a href=%s/privacy.html>Privacy Policy</a> "
" "
2014-06-13 08:27:06 +04:00
// TODO: API page must also provide a description
// of the output... like searchfeed.html does already.
// put that in the api page as well.
" <a href=%s/api>API</a> "
2013-11-16 01:47:05 +04:00
, root
, root
, root
, root
, root
2013-08-03 00:12:24 +04:00
) ;
2014-01-18 23:54:58 +04:00
if ( g_conf . m_isMattWells )
sb . safePrintf ( " "
" <a href=%s/seoapi.html>SEO API</a> "
" "
" <a href=%s/account>My Account</a> "
, root
, rootSecure
//" <a href=/logout>Logout</a>"
) ;
2014-03-21 02:48:51 +04:00
//if ( r->isLocal() )
2014-07-30 18:39:36 +04:00
sb . safePrintf ( " [<a style=color:green; "
" href= \" /admin/settings \" > "
" Admin</a>] " ) ;
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " </p></b></center> " ) ;
*/
sb . safePrintf ( " </TD></TR></TABLE> "
" </body></html> " ) ;
2013-08-03 00:12:24 +04:00
return true ;
}
2014-06-22 08:07:38 +04:00
//////////////
//
// BEGIN expandHtml() helper functions
//
//////////////
bool printFamilyFilter ( SafeBuf & sb , bool familyFilterOn ) {
char * s1 = " " ;
char * s2 = " " ;
if ( familyFilterOn ) s1 = " checked " ;
else s2 = " checked " ;
//p += sprintf ( p ,
return sb . safePrintf (
" Family filter: "
" <input type=radio name=ff value=1%s>On "
" <input type=radio name=ff value=0%s>Off " ,
s1 , s2 ) ;
//return p;
}
//char *printNumResultsDropDown ( char *p , long n , bool *printedDropDown ) {
bool printNumResultsDropDown ( SafeBuf & sb , long n , bool * printedDropDown ) {
if ( n ! = 10 & & n ! = 20 & & n ! = 30 & & n ! = 50 & & n ! = 100 )
//return p;
return true ;
* printedDropDown = true ;
char * d1 = " " ;
char * d2 = " " ;
char * d3 = " " ;
char * d4 = " " ;
char * d5 = " " ;
if ( n = = 10 ) d1 = " selected " ;
if ( n = = 20 ) d2 = " selected " ;
if ( n = = 30 ) d3 = " selected " ;
if ( n = = 50 ) d4 = " selected " ;
if ( n = = 100 ) d5 = " selected " ;
//p += sprintf ( p ,
return sb . safePrintf (
" <select name=n> \n "
" <option value=10%s>10 \n "
" <option value=20%s>20 \n "
" <option value=30%s>30 \n "
" <option value=50%s>50 \n "
" <option value=100%s>100 \n "
" </select> " ,
d1 , d2 , d3 , d4 , d5 ) ;
//return p;
}
//char *printDirectorySearchType ( char *p, long sdirt ) {
bool printDirectorySearchType ( SafeBuf & sb , long sdirt ) {
// default to entire directory
if ( sdirt < 1 | | sdirt > 4 )
sdirt = 3 ;
// by default search the whole thing
sb . safePrintf ( " <input type= \" radio \" name= \" sdirt \" value= \" 3 \" " ) ;
if ( sdirt = = 3 ) sb . safePrintf ( " checked> " ) ;
else sb . safePrintf ( " > " ) ;
sb . safePrintf ( " Entire Directory<br> \n " ) ;
// entire category
sb . safePrintf ( " <input type= \" radio \" name= \" sdirt \" value= \" 1 \" " ) ;
if ( sdirt = = 1 ) sb . safePrintf ( " checked> " ) ;
else sb . safePrintf ( " > " ) ;
sb . safePrintf ( " Entire Category<br> \n " ) ;
// base category only
sb . safePrintf ( " <nobr><input type= \" radio \" name= \" sdirt \" value= \" 2 \" " ) ;
if ( sdirt = = 2 ) sb . safePrintf ( " checked> " ) ;
else sb . safePrintf ( " > " ) ;
sb . safePrintf ( " Pages in Base Category</nobr><br> \n " ) ;
// sites in base category
sb . safePrintf ( " <input type= \" radio \" name= \" sdirt \" value= \" 7 \" " ) ;
if ( sdirt = = 7 ) sb . safePrintf ( " checked> " ) ;
else sb . safePrintf ( " > " ) ;
sb . safePrintf ( " Sites in Base Category<br> \n " ) ;
// sites in entire category
sb . safePrintf ( " <input type= \" radio \" name= \" sdirt \" value= \" 6 \" " ) ;
if ( sdirt = = 6 ) sb . safePrintf ( " checked> " ) ;
else sb . safePrintf ( " > " ) ;
sb . safePrintf ( " Sites in Entire Category<br> \n " ) ;
// end it
return true ;
}
# include "SearchInput.h"
bool printRadioButtons ( SafeBuf & sb , SearchInput * si ) {
// don't display this for directory search
// look it up. returns catId <= 0 if dmoz not setup yet.
// From PageDirectory.cpp
//long catId= g_categories->getIdFromPath(decodedPath, decodedPathLen);
// if /Top print the directory homepage
//if ( catId == 1 || catId <= 0 )
// return true;
// site
/*
if ( si - > m_siteLen > 0 ) {
// . print rest of search box etc.
// . print cobranding radio buttons
//if ( p + si->m_siteLen + 1 >= pend ) return p;
//p += sprintf ( p ,
return sb . safePrintf (
//" "
//"<font size=-1>"
//"<b><a href=\"/\"><font color=red>"
//"Powered by Gigablast</font></a></b>"
//"<br>"
//"<tr align=center><td></td><td>"
" <input type=radio name=site value= \" \" > "
" Search the Web "
" <input type=radio name=site "
" value= \" %s \" checked>Search %s " ,
//"</td></tr></table><br>"
//"</td></tr>"
//"<font size=-1>" ,
si - > m_site , si - > m_site ) ;
}
else if ( si - > m_sitesLen > 0 ) {
*/
if ( si - > m_sites & & si - > m_sites [ 0 ] ) {
// . print rest of search box etc.
// . print cobranding radio buttons
//if ( p + si->m_sitesLen + 1 >= pend ) return p;
// if not explicitly instructed to print all sites
// and they are a long list, do not print all
/*
char tmp [ 1000 ] ;
char * x = si - > m_sites ;
if ( si - > m_sitesLen > 255 ) { //&&!st->m_printAllSites){
// copy what's there
strncpy ( tmp , si - > m_sites , 255 ) ;
x = tmp + 254 ;
// do not hack off in the middle of a site
while ( is_alnum ( * x ) & & x > tmp ) x - - ;
// overwrite it with [more] link
//x += sprintf ( x , "<a href=\"/search?" );
// our current query parameters
//if ( x + uclen + 10 >= xend ) goto skipit;
sprintf ( x , " ... " ) ;
x = tmp ;
}
*/
//p += sprintf ( p ,
sb . safePrintf (
//" "
//"<font size=-1>"
//"<b><a href=\"/\"><font color=red>"
//"Powered by Gigablast</font></a></b>"
//"<br>"
//"<tr align=center><td></td><td>"
" <input type=radio name=sites value= \" \" > "
" Search the Web "
" <input type=radio name=sites "
" value= \" %s \" checked>Search " ,
//"</td></tr></table><br>"
//"</td></tr>"
//"<font size=-1>" ,
si - > m_sites ) ;
sb . safeTruncateEllipsis ( si - > m_sites , 255 ) ;
}
return true ;
}
bool printLogo ( SafeBuf & sb , SearchInput * si ) {
// if an image was provided...
if ( ! si - > m_imgUrl | | ! si - > m_imgUrl [ 0 ] ) {
// no, now we default to our logo
//return true;
//p += sprintf ( p ,
return sb . safePrintf (
" <a href= \" / \" > "
2014-06-22 08:40:25 +04:00
" <img valign=top width=250 height=61 border=0 "
2014-06-22 08:07:38 +04:00
// avoid https for this, so make it absolute
2014-06-22 08:40:25 +04:00
" src= \" /logo-med.jpg \" ></a> " ) ;
2014-06-22 08:07:38 +04:00
//return p;
}
// do we have a link?
if ( si - > m_imgLink & & si - > m_imgLink [ 0 ] )
//p += sprintf ( p , "<a href=\"%s\">",si->m_imgLink);
sb . safePrintf ( " <a href= \" %s \" > " , si - > m_imgLink ) ;
// print image width and length
if ( si - > m_imgWidth > = 0 & & si - > m_imgHeight > = 0 )
//p += sprintf ( p , "<img width=%li height=%li ",
sb . safePrintf ( " <img width=%li height=%li " ,
si - > m_imgWidth , si - > m_imgHeight ) ;
else
//p += sprintf ( p , "<img " );
sb . safePrintf ( " <img " ) ;
//p += sprintf ( p , "border=0 src=\"%s\">",
sb . safePrintf ( " border=0 src= \" %s \" > " ,
si - > m_imgUrl ) ;
// end the link if we had one
if ( si - > m_imgLink & & si - > m_imgLink [ 0 ] )
//p += sprintf ( p , "</a>");
sb . safePrintf ( " </a> " ) ;
return true ;
}
2014-06-21 20:57:51 +04:00
2014-06-22 08:07:38 +04:00
/////////////
//
// END expandHtml() helper functions
//
/////////////
bool expandHtml ( SafeBuf & sb ,
char * head ,
long hlen ,
char * q ,
long qlen ,
HttpRequest * r ,
SearchInput * si ,
char * method ,
CollectionRec * cr ) {
2014-06-21 20:57:51 +04:00
//char *pend = p + plen;
// store custom header into buf now
//for ( long i = 0 ; i < hlen && p+10 < pend ; i++ ) {
for ( long i = 0 ; i < hlen ; i + + ) {
if ( head [ i ] ! = ' % ' ) {
// *p++ = head[i];
sb . safeMemcpy ( ( char * ) & head [ i ] , 1 ) ;
continue ;
}
if ( i + 1 > = hlen ) {
// *p++ = head[i];
sb . safeMemcpy ( ( char * ) & head [ i ] , 1 ) ;
continue ;
}
if ( head [ i + 1 ] = = ' S ' ) {
// now we got the %S, insert "spiders are [on/off]"
bool spidersOn = true ;
if ( ! g_conf . m_spideringEnabled ) spidersOn = false ;
if ( ! cr - > m_spideringEnabled ) spidersOn = false ;
if ( spidersOn )
sb . safePrintf ( " Spiders are on " ) ;
else
sb . safePrintf ( " Spiders are off " ) ;
// skip over %S
i + = 1 ;
continue ;
}
if ( head [ i + 1 ] = = ' q ' ) {
// now we got the %q, insert the query
char * p = ( char * ) sb . getBuf ( ) ;
char * pend = ( char * ) sb . getBufEnd ( ) ;
long eqlen = dequote ( p , pend , q , qlen ) ;
//p += eqlen;
sb . incrementLength ( eqlen ) ;
// skip over %q
i + = 1 ;
continue ;
}
if ( head [ i + 1 ] = = ' c ' ) {
// now we got the %q, insert the query
if ( cr ) sb . safeStrcpy ( cr - > m_coll ) ;
// skip over %c
i + = 1 ;
continue ;
}
if ( head [ i + 1 ] = = ' w ' & &
head [ i + 2 ] = = ' h ' & &
head [ i + 3 ] = = ' e ' & &
head [ i + 4 ] = = ' r ' & &
head [ i + 5 ] = = ' e ' ) {
// insert the location
long whereLen ;
char * where = r - > getString ( " where " , & whereLen ) ;
// get it from cookie as well!
if ( ! where )
where = r - > getStringFromCookie ( " where " ,
& whereLen ) ;
// fix for getStringFromCookie
if ( where & & ! where [ 0 ] ) where = NULL ;
// skip over the %where
i + = 5 ;
// if empty, base it on IP
if ( ! where ) {
double lat ;
double lon ;
double radius ;
char * city , * state , * ctry ;
// use this by default
long ip = r - > m_userIP ;
// ip for testing?
long iplen ;
char * ips = r - > getString ( " uip " , & iplen ) ;
if ( ips ) ip = atoip ( ips ) ;
// returns true if found in db
char buf [ 128 ] ;
getIPLocation ( ip ,
& lat ,
& lon ,
& radius ,
& city ,
& state ,
& ctry ,
buf ,
128 ) ;
if ( city & & state )
sb . safePrintf ( " %s, %s " , city , state ) ;
}
else
sb . dequote ( where , whereLen ) ;
continue ;
}
if ( head [ i + 1 ] = = ' w ' & &
head [ i + 2 ] = = ' h ' & &
head [ i + 3 ] = = ' e ' & &
head [ i + 4 ] = = ' n ' ) {
// insert the location
long whenLen ;
char * when = r - > getString ( " when " , & whenLen ) ;
// skip over the %when
i + = 4 ;
if ( ! when ) continue ;
sb . dequote ( when , whenLen ) ;
continue ;
}
// %sortby
if ( head [ i + 1 ] = = ' s ' & &
head [ i + 2 ] = = ' o ' & &
head [ i + 3 ] = = ' r ' & &
head [ i + 4 ] = = ' t ' & &
head [ i + 5 ] = = ' b ' & &
head [ i + 6 ] = = ' y ' ) {
// insert the location
long sortBy = r - > getLong ( " sortby " , 1 ) ;
// print the radio buttons
char * cs [ 5 ] ;
cs [ 0 ] = " " ;
cs [ 1 ] = " " ;
cs [ 2 ] = " " ;
cs [ 3 ] = " " ;
cs [ 4 ] = " " ;
if ( sortBy > = 1 & & sortBy < = 4 )
cs [ sortBy ] = " checked " ;
sb . safePrintf (
" <input type=radio name=sortby value=1%s>date "
" <input type=radio name=sortby value=2%s>distance "
" <input type=radio name=sortby value=3%s>relevancy "
" <input type=radio name=sortby value=4%s>popularity " ,
cs [ 1 ] , cs [ 2 ] , cs [ 3 ] , cs [ 4 ] ) ;
// skip over the %sortby
i + = 6 ;
continue ;
}
if ( head [ i + 1 ] = = ' e ' ) {
// now we got the %e, insert the query
char * p = ( char * ) sb . getBuf ( ) ;
long plen = sb . getAvail ( ) ;
long eqlen = urlEncode ( p , plen , q , qlen ) ;
//p += eqlen;
sb . incrementLength ( eqlen ) ;
// skip over %e
i + = 1 ;
continue ;
}
if ( head [ i + 1 ] = = ' N ' ) {
// now we got the %N, insert the global doc count
//long long c=g_checksumdb.getRdb()->getNumGlobalRecs();
//now each host tells us how many docs it has in itsping
long long c = g_hostdb . getNumGlobalRecs ( ) ;
c + = g_conf . m_docCountAdjustment ;
// never allow to go negative
if ( c < 0 ) c = 0 ;
//p+=ulltoa(p,c);
char * p = ( char * ) sb . getBuf ( ) ;
sb . reserve2x ( 16 ) ;
long len = ulltoa ( p , c ) ;
sb . incrementLength ( len ) ;
// skip over %N
i + = 1 ;
continue ;
}
2014-06-22 08:07:38 +04:00
/*
2014-06-21 20:57:51 +04:00
if ( head [ i + 1 ] = = ' E ' ) {
// now each host tells us how many docs it has in its
// ping request
long long c = g_hostdb . getNumGlobalEvents ( ) ;
char * p = ( char * ) sb . getBuf ( ) ;
sb . reserve2x ( 16 ) ;
long len = ulltoa ( p , c ) ;
sb . incrementLength ( len ) ;
// skip over %E
i + = 1 ;
continue ;
}
2014-06-22 08:07:38 +04:00
*/
2014-06-21 20:57:51 +04:00
if ( head [ i + 1 ] = = ' n ' ) {
// now we got the %n, insert the collection doc count
//p+=ulltoa(p,docsInColl);
char * p = ( char * ) sb . getBuf ( ) ;
sb . reserve2x ( 16 ) ;
long long docsInColl = 0 ;
if ( cr ) docsInColl = cr - > getNumDocsIndexed ( ) ;
long len = ulltoa ( p , docsInColl ) ;
sb . incrementLength ( len ) ;
// skip over %n
i + = 1 ;
continue ;
}
/*
if ( head [ i + 1 ] = = ' T ' ) {
// . print the final tail
// . only print admin link if we're local
//long user = g_pages.getUserType ( s , r );
//char *username = g_users.getUsername(r);
//char *pwd = r->getString ( "pwd" );
char * p = ( char * ) sb . getBuf ( ) ;
long plen = sb . getAvail ( ) ;
//p = g_pages.printTail ( p , p + plen , user , pwd );
char * n = g_pages . printTail ( p , p + plen ,
r - > isLocal ( ) ) ;
sb . incrementLength ( n - p ) ;
// skip over %T
i + = 1 ;
continue ;
}
*/
// print the drop down menu for selecting the # of reslts
if ( head [ i + 1 ] = = ' D ' ) {
// skip over %D
i + = 1 ;
// skip if not enough buffer
//if ( p + 1000 >= pend ) continue;
// # results
//long n = r->getLong("n",10);
//bool printedDropDown;
//p = printNumResultsDropDown(p,n,&printedDropDown);
//printNumResultsDropDown(sb,n,&printedDropDown);
continue ;
}
if ( head [ i + 1 ] = = ' H ' ) {
// . insert the secret key here, to stop seo bots
// . TODO: randomize its position to make parsing more
// difficult
// . this secret key is for submitting a new query
2014-06-22 08:40:25 +04:00
// long key;
// char kname[4];
// g_httpServer.getKey (&key,kname,NULL,0,time(NULL),0,
// 10);
//sprintf (p , "<input type=hidden name=%s value=%li>",
2014-06-21 20:57:51 +04:00
// kname,key);
//p += gbstrlen ( p );
2014-06-22 08:40:25 +04:00
// sb.safePrintf( "<input type=hidden name=%s "
//"value=%li>",
// kname,key);
2014-06-21 20:57:51 +04:00
//adds param for default screen size
//if(cr)
2014-06-22 08:40:25 +04:00
// sb.safePrintf("<input type=hidden "
//"id='screenWidth' name='ws' value=%li>",
//cr->m_screenWidth);
2014-06-21 20:57:51 +04:00
// insert collection name too
long collLen ;
char * coll = r - > getString ( " c " , & collLen ) ;
if ( collLen > 0 & & collLen < MAX_COLL_LEN ) {
//sprintf (p,"<input type=hidden name=c "
// "value=\"");
//p += gbstrlen ( p );
sb . safePrintf ( " <input type=hidden name=c "
" value= \" " ) ;
//memcpy ( p , coll , collLen );
//p += collLen;
sb . safeMemcpy ( coll , collLen ) ;
//sprintf ( p , "\">\n");
//p += gbstrlen ( p );
sb . safePrintf ( " \" > \n " ) ;
}
// pass this crap on so zak can do searches
2014-06-22 08:40:25 +04:00
//char *username = g_users.getUsername(r);
2014-06-21 20:57:51 +04:00
// this is null because not in the cookie and we are
// logged in
//char *pwd = r->getString ( "pwd" );
2014-06-22 08:40:25 +04:00
//sb.safePrintf("<input type=hidden name=pwd "
//"value=\"%s\">\n",
2014-06-21 20:57:51 +04:00
//pwd);
2014-06-22 08:40:25 +04:00
//sb.safePrintf("<input type=hidden name=username "
// "value=\"%s\">\n",username);
2014-06-21 20:57:51 +04:00
// skip over %H
i + = 1 ;
continue ;
}
// %t, print Top Directory section
if ( head [ i + 1 ] = = ' t ' ) {
i + = 1 ;
//p = printTopDirectory ( p, pend );
2014-07-06 09:20:15 +04:00
printTopDirectory ( sb , FORMAT_HTML ) ;
2014-06-21 20:57:51 +04:00
continue ;
}
2014-06-22 08:07:38 +04:00
// MDW
if ( head [ i + 1 ] = = ' F ' ) {
i + = 1 ;
//p = printTopDirectory ( p, pend );
if ( ! method ) method = " GET " ;
sb . safePrintf ( " <form method=%s action= \" /search \" "
" name= \" f \" > \n " , method ) ;
continue ;
}
if ( head [ i + 1 ] = = ' L ' ) {
i + = 1 ;
//p = printTopDirectory ( p, pend );
printLogo ( sb , si ) ;
continue ;
}
if ( head [ i + 1 ] = = ' f ' ) {
i + = 1 ;
//p = printTopDirectory ( p, pend );
printFamilyFilter ( sb , si - > m_familyFilter ) ;
continue ;
}
if ( head [ i + 1 ] = = ' R ' ) {
i + = 1 ;
//p = printTopDirectory ( p, pend );
printRadioButtons ( sb , si ) ;
continue ;
}
// MDW
2014-06-21 20:57:51 +04:00
// *p++ = head[i];
sb . safeMemcpy ( ( char * ) & head [ i ] , 1 ) ;
continue ;
}
//return p;
return true ;
}
2014-09-19 02:23:35 +04:00
bool printLeftColumnRocketAndTabs ( SafeBuf * sb ,
bool isSearchResultsPage ,
CollectionRec * cr ,
char * tabName ) {
2014-08-03 04:58:11 +04:00
class MenuItem {
public :
char * m_text ;
char * m_url ;
} ;
static MenuItem mi [ ] = {
{ " SEARCH " , " / " } ,
2014-09-19 02:23:35 +04:00
{ " IMAGES " , " /?searchtype=images " } ,
{ " PRODUCTS " , " /?searchtype=products " } ,
{ " ARTICLES " , " /?searchtype=articles " } ,
{ " DISCUSSIONS " , " /?searchtype=discussions " } ,
2014-08-03 04:58:11 +04:00
{ " DIRECTORY " , " /Top " } ,
{ " ADVANCED " , " /adv.html " } ,
{ " ADD URL " , " /addurl " } ,
2014-09-02 04:04:08 +04:00
{ " WIDGETS " , " /widgets.html " } ,
2014-08-31 20:51:21 +04:00
{ " SYNTAX " , " /syntax.html " } ,
2014-09-02 04:23:15 +04:00
{ " USERS " , " /users.html " } ,
2014-08-03 04:58:11 +04:00
{ " ABOUT " , " /about.html " } ,
2014-08-31 21:51:37 +04:00
{ " NEWS " , " /news.html " } ,
2014-08-31 20:51:21 +04:00
{ " FAQ " , " /faq.html " } ,
{ " API " , " /api.html " }
2014-08-03 04:58:11 +04:00
} ;
2014-09-07 08:34:36 +04:00
char * coll = " " ;
if ( cr ) coll = cr - > m_coll ;
2014-08-03 04:58:11 +04:00
//
// first the nav column
//
2014-09-19 02:23:35 +04:00
sb - > safePrintf (
" <TD bgcolor=#f3c714 " // yellow/gold
2014-08-03 04:58:11 +04:00
" valign=top "
2014-08-26 08:38:52 +04:00
" style= \" width:210px; "
2014-08-03 04:58:11 +04:00
" border-right:3px solid blue; "
" \" > "
" <br> "
2014-08-03 21:42:45 +04:00
" <center> "
2014-09-07 08:34:36 +04:00
" <a href=/?c=%s> "
2014-08-03 21:42:45 +04:00
" <div style= \" "
" background-color:white; "
" padding:10px; "
" border-radius:100px; "
" border-color:blue; "
" border-width:3px; "
" border-style:solid; "
" width:100px; "
" height:100px; "
" \" > "
2014-09-07 08:34:36 +04:00
, coll
2014-09-02 00:31:13 +04:00
) ;
2014-08-03 21:42:45 +04:00
2014-09-02 00:31:13 +04:00
if ( strcmp ( tabName , " appliance " ) = = 0 )
sb - > safePrintf ( " <img style=margin-top:21px; width=90 "
" height=57 src=/computer2.png> " ) ;
else
sb - > safePrintf ( " <br style=line-height:10px;> "
" <img width=54 height=79 src=/rocket.jpg> "
) ;
2014-08-03 21:42:45 +04:00
2014-09-02 00:31:13 +04:00
sb - > safePrintf ( " </div> "
" </a> "
" </center> "
2014-08-03 21:42:45 +04:00
2014-09-02 00:31:13 +04:00
" <br> "
" <br> "
2014-08-03 04:58:11 +04:00
) ;
long n = sizeof ( mi ) / sizeof ( MenuItem ) ;
2014-09-02 00:31:13 +04:00
2014-08-03 04:58:11 +04:00
for ( long i = 0 ; i < n ; i + + ) {
2014-09-19 02:23:35 +04:00
if ( isSearchResultsPage & & i > = 5 ) break ;
char delim = ' ? ' ;
if ( strstr ( mi [ i ] . m_url , " ? " ) ) delim = ' & ' ;
2014-08-31 21:11:16 +04:00
sb - > safePrintf (
2014-09-19 02:23:35 +04:00
" <a href=%s%cc=%s> "
2014-08-03 04:58:11 +04:00
" <div style= \" "
" padding:5px; "
" position:relative; "
" text-align:right; "
" border-width:3px; "
" border-right-width:0px; "
" border-style:solid; "
" margin-left:10px; "
" border-top-left-radius:10px; "
" border-bottom-left-radius:10px; "
" font-size:14px; "
" x-overflow:; "
, mi [ i ] . m_url
2014-09-19 02:23:35 +04:00
, delim
2014-09-02 00:31:13 +04:00
, coll
2014-08-03 04:58:11 +04:00
) ;
2014-08-31 21:51:37 +04:00
//if ( i == pageNum )
bool matched = false ;
if ( strcasecmp ( mi [ i ] . m_text , tabName ) = = 0 )
matched = true ;
if ( matched )
2014-08-31 21:11:16 +04:00
sb - > safePrintf (
2014-08-03 09:19:15 +04:00
" border-color:blue; "
" color:black; "
" background-color:white; \" " ) ;
2014-08-03 04:58:11 +04:00
else
2014-08-31 21:11:16 +04:00
sb - > safePrintf ( " border-color:white; "
2014-08-03 09:19:15 +04:00
" color:white; "
" background-color:blue; \" "
" onmouseover= \" "
" this.style.backgroundColor='lightblue'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='blue'; "
" this.style.color='white'; \" "
) ;
2014-08-03 04:58:11 +04:00
2014-08-31 21:11:16 +04:00
sb - > safePrintf ( " > "
2014-08-03 04:58:11 +04:00
// make button wider
2014-08-03 09:19:15 +04:00
" <nobr> "
2014-08-03 04:58:11 +04:00
" "
2014-08-03 09:19:15 +04:00
" <b>%s</b> </nobr> "
2014-08-03 04:58:11 +04:00
, mi [ i ] . m_text
) ;
//
// begin hack: white out the blue border line!!
//
2014-08-31 21:51:37 +04:00
if ( matched )
2014-08-31 21:11:16 +04:00
sb - > safePrintf (
2014-08-03 04:58:11 +04:00
" <div style=padding:5px;top:0; "
" background-color:white; "
" display:inline-block; "
" position:absolute;> "
" "
" </div> "
) ;
// end hack
2014-08-31 21:11:16 +04:00
sb - > safePrintf (
2014-08-03 04:58:11 +04:00
" </div> "
" </a> "
" <br> "
) ;
}
2014-09-19 02:23:35 +04:00
2014-08-03 04:58:11 +04:00
// admin link
2014-09-19 02:23:35 +04:00
if ( isSearchResultsPage ) return true ;
2014-08-31 21:11:16 +04:00
sb - > safePrintf (
2014-09-02 00:31:13 +04:00
" <a href=/admin/settings?c=%s> "
2014-08-03 09:19:15 +04:00
" <div style= \" background-color:green; "
2014-08-31 07:49:11 +04:00
// for try it out bubble:
//"position:relative;"
2014-08-03 04:58:11 +04:00
" padding:5px; "
" text-align:right; "
" border-width:3px; "
" border-right-width:0px; "
" border-style:solid; "
" margin-left:10px; "
" border-color:white; "
" border-top-left-radius:10px; "
" border-bottom-left-radius:10px; "
" font-size:14px; "
" color:white; "
" cursor:hand; "
" cursor:pointer; \" "
" onmouseover= \" "
" this.style.backgroundColor='lightgreen'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='green'; "
" this.style.color='white'; \" "
" > "
2014-08-31 07:49:11 +04:00
/*
// try it out bubble div
" <div "
" onmouseover= \" "
" this.style.box-shadow='10px 10px 5px #888888'; "
" \" "
" onmouseout= \" "
" this.style.box-shadow=''; "
" \" "
" style= \" "
" vertical-align:middle; "
" text-align:left; "
" cursor:pointer; "
" cursor:hand; "
//"border-color:black;"
//"border-style:solid;"
//"border-width:2px;"
" padding:3px; "
//"width:30px;"
//"height:20px;"
//"margin-top:-20px;"
" margin-left:-120px; "
" position:absolute; "
//"top:-20px;"
//"left:10px;"
" display:inline-block; "
" \" "
" > "
" <b style=font-size:11px;> "
" Click for demo "
" </b> "
" </div> "
*/
// end try it out bubble div
2014-08-03 04:58:11 +04:00
" <b>ADMIN</b> "
" </div> "
" </a> "
" <br> "
2014-09-19 02:23:35 +04:00
" </TD> "
2014-09-02 00:31:13 +04:00
, coll
2014-08-03 04:58:11 +04:00
) ;
2014-09-19 02:23:35 +04:00
return true ;
}
bool printFrontPageShell ( SafeBuf * sb , char * tabName , CollectionRec * cr ) {
sb - > safePrintf ( " <html> \n " ) ;
sb - > safePrintf ( " <head> \n " ) ;
//sb->safePrintf("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf8\">");
sb - > safePrintf ( " <meta name= \" description \" content= \" A powerful, new search engine that does real-time indexing! \" > \n " ) ;
sb - > safePrintf ( " <meta name= \" keywords \" content= \" search, search engine, search engines, search the web, fresh index, green search engine, green search, clean search engine, clean search \" > \n " ) ;
//char *title = "An Alternative Open Source Search Engine";
char * title = " An Alternative Open Source Search Engine " ;
if ( strcasecmp ( tabName , " search " ) ) title = tabName ;
// if ( pageNum == 1 ) title = "Directory";
// if ( pageNum == 2 ) title = "Advanced";
// if ( pageNum == 3 ) title = "Add Url";
// if ( pageNum == 4 ) title = "About";
// if ( pageNum == 5 ) title = "Help";
// if ( pageNum == 6 ) title = "API";
sb - > safePrintf ( " <title>Gigablast - %s</title> \n " , title ) ;
sb - > safePrintf ( " <style><!-- \n " ) ;
sb - > safePrintf ( " body { \n " ) ;
sb - > safePrintf ( " font-family:Arial, Helvetica, sans-serif; \n " ) ;
sb - > safePrintf ( " color: #000000; \n " ) ;
sb - > safePrintf ( " font-size: 12px; \n " ) ;
sb - > safePrintf ( " margin: 0px 0px; \n " ) ;
sb - > safePrintf ( " letter-spacing: 0.04em; \n " ) ;
sb - > safePrintf ( " } \n " ) ;
sb - > safePrintf ( " a {text-decoration:none;} \n " ) ;
//sb->safePrintf("a:link {color:#00c}\n");
//sb->safePrintf("a:visited {color:#551a8b}\n");
//sb->safePrintf("a:active {color:#f00}\n");
sb - > safePrintf ( " .bold {font-weight: bold;} \n " ) ;
sb - > safePrintf ( " .bluetable {background:#d1e1ff;margin-bottom:15px;font-size:12px;} \n " ) ;
sb - > safePrintf ( " .url {color:#008000;} \n " ) ;
sb - > safePrintf ( " .cached, .cached a {font-size: 10px;color: #666666; \n " ) ;
sb - > safePrintf ( " } \n " ) ;
sb - > safePrintf ( " table { \n " ) ;
sb - > safePrintf ( " font-family:Arial, Helvetica, sans-serif; \n " ) ;
sb - > safePrintf ( " color: #000000; \n " ) ;
sb - > safePrintf ( " font-size: 12px; \n " ) ;
sb - > safePrintf ( " } \n " ) ;
sb - > safePrintf ( " .directory {font-size: 16px;} \n "
" .nav {font-size:20px;align:right;} \n "
) ;
sb - > safePrintf ( " --> \n " ) ;
sb - > safePrintf ( " </style> \n " ) ;
sb - > safePrintf ( " \n " ) ;
sb - > safePrintf ( " </head> \n " ) ;
sb - > safePrintf ( " <script> \n " ) ;
sb - > safePrintf ( " <!-- \n " ) ;
sb - > safePrintf ( " function x(){document.f.q.focus();} \n " ) ;
sb - > safePrintf ( " // --></script> \n " ) ;
sb - > safePrintf ( " <body onload= \" x() \" > \n " ) ;
//sb->safePrintf("<body>\n");
//g_proxy.insertLoginBarDirective ( &sb );
//
// DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN
//
sb - > safePrintf ( " <TABLE border=0 height=100%% cellspacing=0 "
" cellpadding=0> "
" \n <TR> \n " ) ;
// . also prints <TD>...</TD>
// . false = isSearchResultsPage?
printLeftColumnRocketAndTabs ( sb , false , cr , tabName ) ;
2014-08-03 04:58:11 +04:00
//
// now the MAIN column
//
2014-09-19 02:23:35 +04:00
sb - > safePrintf ( " \n <TD valign=top style=padding-left:30px;> \n " ) ;
2014-08-03 04:58:11 +04:00
2014-08-31 21:11:16 +04:00
sb - > safePrintf ( " <br><br> " ) ;
2014-08-04 04:25:22 +04:00
2014-08-31 21:11:16 +04:00
sb - > safePrintf ( " <a href=/><img border=0 width=470 "
2014-08-03 21:42:45 +04:00
" height=44 src=/gigablast.jpg></a> \n " ) ;
2014-08-31 21:11:16 +04:00
// sb->safePrintf("<br>"
2014-08-04 04:25:22 +04:00
// "<img border=0 width=470 "
// "height=15 src=/bar.jpg>\n");
2014-08-03 21:42:45 +04:00
2014-08-03 04:58:11 +04:00
return true ;
}
bool printWebHomePage ( SafeBuf & sb , HttpRequest * r , TcpSocket * sock ) {
SearchInput si ;
si . set ( sock , r ) ;
// if there's a ton of sites use the post method otherwise
// they won't fit into the http request, the browser will reject
// sending such a large request with "GET"
char * method = " GET " ;
if ( si . m_sites & & gbstrlen ( si . m_sites ) > 800 ) method = " POST " ;
// if the provided their own
CollectionRec * cr = g_collectiondb . getRec ( r ) ;
if ( cr & & cr - > m_htmlRoot . length ( ) ) {
return expandHtml ( sb ,
cr - > m_htmlRoot . getBufStart ( ) ,
cr - > m_htmlRoot . length ( ) ,
NULL ,
0 ,
r ,
& si ,
//TcpSocket *s ,
method , // "GET" or "POST"
cr ) ; //CollectionRec *cr ) {
}
2014-09-19 02:23:35 +04:00
// . search special types
// . defaults to web which is "search"
// . can be like "images" "products" "articles"
char * searchType = r - > getString ( " searchtype " , NULL , " search " , NULL ) ;
log ( " searchtype=%s " , searchType ) ;
// pass searchType in as tabName
printFrontPageShell ( & sb , searchType , cr ) ;
2014-08-03 04:58:11 +04:00
//sb.safePrintf("<br><br>\n");
// try to avoid using https for images. it is like 10ms slower.
// if ( g_conf.m_isMattWells )
// sb.safePrintf("<center><a href=/><img border=0 width=500 "
// "height=122 src=http://www.gigablast.com/logo-"
// "med.jpg></a>\n");
// else
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
sb . safePrintf ( " <br><br><br> \n " ) ;
2014-08-03 04:58:11 +04:00
/*
2014-01-10 07:00:21 +04:00
sb . safePrintf ( " <b>web</b> " ) ;
if ( g_conf . m_isMattWells )
sb . safePrintf ( " <a href=http://www.gigablast.com/seo>seo</a> "
" "
) ;
sb . safePrintf ( " <a href= \" /Top \" >directory</a> "
2013-11-16 02:01:56 +04:00
" \n " ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <a href=/adv.html>advanced search</a> " ) ;
sb . safePrintf ( " " ) ;
sb . safePrintf ( " <a href=/addurl title= \" Instantly add your url to "
" Gigablast's index \" >add url</a> " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
2014-08-03 04:58:11 +04:00
*/
2013-08-03 00:12:24 +04:00
// submit to https now
2014-06-22 08:07:38 +04:00
sb . safePrintf ( " <form method=%s "
" action=/search name=f> \n " , method ) ;
2014-01-19 00:09:33 +04:00
if ( cr )
sb . safePrintf ( " <input type=hidden name=c value= \" %s \" > " ,
cr - > m_coll ) ;
2014-08-03 04:58:11 +04:00
// put search box in a box
2014-08-04 04:25:22 +04:00
sb . safePrintf ( " <div style= "
" background-color:#fcc714; "
" border-style:solid; "
" border-width:3px; "
" border-color:blue; "
//"background-color:blue;"
" padding:20px; "
" border-radius:20px; "
2014-08-03 04:58:11 +04:00
" > " ) ;
sb . safePrintf ( " <input name=q type=text "
" style= \" "
//"width:%lipx;"
" height:26px; "
" padding:0px; "
" font-weight:bold; "
" padding-left:5px; "
//"border-radius:10px;"
" margin:0px; "
" border:1px inset lightgray; "
" background-color:#ffffff; "
" font-size:18px; "
" \" "
2014-08-04 18:17:13 +04:00
" size=40 value= \" \" > "
2014-08-03 04:58:11 +04:00
2014-08-04 18:17:13 +04:00
//"<input type=\"submit\" value=\"Search\">"
" <div onclick=document.f.submit(); "
2014-08-05 07:27:51 +04:00
" onmouseover= \" "
" this.style.backgroundColor='lightgreen'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='green'; "
" this.style.color='white'; \" "
2014-08-04 18:17:13 +04:00
" style=border-radius:28px; "
" cursor:pointer; "
" cursor:hand; "
" border-color:white; "
" border-style:solid; "
" border-width:3px; "
" padding:12px; "
" width:20px; "
" height:20px; "
" display:inline-block; "
" background-color:green;color:white;> "
2014-08-05 07:27:51 +04:00
" <b style=margin-left:-5px;font-size:18px; "
" >GO</b> "
2014-08-04 18:17:13 +04:00
" </div> "
" \n "
) ;
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " </div> \n " ) ;
2014-01-19 00:09:33 +04:00
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " </form> \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
2014-04-06 05:09:04 +04:00
2014-04-10 11:31:41 +04:00
2014-05-10 23:32:01 +04:00
if ( cr & & cr - > m_coll ) { // && strcmp(cr->m_coll,"main") ) {
2014-04-10 11:31:41 +04:00
sb . safePrintf ( " <center> "
" Searching the <b>%s</b> collection. "
" </center> " ,
cr - > m_coll ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
}
2014-04-06 05:09:04 +04:00
// print any red boxes we might need to
if ( printRedBox2 ( & sb , true ) )
sb . safePrintf ( " <br> \n " ) ;
2014-08-03 04:58:11 +04:00
/*
do not show table for open source installs
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <table cellpadding=3> \n " ) ;
sb . safePrintf ( " \n " ) ;
2013-12-09 06:45:49 +04:00
char * root = " " ;
if ( g_conf . m_isMattWells )
root = " http://www.gigablast.com " ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <tr valign=top> \n " ) ;
2013-12-09 06:45:49 +04:00
//sb.safePrintf("<td align=center><div style=width:50px;height:50px;display:inline-block;background-color:red;></div></td>\n");
2014-06-19 17:49:19 +04:00
sb . safePrintf ( " <td align=center><img height=71px width=50px "
2013-12-09 06:45:49 +04:00
" src=%s/opensource.png></td> \n "
, root ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <td><font size=+1><b>Open Source!</b> "
" </font><br> \n " ) ;
2014-08-31 20:51:21 +04:00
sb . brify2 ( " Gigablast is now available as an <a href=https://github.com/gigablast/open-source-search-engine>open source search engine</a> on github.com. Download it today. Finally a robust, scalable search solution in C/C++ that has been in development and used commercially since 2000. <a href=http://www.gigablast.com/faq.html#features>Features</a>. Limited support available for free. "
2013-08-03 00:12:24 +04:00
, 80 ) ;
sb . safePrintf ( " <br><br> " ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
2014-06-19 16:41:52 +04:00
// donate with paypal
sb . safePrintf ( " <tr valign=top> \n " ) ;
2014-06-20 04:56:44 +04:00
sb . safePrintf ( " <td align=center style=padding-right:20px;><center> "
2014-06-19 16:41:52 +04:00
// BEGIN PAYPAL DONATE BUTTON
" <form action= \" https://www.paypal.com/cgi-bin/webscr \" method= \" post \" target= \" _top \" > "
" <input type= \" hidden \" name= \" cmd \" value= \" _donations \" > "
" <input type= \" hidden \" name= \" business \" value= \" 2SFSFLUY3KS9Y \" > "
" <input type= \" hidden \" name= \" lc \" value= \" US \" > "
" <input type= \" hidden \" name= \" item_name \" value= \" Gigablast, Inc. \" > "
" <input type= \" hidden \" name= \" currency_code \" value= \" USD \" > "
" <input type= \" hidden \" name= \" bn \" value= \" PP-DonationsBF:btn_donateCC_LG.gif:NonHosted \" > "
" <input type= \" image \" src= \" https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif \" border= \" 0 \" name= \" submit \" alt= \" PayPal - The safer, easier way to pay online! \" height=47 width=147> "
" <img alt= \" \" border= \" 0 \" src= \" https://www.paypalobjects.com/en_US/i/scr/pixel.gif \" width= \" 1 \" height= \" 1 \" > "
" </form> "
// END PAYPAY BUTTON
" </center></div></center></td> \n "
) ;
sb . safePrintf ( " <td><font size=+1><b> "
" Support Gigablast "
" </b></font><br> \n "
) ;
sb . brify2 (
" Donations of $100 or more receive a black "
" Gigablast T-shirt "
" with embroidered logo while quantities last. "
" State your address and size "
" in an <a href=/contact.html>email</a>. "
2014-06-19 17:49:19 +04:00
" PayPal accepted. "
2014-06-19 16:41:52 +04:00
" Help Gigablast continue "
" to grow and add new features. "
, 80
) ;
sb . safePrintf ( " </td></tr> \n " ) ;
2014-08-03 04:58:11 +04:00
*/
2014-06-19 16:41:52 +04:00
2014-01-10 07:00:21 +04:00
/*
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <tr valign=top> \n " ) ;
2013-11-16 01:47:05 +04:00
// 204x143
2013-11-16 02:01:56 +04:00
sb . safePrintf ( " <td><img height=52px width=75px "
" src=%s/eventguru.png></td> \n "
, root ) ;
2013-11-16 01:47:05 +04:00
sb . safePrintf ( " <td><font size=+1><b>Event Guru Returns</b></font><br> \n " ) ;
sb . brify2 ( " <a href=http://www.eventguru.com/>Event Guru</a> datamines events from the web. It identifies events on a web page, or even plain text, using the same rules of deduction used by the human mind. It also has Facebook integration and lots of other cool things. " , 80 ) ;
sb . safePrintf ( " <br><br></td></tr> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " \n " ) ;
2014-01-10 07:00:21 +04:00
*/
2013-11-16 01:47:05 +04:00
2013-08-03 00:12:24 +04:00
2013-11-16 01:47:05 +04:00
/*
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <tr valign=top> \n " ) ;
2013-11-16 01:47:05 +04:00
sb . safePrintf ( " <td align=center><div style=width:50px;height:50px;display:inline-block;background-color:green;></div></td> \n " ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <td><font size=+1><b>The Green Search Engine</b></font><br> \n " ) ;
sb . brify2 ( " Gigablast is the only clean-powered web search engine. 90% of its power usage comes from wind energy. Astoundingly, Gigablast is one of ONLY four search engines in the United States indexing over a billion pages. " , 80 ) ;
sb . safePrintf ( " <br><br></td></tr> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " \n " ) ;
2013-11-16 01:47:05 +04:00
*/
2013-08-03 00:12:24 +04:00
2014-01-10 07:00:21 +04:00
/*
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <tr valign=top> \n " ) ;
2013-11-16 02:01:56 +04:00
sb . safePrintf ( " <td align=center><img src=%s/gears.png "
" height=50 width=50></div></td> \n "
, root ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <td><font size=+1><b>The Transparent Search Engine</b></font><br> \n " ) ;
sb . brify2 ( " Gigablast is the first truly transparent search engine. It tells you exactly why the search results are ranked the way they are. There is nothing left to the imagination. " , 85 ) ;
sb . safePrintf ( " <br><br> " ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " \n " ) ;
2014-01-10 07:00:21 +04:00
*/
2013-08-03 00:12:24 +04:00
2014-08-03 04:58:11 +04:00
/*
2014-01-10 07:00:21 +04:00
if ( g_conf . m_isMattWells ) {
sb . safePrintf ( " <tr valign=top> \n " ) ;
sb . safePrintf ( " <td align=center><center><img src=%s/dollargear.png "
" height=50 width=50></center></div></center></td> \n "
, root ) ;
sb . safePrintf ( " <td><font size=+1><b>The SEO Search Engine</b></font><br> \n " ) ;
sb . brify2 ( " When it comes to search-engine based SEO, Gigablast is the place to be. With a frothy set of unique and effective <a href=http://www.gigablast.com/seo>SEO tools</a>, you will find all you need to execute a simple yet effective SEO strategy. Stop the guesswork, and let a search engine tell you how to SEO it. " , 85 ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
}
2014-08-03 04:58:11 +04:00
*/
2013-08-03 00:12:24 +04:00
/*
sb . safePrintf ( " <tr valign=top> \n " ) ;
sb . safePrintf ( " <td><div style=width:50px;height:50px;display:inline-block;background-color:ff3030;></td> \n " ) ;
sb . safePrintf ( " <td><font size=+1><b>Xml Search Feed</b></font><br> \n " ) ;
sb . brify2 ( " Utilize Gigablast's results on your own site or product by connecting with Gigablast's <a href=/searchfeed.html>XML search feed</a>. It's now simpler than ever to setup and use. You can also add the web pages you want into the index in near real-time. " , 85 ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
*/
/*
sb . safePrintf ( " <tr valign=top> \n " ) ;
sb . safePrintf ( " <td><div style=width:50px;height:50px;display:inline-block;background-color:black;></td> \n " ) ;
sb . safePrintf ( " <td><font size=+1><b>The Private Search Engine</b> "
" </font><br> \n " ) ;
sb . brify2 ( " Gigablast does not allow the NSA or any third party "
" to spy on the queries your IP address is doing, "
" unlike "
" <a href=http://www.guardian.co.uk/world/2013/jun/ "
" 06/us-tech-giants-nsa-data> "
" other large search engines</a>. "
" Gigablast is the only "
" <a href=/privacy.html>truly private search engine</a> "
" in the United States. "
//" Everyone else has fundamental "
//"gaps in their "
//"security as explained by the above link."
//"Tell Congress "
//"to <a href=https://optin.stopwatching.us/>stop spying "
//"on you</a>."
, 85 ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
*/
/*
sb . safePrintf ( " <tr valign=top> \n " ) ;
sb . safePrintf ( " <td><div style=width:50px;height:50px;display:inline-block;background-color:black;></td> \n " ) ;
sb . safePrintf ( " <td><font size=+1><b>No Tax Dodging</b></font><br> \n " ) ;
sb . brify2 ( " Gigablast pays its taxes when it makes a profit. "
" Google and Bing <a href=http://www.bloomberg.com/news/ "
" 2010-10-21/google-2-4-rate-shows-how-60-billion-u-s- "
" revenue-lost-to-tax-loopholes.html>do not</a>. They "
" stash their profits in "
" offshore tax havens to avoid paying taxes. "
//"The end result is that taxes are higher for you. "
" You may think Google and Bing are free to use, but in "
" reality, <u>you</u> pay for it in increased taxes. "
, 85 ) ;
sb . safePrintf ( " </td></tr> \n " ) ;
*/
2014-09-11 02:05:33 +04:00
/*
2014-08-31 07:49:11 +04:00
//
// begin new stuff
//
// gradients
sb . safePrintf ( " <style><!-- \n " ) ;
sb . safePrintf ( " .grad { " ) ;
sb . safePrintf ( " background: rgb(190,201,247); " ) ;
sb . safePrintf ( " background: url(data:image/svg+xml;base64,PD94bWwgdmVyc2lvbj0iMS4wIiA/Pgo8c3ZnIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgd2lkdGg9IjEwMCUiIGhlaWdodD0iMTAwJSIgdmlld0JveD0iMCAwIDEgMSIgcHJlc2VydmVBc3BlY3RSYXRpbz0ibm9uZSI+CiAgPGxpbmVhckdyYWRpZW50IGlkPSJncmFkLXVjZ2ctZ2VuZXJhdGVkIiBncmFkaWVudFVuaXRzPSJ1c2VyU3BhY2VPblVzZSIgeDE9IjAlIiB5MT0iMCUiIHgyPSIxMDAlIiB5Mj0iMTAwJSI+CiAgICA8c3RvcCBvZmZzZXQ9IjAlIiBzdG9wLWNvbG9yPSIjYmVjOWY3IiBzdG9wLW9wYWNpdHk9IjEiLz4KICAgIDxzdG9wIG9mZnNldD0iMTAwJSIgc3RvcC1jb2xvcj0iIzBiM2NlZCIgc3RvcC1vcGFjaXR5PSIxIi8+CiAgPC9saW5lYXJHcmFkaWVudD4KICA8cmVjdCB4PSIwIiB5PSIwIiB3aWR0aD0iMSIgaGVpZ2h0PSIxIiBmaWxsPSJ1cmwoI2dyYWQtdWNnZy1nZW5lcmF0ZWQpIiAvPgo8L3N2Zz4=); " ) ;
sb . safePrintf ( " background: -moz-linear-gradient(-45deg, rgba(190,201,247,1) 0%%, rgba(11,60,237,1) 100%%); " ) ;
sb . safePrintf ( " background: -webkit-gradient(linear, left top, right bottom, color-stop(0%%,rgba(190,201,247,1)), color-stop(100%%,rgba(11,60,237,1))); " ) ;
sb . safePrintf ( " background: -webkit-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%); " ) ;
sb . safePrintf ( " background: -o-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%); " ) ;
sb . safePrintf ( " background: -ms-linear-gradient(-45deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%); " ) ;
sb . safePrintf ( " background: linear-gradient(135deg, rgba(190,201,247,1) 0%%,rgba(11,60,237,1) 100%%); " ) ;
sb . safePrintf ( " filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#bec9f7', endColorstr='#0b3ced',GradientType=1 ); " ) ;
sb . safePrintf ( " } " ) ;
sb . safePrintf ( " --> " ) ;
sb . safePrintf ( " </style> \n " ) ;
sb . safePrintf ( " <br> " ) ;
2014-09-04 21:49:13 +04:00
2014-09-07 08:05:11 +04:00
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " <div class=grad style= \" border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:320px;height:320px;display:inline-block;z-index:100;color:black;position:relative;background-color:lightgray; \" > " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <b> " ) ;
sb . safePrintf ( " <font style=font-size:18px;margin-left:80px;> " ) ;
sb . safePrintf ( " Build Your Own " ) ;
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <font style=font-size:18px;margin-left:80px;> " ) ;
2014-09-01 02:35:53 +04:00
sb . safePrintf ( " Search Engine in the " ) ;
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <font style=font-size:18px;margin-left:80px;> " ) ;
sb . safePrintf ( " Cloud " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " </b> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <div style=margin-left:20px;width:270px;> " ) ;
2014-08-31 08:25:14 +04:00
sb . safePrintf ( " <a href=/admin/addcoll><img style=float:left;padding-right:15px; height=188px width=101px src=/robot3.png></a> " ) ;
2014-09-01 02:35:53 +04:00
//sb.safePrintf("<br>");
2014-09-02 04:04:08 +04:00
sb . safePrintf ( " <b>STEP 1.</b> <a href=/admin/addcoll?guide=1> "
" Click here to " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <b>name your engine</b></a>. " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
2014-09-02 04:04:08 +04:00
sb . safePrintf ( " <b>STEP 2.</b> <a href=/admin/settings?guide=1> "
" Click here to " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <b>add websites to index</b></a>. " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
2014-09-02 04:04:08 +04:00
sb . safePrintf ( " <b>STEP 3.</b> <a href=/widgets.html?guide=1> "
" Click here to " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <b>insert search box</b></a>. " ) ;
sb . safePrintf ( " </div> " ) ;
sb . safePrintf ( " </div> " ) ;
sb . safePrintf ( " <div class=grad style= \" border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:280px;height:280px;display:inline-block;z-index:105;color:black;margin-left:-50px;position:absolute;margin-top:50px;background-color:lightgray; \" > " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br style=line-height:25px;> " ) ;
sb . safePrintf ( " <b> " ) ;
sb . safePrintf ( " <font style=font-size:18px;margin-left:40px;> " ) ;
sb . safePrintf ( " Web Search Appliance " ) ;
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " </b> " ) ;
sb . safePrintf ( " <div style=margin-left:20px;width:270px;> " ) ;
2014-08-31 08:25:14 +04:00
sb . safePrintf ( " <a href=http://www.gigablast.com/appliance.html><img style=float:left;padding-bottom:20px;padding-right:10px; height=81px width=121px src=/computer2.png></a> " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " Put the web in your closet. " ) ;
2014-08-31 08:25:14 +04:00
sb . safePrintf ( " Jump start your efforts with four 1U supermicro servers loaded with the top 2 billion pages from the web. <a href=http://www.gigablast.com/appliance.html>[learn more]</a> " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " </div> " ) ;
sb . safePrintf ( " </div> " ) ;
2014-09-01 02:35:53 +04:00
sb . safePrintf ( " <div class=grad style= \" border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:240px;height:240px;display:inline-block;z-index:110;color:black;margin-left:-240px;position:absolute;margin-top:230px;background-color:lightgray; \" > " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <b> " ) ;
sb . safePrintf ( " <font style=font-size:18px;margin-left:60px;> " ) ;
sb . safePrintf ( " Open Source " ) ;
sb . safePrintf ( " </font> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " <br> " ) ;
sb . safePrintf ( " </b> " ) ;
sb . safePrintf ( " <div style=margin-left:30px;margin-right:5px;> " ) ;
2014-08-31 20:51:21 +04:00
sb . safePrintf ( " <a href=http://www.gigablast.com/faq.html#features><img style=float:left;padding-right:10px height=71px width=71px src=/unlocked2.png></a> " ) ;
2014-08-31 07:49:11 +04:00
2014-08-31 20:51:21 +04:00
sb . safePrintf ( " Gigablast is now available as an <a href=https://github.com/gigablast/open-source-search-engine>open source search engine</a> on github.com. Download it today. Finally a robust, scalable search solution in C/C++ that has been in development and used commercially since 2000. <a href=http://www.gigablast.com/faq.html#features>Features</a>. " ) ;
2014-08-31 07:49:11 +04:00
sb . safePrintf ( " </div> " ) ;
sb . safePrintf ( " </div> " ) ;
2014-09-07 08:05:11 +04:00
2014-08-31 07:49:11 +04:00
//sb.safePrintf("</TD></TR></TABLE></body></html>");
//
// end new stuff
//
2014-09-11 02:05:33 +04:00
*/
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " \n " ) ;
2014-08-03 04:58:11 +04:00
//sb.safePrintf("</table>\n");
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
printNav ( sb , r ) ;
return true ;
}
bool printAddUrlHomePage ( SafeBuf & sb , char * url , HttpRequest * r ) {
2014-09-02 00:31:13 +04:00
CollectionRec * cr = g_collectiondb . getRec ( r ) ;
printFrontPageShell ( & sb , " add url " , cr ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <script type= \" text/javascript \" > \n "
" function handler() { \n "
" if(this.readyState == 4 ) { \n "
" document.getElementById('msgbox').innerHTML= "
" this.responseText; \n "
//"alert(this.status+this.statusText+"
//"this.responseXML+this.responseText);\n"
" }} \n "
" </script> \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
sb . safePrintf ( " <br><br><br> \n " ) ;
2014-08-03 04:58:11 +04:00
/*
sb . safePrintf ( " <b>web</b> " ) ;
2014-01-18 23:54:58 +04:00
if ( g_conf . m_isMattWells )
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " <a href=http://www.gigablast.com/seo>seo</a> "
" "
) ;
sb . safePrintf ( " <a href= \" /Top \" >directory</a> "
2014-01-18 23:54:58 +04:00
" \n " ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <a href=/adv.html>advanced search</a> " ) ;
sb . safePrintf ( " " ) ;
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " <a href=/addurl title= \" Instantly add your url to "
" Gigablast's index \" >add url</a> " ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
2014-08-03 04:58:11 +04:00
*/
// submit to https now
sb . safePrintf ( " <form method=GET "
" action=/addurl name=f> \n " ) ;
2014-01-18 23:09:30 +04:00
2014-08-03 04:58:11 +04:00
char * coll = " " ;
if ( cr ) coll = cr - > m_coll ;
if ( cr )
sb . safePrintf ( " <input type=hidden name=c value= \" %s \" > " ,
cr - > m_coll ) ;
// put search box in a box
2014-08-04 04:25:22 +04:00
sb . safePrintf ( " <div style= "
" background-color:#fcc714; "
" border-style:solid; "
" border-width:3px; "
" border-color:blue; "
//"background-color:blue;"
" padding:20px; "
2014-08-03 04:58:11 +04:00
" border-radius:20px; "
" > " ) ;
sb . safePrintf ( " <input name=urls type=text "
" style= \" "
//"width:%lipx;"
" height:26px; "
" padding:0px; "
" font-weight:bold; "
" padding-left:5px; "
//"border-radius:10px;"
" margin:0px; "
" border:1px inset lightgray; "
" background-color:#ffffff; "
" font-size:18px; "
" \" "
" size=40 value= \" "
) ;
2013-08-03 00:12:24 +04:00
if ( url ) {
SafeBuf tmp ;
tmp . safePrintf ( " %s " , url ) ;
// don't let double quotes in the url close our val attribute
tmp . replace ( " \" " , " %22 " ) ;
sb . safeMemcpy ( & tmp ) ;
}
else
sb . safePrintf ( " http:// " ) ;
2014-08-04 18:17:13 +04:00
sb . safePrintf ( " \" > "
//"<input type=\"submit\" value=\"Add Url\">\n"
" <div onclick=document.f.submit(); "
2014-08-05 07:27:51 +04:00
" onmouseover= \" "
" this.style.backgroundColor='lightgreen'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='green'; "
" this.style.color='white'; \" "
2014-08-04 18:17:13 +04:00
" style=border-radius:28px; "
" cursor:pointer; "
" cursor:hand; "
" border-color:white; "
" border-style:solid; "
" border-width:3px; "
" padding:12px; "
" width:20px; "
" height:20px; "
" display:inline-block; "
" background-color:green;color:white;> "
" <b style=margin-left:-5px;font-size:18px;>GO</b> "
" </div> "
" \n "
) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " \n " ) ;
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " </div> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
2013-08-03 00:12:24 +04:00
// if addurl is turned off, just print "disabled" msg
char * msg = NULL ;
if ( ! g_conf . m_addUrlEnabled )
msg = " Add url is temporarily disabled " ;
// can also be turned off in the collection rec
2014-01-10 05:29:18 +04:00
//if ( ! cr->m_addUrlEnabled )
// msg = "Add url is temporarily disabled";
2013-08-03 00:12:24 +04:00
// or if in read-only mode
if ( g_conf . m_readOnlyMode )
msg = " Add url is temporarily disabled " ;
2014-01-18 23:54:58 +04:00
2014-08-03 09:19:15 +04:00
sb . safePrintf ( " <br><center> "
" Add a url to the <b>%s</b> collection</center> " , coll ) ;
2014-01-18 23:54:58 +04:00
2013-08-03 00:12:24 +04:00
// if url is non-empty the ajax will receive this identical msg
// and display it in the div, so do not duplicate the msg!
if ( msg & & ! url )
sb . safePrintf ( " <br><br>%s " , msg ) ;
// . the ajax msgbox div
// . when loaded with the main page for the first time it will
// immediately replace its content...
if ( url ) {
2013-11-16 01:47:05 +04:00
char * root = " " ;
if ( g_conf . m_isMattWells )
root = " http://www.gigablast.com " ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " <br> "
" <br> "
" <div id=msgbox> "
//"<b>Injecting your url. Please wait...</b>"
" <center> "
2013-11-16 01:47:05 +04:00
" <img src=%s/gears.gif width=50 height=50> "
2013-08-03 00:12:24 +04:00
" </center> "
" <script type=text/javascript> "
//"alert('shit');"
" var client = new XMLHttpRequest(); \n "
" client.onreadystatechange = handler; \n "
2014-07-04 21:43:04 +04:00
" var url='/addurl?urls= "
2013-11-16 01:47:05 +04:00
, root ) ;
2013-08-03 00:12:24 +04:00
sb . urlEncode ( url ) ;
// propagate "admin" if set
//long admin = hr->getLong("admin",-1);
//if ( admin != -1 ) sb.safePrintf("&admin=%li",admin);
// provide hash of the query so clients can't just pass in
// a bogus id to get search results from us
unsigned long h32 = hash32n ( url ) ;
if ( h32 = = 0 ) h32 = 1 ;
unsigned long long rand64 = gettimeofdayInMillisecondsLocal ( ) ;
2013-12-09 03:44:37 +04:00
// msg7 needs an explicit collection for /addurl for injecting
// in PageInject.cpp. it does not use defaults for safety.
2014-01-18 23:09:30 +04:00
sb . safePrintf ( " &id=%lu&c=%s&rand=%llu'; \n "
2013-08-03 00:12:24 +04:00
" client.open('GET', url ); \n "
2014-06-21 20:57:51 +04:00
" client.send(); \n "
" </script> \n "
, h32
, coll
, rand64
) ;
sb . safePrintf ( " </div> \n " ) ;
}
2013-08-03 00:12:24 +04:00
2014-06-21 20:57:51 +04:00
sb . safePrintf ( " </form> \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
2013-08-03 00:12:24 +04:00
2014-06-21 20:57:51 +04:00
printNav ( sb , r ) ;
return true ;
2013-08-03 00:12:24 +04:00
}
2014-06-21 20:57:51 +04:00
bool printDirHomePage ( SafeBuf & sb , HttpRequest * r ) {
2014-07-06 09:20:15 +04:00
char format = r - > getReplyFormat ( ) ;
if ( format ! = FORMAT_HTML )
return printTopDirectory ( sb , format ) ;
2014-09-02 00:31:13 +04:00
CollectionRec * cr = g_collectiondb . getRec ( r ) ;
2014-07-06 09:20:15 +04:00
2014-09-02 00:31:13 +04:00
printFrontPageShell ( & sb , " directory " , cr ) ;
2014-08-03 04:58:11 +04:00
2014-06-21 20:57:51 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
sb . safePrintf ( " <br><br><br> \n " ) ;
2014-08-03 04:58:11 +04:00
// submit to https now
sb . safePrintf ( " <form method=GET "
" action=/search name=f> \n " ) ;
2014-06-21 20:57:51 +04:00
2014-08-03 04:58:11 +04:00
if ( cr )
sb . safePrintf ( " <input type=hidden name=c value= \" %s \" > " ,
cr - > m_coll ) ;
2014-06-21 20:57:51 +04:00
2014-08-03 04:58:11 +04:00
// put search box in a box
2014-08-04 04:25:22 +04:00
sb . safePrintf ( " <div style= "
" background-color:#fcc714; "
" border-style:solid; "
" border-width:3px; "
" border-color:blue; "
//"background-color:blue;"
" padding:20px; "
2014-08-03 04:58:11 +04:00
" border-radius:20px; "
" > " ) ;
sb . safePrintf ( " <input name=q type=text "
" style= \" "
//"width:%lipx;"
" height:26px; "
" padding:0px; "
" font-weight:bold; "
" padding-left:5px; "
//"border-radius:10px;"
" margin:0px; "
" border:1px inset lightgray; "
" background-color:#ffffff; "
" font-size:18px; "
" \" "
2014-08-04 18:17:13 +04:00
" size=40 value= \" \" > "
//"<input type=\"submit\" value=\"Search\">\n");
2014-08-03 04:58:11 +04:00
2014-08-04 18:17:13 +04:00
" <div onclick=document.f.submit(); "
2014-08-05 07:27:51 +04:00
" onmouseover= \" "
" this.style.backgroundColor='lightgreen'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='green'; "
" this.style.color='white'; \" "
2014-08-04 18:17:13 +04:00
" style=border-radius:28px; "
" cursor:pointer; "
" cursor:hand; "
" border-color:white; "
" border-style:solid; "
" border-width:3px; "
" padding:12px; "
" width:20px; "
" height:20px; "
" display:inline-block; "
" background-color:green;color:white;> "
" <b style=margin-left:-5px;font-size:18px;>GO</b> "
" </div> "
" \n "
) ;
2014-08-03 04:58:11 +04:00
sb . safePrintf ( " </div> \n " ) ;
2014-06-21 20:57:51 +04:00
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " </form> \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
2014-07-06 09:20:15 +04:00
printTopDirectory ( sb , FORMAT_HTML ) ;
2014-06-21 20:57:51 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
printNav ( sb , r ) ;
return true ;
}
// . returns false if blocked, true otherwise
// . sets errno on error
// . make a web page displaying the config of this host
// . call g_httpServer.sendDynamicPage() to send it
bool sendPageRoot ( TcpSocket * s , HttpRequest * r , char * cookie ) {
// don't allow pages bigger than 128k in cache
char buf [ 10 * 1024 + MAX_QUERY_LEN ] ;
// a ptr into "buf"
//char *p = buf;
//char *pend = buf + 10*1024 + MAX_QUERY_LEN - 100 ;
SafeBuf sb ( buf , 10 * 1024 + MAX_QUERY_LEN ) ;
// print bgcolors, set focus, set font style
//p = g_httpServer.printFocus ( p , pend );
//p = g_httpServer.printColors ( p , pend );
//long qlen;
//char *q = r->getString ( "q" , &qlen , NULL );
// insert collection name too
CollectionRec * cr = g_collectiondb . getRec ( r ) ;
if ( ! cr ) {
g_errno = ENOCOLLREC ;
return g_httpServer . sendErrorReply ( s , 500 , mstrerror ( g_errno ) ) ;
}
// get the collection rec
/*
CollectionRec * cr = g_collectiondb . getRec ( coll ) ;
uint8_t * hp = NULL ;
long hpLen ;
long long docsInColl = - 1 ;
if ( ! cr ) {
// use the default
Parm * pp = g_parms . getParm ( " hp " ) ;
if ( ! pp ) {
g_errno = ENOTFOUND ;
g_msg = " (error: no such collection) " ;
return g_httpServer . sendErrorReply ( s , 500 ,
mstrerror ( g_errno ) ) ;
2013-08-03 00:12:24 +04:00
}
2014-06-21 20:57:51 +04:00
hp = ( uint8_t * ) pp - > m_def ;
if ( hp ) hpLen = uint8strlen ( hp ) ;
if ( hpLen < = 0 | | ! hp )
log ( LOG_INFO , " http: No root page html present. " ) ;
} else {
if ( cr - > m_useLanguagePages ) {
uint8_t lang = g_langId . guessGBLanguageFromUrl ( r - > getHost ( ) ) ;
if ( lang & & ( hp = g_languagePages . getLanguagePage ( lang ) ) ! = NULL ) {
hpLen = uint8strlen ( hp ) ;
// Set sort language as well
// This might not be a good idea, as it
// overrides any other setting. May be
// better to let the user agent string
// tell us what the user wants.
strcpy ( cr - > m_defaultSortLanguage ,
getLanguageAbbr ( lang ) ) ;
}
2013-08-03 00:12:24 +04:00
}
2014-06-21 20:57:51 +04:00
if ( ! hp ) {
hp = ( uint8_t * ) cr - > m_htmlRoot ;
hpLen = cr - > m_htmlRootLen ;
2013-08-03 00:12:24 +04:00
}
2014-06-21 20:57:51 +04:00
//RdbBase *base = getRdbBase ( RDB_CHECKSUMDB , coll );
RdbBase * base = getRdbBase ( ( uint8_t ) RDB_CLUSTERDB , coll ) ;
if ( base ) docsInColl = base - > getNumGlobalRecs ( ) ;
}
*/
// print the page out
/*
expandRootHtml ( sb ,
hp , hpLen ,
q , qlen , r , s , docsInColl ,
cr ) ;
*/
2013-08-03 00:12:24 +04:00
2014-06-21 20:57:51 +04:00
//if ( ! strcmp(coll,"dmoz" ) )
// printDirHomePage(sb,r);
//else
2014-06-22 08:07:38 +04:00
printWebHomePage ( sb , r , s ) ;
2013-08-03 00:12:24 +04:00
2014-06-21 20:57:51 +04:00
// . print last 5 queries
// . put 'em in a table
// . disable for now, impossible to monitor/control
//p += printLastQueries ( p , pend );
// are we the admin?
//bool isAdmin = g_collectiondb.isAdmin ( r , s );
2013-08-03 00:12:24 +04:00
2014-06-21 20:57:51 +04:00
// calculate bufLen
//long bufLen = p - buf;
// . now encapsulate it in html head/tail and send it off
// . the 0 means browser caches for however long it's set for
// . but we don't use 0 anymore, use -2 so it never gets cached so
// our display of the # of pages in the index is fresh
// . no, but that will piss people off, its faster to keep it cached
//return g_httpServer.sendDynamicPage ( s , buf , bufLen , -1 );
return g_httpServer . sendDynamicPage ( s ,
( char * ) sb . getBufStart ( ) ,
sb . length ( ) ,
// 120 seconds cachetime
// don't cache anymore since
// we have the login bar at
// the top of the page
0 , //120, // cachetime
false , // post?
" text/html " ,
200 ,
NULL , // cookie
" UTF-8 " ,
r ) ;
2013-08-03 00:12:24 +04:00
}
// . store into "p"
// . returns bytes stored into "p"
// . used for entertainment purposes
/*
long printLastQueries ( char * p , char * pend ) {
// if not 512 bytes left, bail
if ( pend - p < 512 ) return 0 ;
// return w/ no table if no queries have been added to g_qbuf yet
if ( ! g_nextq = = - 1 ) return 0 ;
// remember start for returning # of bytes stored
char * start = p ;
// begin table (no border)
sprintf ( p , " <br><table border=0><tr><td><center>Last %li queries: "
" </td></tr> " , ( long ) QBUF_NUMQUERIES ) ;
p + = gbstrlen ( p ) ;
// point to last query added
long n = g_nextq - 1 ;
// . wrap it if we need to
// . QBUF_NUMQUERIES is defined to be 5 in PageResults.h
if ( n < 0 ) n = QBUF_NUMQUERIES - 1 ;
// . print up to five queries
// . queries are stored by advancing g_nextq, so "i" should go backward
long count = 0 ;
for ( long i = n ; count < QBUF_NUMQUERIES ; count + + , i - - ) {
// wrap i if we need to
if ( i = = - 1 ) i = QBUF_NUMQUERIES - 1 ;
// if this query is empty, skip it (might be uninitialized)
if ( g_qbuf [ i ] [ 0 ] = = ' \0 ' ) continue ;
// point to the query (these are NULL terminated)
char * q = g_qbuf [ i ] ;
long qlen = gbstrlen ( q ) ;
// bail if too big
if ( p + qlen + 32 + 1024 > = pend ) return p - start ;
// otherwise, print this query to the page
sprintf ( p , " <tr><td><a href=/cgi/0.cgi?q= " ) ;
p + = gbstrlen ( p ) ;
// store encoded query as cgi parm
p + = urlEncode ( p , q , qlen ) ;
// end a href tag
* p + + = ' > ' ;
// . then print the actual query to the page
// . use htmlEncode so nobody can abuse it
p + = saftenTags ( p , pend - p , q , qlen ) ;
// wrap it up
sprintf ( p , " </a></td></tr> " ) ;
p + = gbstrlen ( p ) ;
}
// end the table
sprintf ( p , " </table> " ) ;
p + = gbstrlen ( p ) ;
// return bytes written
return p - start ;
}
*/
//char *printTopDirectory ( char *p, char *pend ) {
2014-07-06 09:20:15 +04:00
bool printTopDirectory ( SafeBuf & sb , char format ) {
long nr = g_catdb . getRdb ( ) - > getNumTotalRecs ( ) ;
2013-10-14 01:24:41 +04:00
// if no recs in catdb, print instructions
2014-07-06 09:20:15 +04:00
if ( nr = = 0 & & format = = FORMAT_HTML )
2013-10-14 01:24:41 +04:00
return sb . safePrintf ( " <center> "
" <b>DMOZ functionality is not set up.</b> "
" <br> "
" <br> "
" <b> "
" Please follow the set up "
2014-08-31 20:51:21 +04:00
" <a href=/faq.html#dmoz> "
2013-10-14 01:24:41 +04:00
" instructions "
" </a>. "
" </b> "
" </center> " ) ;
2014-07-06 09:20:15 +04:00
// send back an xml/json error reply
if ( nr = = 0 & & format ! = FORMAT_HTML ) {
g_errno = EDMOZNOTREADY ;
return false ;
}
2013-08-03 00:12:24 +04:00
//char topList[4096];
//sprintf(topList,
return sb . safePrintf (
" <center> "
" <table cellspacing= \" 4 \" cellpadding= \" 4 \" ><tr><td valign=top> \n "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Arts/ \" >Arts</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Arts/Movies/ \" >Movies</a>, "
" <a href= \" /Top/Arts/Television/ \" >Television</a>, "
" <a href= \" /Top/Arts/Music/ \" >Music</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Business/ \" >Business</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Business/Employment/ \" >Jobs</a>, "
" <a href= \" /Top/Business/Real_Estate/ \" >Real Estate</a>, "
" <a href= \" /Top/Business/Investing/ \" >Investing</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Computers/ \" >Computers</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Computers/Internet/ \" >Internet</a>, "
" <a href= \" /Top/Computers/Software/ \" >Software</a>, "
" <a href= \" /Top/Computers/Hardware/ \" >Hardware</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr><tr><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Games/ \" >Games</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Games/Video_Games/ \" >Video Games</a>, "
" <a href= \" /Top/Games/Roleplaying/ \" >RPGs</a>, "
" <a href= \" /Top/Games/Gambling/ \" >Gambling</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Health/ \" >Health</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Health/Fitness/ \" >Fitness</a>, "
" <a href= \" /Top/Health/Medicine/ \" >Medicine</a>, "
" <a href= \" /Top/Health/Alternative/ \" >Alternative</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Home/ \" >Home</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Home/Family/ \" >Family</a>, "
" <a href= \" /Top/Home/Consumer_Information/ \" >Consumers</a>, "
" <a href= \" /Top/Home/Cooking/ \" >Cooking</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr><tr><td valign=top> "
2013-10-11 09:27:49 +04:00
//"<b><a href=\"/Top/Kids_and_Teens/\">"
2013-08-03 00:12:24 +04:00
//"<font color=\"#ff0000\">K</font>"
//"<font color=\"339900\">i</font>"
//"<font color=\"#ff6600\">d</font>"
//"<font color=\"#0066ff\">s</font>"
//" and Teens</a></b><br>"
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Kids_and_Teens/ \" >Kids and Teens</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Kids_and_Teens/Arts/ \" >Arts</a>, "
" <a href= \" /Top/Kids_and_Teens/School_Time/ \" >School Time</a>, "
" <a href= \" /Top/Kids_and_Teens/Teen_Life/ \" >Teen Life</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/News/ \" >News</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/News/Media/ \" >Media</a>, "
" <a href= \" /Top/News/Newspapers/ \" >Newspapers</a>, "
" <a href= \" /Top/News/Weather/ \" >Weather</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Recreation/ \" >Recreation</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Recreation/Travel/ \" >Travel</a>, "
" <a href= \" /Top/Recreation/Food/ \" >Food</a>, "
" <a href= \" /Top/Recreation/Outdoors/ \" >Outdoors</a>, "
" <a href= \" /Top/Recreation/Humor/ \" >Humor</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr><tr><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Reference/ \" >Reference</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Reference/Maps/ \" >Maps</a>, "
" <a href= \" /Top/Reference/Education/ \" >Education</a>, "
" <a href= \" /Top/Reference/Libraries/ \" >Libraries</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Regional/ \" >Regional</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Regional/North_America/United_States/ \" >US</a>, "
" <a href= \" /Top/Regional/North_America/Canada/ \" >Canada</a>, "
" <a href= \" /Top/Regional/Europe/United_Kingdom/ \" >UK</a>, "
" <a href= \" /Top/Regional/Europe/ \" >Europe</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Science/ \" >Science</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Science/Biology/ \" >Biology</a>, "
" <a href= \" /Top/Science/Social_Sciences/Psychology/ \" >Psychology</a>, "
" <a href= \" /Top/Science/Physics/ \" >Physics</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr><tr><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Shopping/ \" >Shopping</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Shopping/Vehicles/Autos/ \" >Autos</a>, "
" <a href= \" /Top/Shopping/Clothing/ \" >Clothing</a>, "
" <a href= \" /Top/Shopping/Gifts/ \" >Gifts</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Society/ \" >Society</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Society/People/ \" >People</a>, "
" <a href= \" /Top/Society/Religion_and_Spirituality/ \" >Religion</a>, "
" <a href= \" /Top/Society/Issues/ \" >Issues</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td><td valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/Sports/ \" >Sports</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/Sports/Baseball/ \" >Baseball</a>, "
" <a href= \" /Top/Sports/Soccer/ \" >Soccer</a>, "
" <a href= \" /Top/Sports/Basketball/ \" >Basketball</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr> "
" <tr><td colspan=3 valign=top> "
2013-10-11 09:27:49 +04:00
" <b><a href= \" /Top/World/ \" >World</a></b><br> "
2013-08-03 00:12:24 +04:00
" <small> "
2013-10-11 09:27:49 +04:00
" <a href= \" /Top/World/Deutsch/ \" >Deutsch</a>, "
" <a href= \" /Top/World/Espa%%c3%%b1ol/ \" >Espa%c%col</a>, "
" <a href= \" /Top/World/Fran%%c3%%a7ais/ \" >Fran%c%cais</a>, "
" <a href= \" /Top/World/Italiano/ \" >Italiano</a>, "
" <a href= \" /Top/World/Japanese/ \" >Japanese</a>, "
" <a href= \" /Top/World/Nederlands/ \" >Nederlands</a>, "
" <a href= \" /Top/World/Polska/ \" >Polska</a>, "
" <a href= \" /Top/World/Dansk/ \" >Dansk</a>, "
" <a href= \" /Top/World/Svenska/ \" >Svenska</a>... "
2013-08-03 00:12:24 +04:00
" </small> \n "
" </td></tr></table></center> \n " ,
195 , 177 , 195 , 167 ) ;
// make sure there's room
//long topListLen = gbstrlen(topList);
//if (pend - p <= topListLen+1)
// return p;
// copy it in
//memcpy(p, topList, topListLen);
//p += topListLen;
//*p = '\0';
//return p;
}
/////////////////
//
// ADD URL PAGE
//
/////////////////
2014-06-15 19:26:27 +04:00
# include "PageInject.h"
2013-08-03 00:12:24 +04:00
# include "TuringTest.h"
# include "AutoBan.h"
2013-12-11 03:28:04 +04:00
//#include "CollectionRec.h"
2013-08-03 00:12:24 +04:00
# include "Users.h"
# include "Spider.h"
//static bool sendReply ( void *state , bool addUrlEnabled );
static bool canSubmit ( unsigned long h , long now , long maxUrlsPerIpDom ) ;
//static void addedStuff ( void *state );
void resetPageAddUrl ( ) ;
/*
class State2 {
public :
Url m_url ;
//char *m_buf;
//long m_bufLen;
//long m_bufMaxLen;
} ;
*/
2014-03-12 19:28:42 +04:00
class State1i {
2013-08-03 00:12:24 +04:00
public :
//Msg4 m_msg4;
Msg7 m_msg7 ;
TcpSocket * m_socket ;
2014-09-01 03:23:11 +04:00
bool m_isRootAdmin ;
2013-08-03 00:12:24 +04:00
char m_coll [ MAX_COLL_LEN + 1 ] ;
bool m_goodAnswer ;
bool m_doTuringTest ;
long m_ufuLen ;
char m_ufu [ MAX_URL_LEN ] ;
//long m_urlLen;
//char m_url[MAX_URL_LEN];
//char m_username[MAX_USER_SIZE];
bool m_strip ;
bool m_spiderLinks ;
bool m_forceRespider ;
// buf filled by the links coming from google, msn, yahoo, etc
//State2 m_state2[5]; // gb, goog, yahoo, msn, ask
long m_numSent ;
long m_numReceived ;
//long m_raw;
//SpiderRequest m_sreq;
} ;
static void doneInjectingWrapper3 ( void * st1 ) ;
// only allow up to 1 Msg10's to be in progress at a time
static bool s_inprogress = false ;
// . returns false if blocked, true otherwise
// . sets g_errno on error
2014-07-04 21:43:04 +04:00
bool sendPageAddUrl ( TcpSocket * sock , HttpRequest * hr ) {
2013-08-03 00:12:24 +04:00
// . get fields from cgi field of the requested url
// . get the search query
long urlLen = 0 ;
2014-07-04 21:43:04 +04:00
char * url = hr - > getString ( " urls " , & urlLen , NULL /*default*/ ) ;
2013-08-03 00:12:24 +04:00
// see if they provided a url of a file of urls if they did not
// provide a url to add directly
2014-07-04 21:43:04 +04:00
bool isAdmin = g_conf . isCollAdmin ( sock , hr ) ;
2013-08-03 00:12:24 +04:00
long ufuLen = 0 ;
char * ufu = NULL ;
2014-07-04 21:43:04 +04:00
//if ( isAdmin )
// // get the url of a file of urls (ufu)
// ufu = hr->getString ( "ufu" , &ufuLen , NULL );
2013-08-03 00:12:24 +04:00
// can't be too long, that's obnoxious
if ( urlLen > MAX_URL_LEN | | ufuLen > MAX_URL_LEN ) {
g_errno = EBUFTOOSMALL ;
g_msg = " (error: url too long) " ;
2014-07-04 21:43:04 +04:00
return g_httpServer . sendErrorReply ( sock , 500 , " url too long " ) ;
2013-08-03 00:12:24 +04:00
}
// get the collection
2013-12-09 03:44:37 +04:00
//long collLen = 0;
//char *coll9 = r->getString("c",NULL);//&collLen);
//if ( ! coll || ! coll[0] ) {
// //coll = g_conf.m_defaultColl;
// coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
// collLen = gbstrlen(coll);
//}
2013-08-03 00:12:24 +04:00
// get collection rec
2014-01-18 23:54:58 +04:00
2014-07-04 21:43:04 +04:00
CollectionRec * cr = g_collectiondb . getRec ( hr ) ;
2014-01-18 23:54:58 +04:00
2013-08-03 00:12:24 +04:00
// bitch if no collection rec found
if ( ! cr ) {
g_errno = ENOCOLLREC ;
g_msg = " (error: no collection) " ;
2014-07-04 21:43:04 +04:00
return g_httpServer . sendErrorReply ( sock , 500 , " no coll rec " ) ;
2013-08-03 00:12:24 +04:00
}
// . make sure the ip is not banned
// . we may also have an exclusive list of IPs for private collections
2014-07-04 21:43:04 +04:00
if ( ! cr - > hasSearchPermission ( sock ) ) {
2013-08-03 00:12:24 +04:00
g_errno = ENOPERM ;
g_msg = " (error: permission denied) " ;
2014-07-04 21:43:04 +04:00
return g_httpServer . sendErrorReply ( sock , 500 , mstrerror ( g_errno ) ) ;
2013-08-03 00:12:24 +04:00
}
//
// if no url, print the main homepage page
//
if ( ! url ) {
SafeBuf sb ;
2014-07-04 21:43:04 +04:00
printAddUrlHomePage ( sb , NULL , hr ) ;
return g_httpServer . sendDynamicPage ( sock ,
2013-08-03 00:12:24 +04:00
sb . getBufStart ( ) ,
sb . length ( ) ,
// 120 secs cachetime
// don't cache any more
// since we have the
// login bar at top of page
0 , //120 ,// cachetime
false , // post?
" text/html " ,
200 ,
NULL , // cookie
" UTF-8 " ,
2014-07-04 21:43:04 +04:00
hr ) ;
2013-08-03 00:12:24 +04:00
}
//
// run the ajax script on load to submit the url now
//
2014-07-04 21:43:04 +04:00
long id = hr - > getLong ( " id " , 0 ) ;
2013-08-03 00:12:24 +04:00
// if we are not being called by the ajax loader, the put the
// ajax loader script into the html now
if ( id = = 0 ) {
SafeBuf sb ;
2014-07-04 21:43:04 +04:00
printAddUrlHomePage ( sb , url , hr ) ;
return g_httpServer . sendDynamicPage ( sock ,
2013-08-03 00:12:24 +04:00
sb . getBufStart ( ) ,
sb . length ( ) ,
// don't cache any more
// since we have the
// login bar at top of
//page
0 , //3600,// cachetime
false , // post?
" text/html " ,
200 ,
NULL , // cookie
" UTF-8 " ,
2014-07-04 21:43:04 +04:00
hr ) ;
2013-08-03 00:12:24 +04:00
}
//
// ok, inject the provided url!!
//
//
// check for errors first
//
// if addurl is turned off, just print "disabled" msg
char * msg = NULL ;
if ( ! g_conf . m_addUrlEnabled )
msg = " Add url is temporarily disabled " ;
// can also be turned off in the collection rec
2014-01-10 05:29:18 +04:00
//if ( ! cr->m_addUrlEnabled )
// msg = "Add url is temporarily disabled";
2013-08-03 00:12:24 +04:00
// or if in read-only mode
if ( g_conf . m_readOnlyMode )
msg = " Add url is temporarily disabled " ;
// cannot add if another Msg10 from here is still in progress
if ( s_inprogress )
msg = " Add url is currently busy! Try again in a second. " ;
// . send msg back to the ajax request
// . use cachetime of 3600 so it does not re-inject if you hit the
// back button!
if ( msg ) {
SafeBuf sb ;
sb . safePrintf ( " %s " , msg ) ;
2014-07-04 21:43:04 +04:00
g_httpServer . sendDynamicPage ( sock ,
2013-08-03 00:12:24 +04:00
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , //-1, // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
return true ;
}
// make a new state
2014-03-12 19:28:42 +04:00
State1i * st1 ;
try { st1 = new ( State1i ) ; }
2013-08-03 00:12:24 +04:00
catch ( . . . ) {
g_errno = ENOMEM ;
log ( " PageAddUrl: new(%i): %s " ,
2014-08-28 23:55:02 +04:00
( int ) sizeof ( State1i ) , mstrerror ( g_errno ) ) ;
2014-07-04 21:43:04 +04:00
return g_httpServer . sendErrorReply ( sock , 500 , mstrerror ( g_errno ) ) ; }
2014-03-12 19:28:42 +04:00
mnew ( st1 , sizeof ( State1i ) , " PageAddUrl " ) ;
2013-08-03 00:12:24 +04:00
// save socket and isAdmin
2014-07-04 21:43:04 +04:00
st1 - > m_socket = sock ;
2014-09-01 03:23:11 +04:00
st1 - > m_isRootAdmin = isAdmin ;
2013-08-03 00:12:24 +04:00
/*
// save the url
st1 - > m_url [ 0 ] = ' \0 ' ;
if ( url ) {
// normalize and add www. if it needs it
Url uu ;
uu . set ( url , gbstrlen ( url ) , true ) ;
// remove >'s i guess and store in st1->m_url[] buffer
st1 - > m_urlLen = cleanInput ( st1 - > m_url ,
MAX_URL_LEN ,
uu . getUrl ( ) ,
uu . getUrlLen ( ) ) ;
}
*/
// save the "ufu" (url of file of urls)
st1 - > m_ufu [ 0 ] = ' \0 ' ;
st1 - > m_ufuLen = ufuLen ;
memcpy ( st1 - > m_ufu , ufu , ufuLen ) ;
st1 - > m_ufu [ ufuLen ] = ' \0 ' ;
st1 - > m_doTuringTest = cr - > m_doTuringTest ;
st1 - > m_spiderLinks = true ;
st1 - > m_strip = true ;
2014-03-12 19:28:42 +04:00
// save the collection name in the State1i class
2013-12-09 03:44:37 +04:00
//if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
//strncpy ( st1->m_coll , coll , collLen );
//st1->m_coll [ collLen ] = '\0';
strcpy ( st1 - > m_coll , cr - > m_coll ) ;
2013-08-03 00:12:24 +04:00
// assume they answered turing test correctly
st1 - > m_goodAnswer = true ;
// get ip of submitter
//unsigned long h = ipdom ( s->m_ip );
// . use top 2 bytes now, some isps have large blocks
// . if this causes problems, then they can do pay for inclusion
2014-07-04 21:43:04 +04:00
unsigned long h = iptop ( sock - > m_ip ) ;
2013-08-03 00:12:24 +04:00
long codeLen ;
2014-07-04 21:43:04 +04:00
char * code = hr - > getString ( " code " , & codeLen ) ;
if ( g_autoBan . hasCode ( code , codeLen , sock - > m_ip ) ) {
2013-08-03 00:12:24 +04:00
long uipLen = 0 ;
2014-07-04 21:43:04 +04:00
char * uip = hr - > getString ( " uip " , & uipLen ) ;
2013-08-03 00:12:24 +04:00
long hip = 0 ;
//use the uip when we have a raw query to test if
//we can submit
if ( uip ) {
hip = atoip ( uip , uipLen ) ;
h = iptop ( hip ) ;
}
}
2014-07-04 21:43:04 +04:00
st1 - > m_strip = hr - > getLong ( " strip " , 0 ) ;
2013-10-15 04:19:59 +04:00
// . Remember, for cgi, if the box is not checked, then it is not
// reported in the request, so set default return value to 0
// . support both camel case and all lower-cases
2014-07-04 21:43:04 +04:00
st1 - > m_spiderLinks = hr - > getLong ( " spiderLinks " , 0 ) ;
st1 - > m_spiderLinks = hr - > getLong ( " spiderlinks " , st1 - > m_spiderLinks ) ;
2013-08-03 00:12:24 +04:00
// . should we force it into spiderdb even if already in there
// . use to manually update spider times for a url
// . however, will not remove old scheduled spider times
// . mdw: made force on the default
2014-07-04 21:43:04 +04:00
st1 - > m_forceRespider = hr - > getLong ( " force " , 1 ) ; // 0);
2013-08-03 00:12:24 +04:00
long now = getTimeGlobal ( ) ;
// . allow 1 submit every 1 hour
// . restrict by submitter domain ip
2014-09-01 03:23:11 +04:00
if ( ! st1 - > m_isRootAdmin & &
2013-08-03 00:12:24 +04:00
! canSubmit ( h , now , cr - > m_maxAddUrlsPerIpDomPerDay ) ) {
// return error page
//g_errno = ETOOEARLY;
SafeBuf sb ;
sb . safePrintf ( " You breached your add url quota. " ) ;
2014-03-12 19:28:42 +04:00
mdelete ( st1 , sizeof ( State1i ) , " PageAddUrl " ) ;
2013-08-03 00:12:24 +04:00
delete ( st1 ) ;
// use cachetime of 3600 so it does not re-inject if you hit
// the back button!
2014-07-04 21:43:04 +04:00
g_httpServer . sendDynamicPage ( sock ,
2013-08-03 00:12:24 +04:00
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , //-1, // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
return true ;
}
//st1->m_query = r->getString( "qts", &st1->m_queryLen );
// check it, if turing test is enabled for this collection
/*
2014-09-01 03:23:11 +04:00
if ( ! st1 - > m_isRootAdmin & & cr - > m_doTuringTest & &
2013-08-03 00:12:24 +04:00
! g_turingTest . isHuman ( r ) ) {
// log note so we know it didn't make it
g_msg = " (error: bad answer) " ;
//log("PageAddUrl:: addurl failed for %s : bad answer",
2014-06-21 20:57:51 +04:00
// iptoa(sock->m_ip));
2013-08-03 00:12:24 +04:00
st1 - > m_goodAnswer = false ;
return sendReply ( st1 , true ) ; // addUrl enabled?
}
*/
2014-07-04 21:43:04 +04:00
// set this. also sets gr->m_hr
GigablastRequest * gr = & st1 - > m_msg7 . m_gr ;
// this will fill in GigablastRequest so all the parms we need are set
g_parms . setGigablastRequest ( sock , hr , gr ) ;
// this is really an injection, not add url, so make
// GigablastRequest::m_url point to Gigablast::m_urlsBuf because
// the PAGE_ADDURLS2 parms in Parms.cpp fill in the m_urlsBuf.
// HACK!
gr - > m_url = gr - > m_urlsBuf ;
2013-08-03 00:12:24 +04:00
//
// inject using msg7
//
// . pass in the cleaned url
// . returns false if blocked, true otherwise
2014-06-12 04:24:33 +04:00
if ( ! st1 - > m_msg7 . inject ( //s ,
//r ,
2013-08-03 00:12:24 +04:00
st1 ,
doneInjectingWrapper3 ) )
return false ;
// some kinda error, g_errno should be set i guess
doneInjectingWrapper3 ( st1 ) ;
// we did not block
return true ;
}
void doneInjectingWrapper3 ( void * st ) {
2014-03-12 19:28:42 +04:00
State1i * st1 = ( State1i * ) st ;
2013-08-03 00:12:24 +04:00
// allow others to add now
s_inprogress = false ;
// get the state properly
2014-03-12 19:28:42 +04:00
//State1i *st1 = (State1i *) state;
2013-08-03 00:12:24 +04:00
// in order to see what sites are being added log it, then we can
// more easily remove sites from sitesearch.gigablast.com that are
// being added but not being searched
char * url = st1 - > m_msg7 . m_xd . m_firstUrl . m_url ;
log ( LOG_INFO , " http: add url %s (%s) " , url , mstrerror ( g_errno ) ) ;
// extract info from state
2014-06-12 04:24:33 +04:00
TcpSocket * sock = st1 - > m_socket ;
2014-09-01 03:23:11 +04:00
//bool isAdmin = st1->m_isRootAdmin;
2013-08-03 00:12:24 +04:00
//char *url = NULL;
//if ( st1->m_urlLen ) url = st1->m_url;
// re-null it out if just http://
//bool printUrl = true;
//if ( st1->m_urlLen == 0 ) printUrl = false;
//if ( ! st1->m_url ) printUrl = false;
//if(st1->m_urlLen==7&&st1->m_url&&!strncasecmp(st1->m_url,"http://",7)
// printUrl = false;
// page is not more than 32k
char buf [ 1024 * 32 + MAX_URL_LEN * 2 ] ;
SafeBuf sb ( buf , 1024 * 32 + MAX_URL_LEN * 2 ) ;
//char rawbuf[1024*8];
//SafeBuf rb(rawbuf, 1024*8);
//rb.safePrintf("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n");
//rb.safePrintf("<status>\n");
//CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
// collection name
2014-01-18 23:09:30 +04:00
char * coll = st1 - > m_coll ;
if ( ! coll ) coll = " " ;
2013-08-03 00:12:24 +04:00
//char tt [ 128 ];
//tt[0] = '\0';
//if ( st1->m_coll[0] != '\0' && ! isAdmin )
// sprintf ( tt , " for %s", st1->m_coll );
//
// what we print here will just be the error msg, because the
// ajax will fill the text we print here into the div below
// the add url box
//
// if there was an error let them know
//char msg[MAX_URL_LEN + 1024];
char * pm = " " ;
if ( g_errno ) {
if ( g_errno = = ETOOEARLY ) {
pm = " Error. 100 urls have "
" already been submitted by "
" this IP address for the last 24 hours. "
" <a href=/addurlerror.html>Explanation</a>. " ;
log ( " addurls: Failed for user at %s: "
2014-06-12 04:24:33 +04:00
" quota breeched. " , iptoa ( sock - > m_ip ) ) ;
2013-08-03 00:12:24 +04:00
//rb.safePrintf("Error. %li urls have "
// "already been submitted by "
// "this IP address for the "
// "last 24 hours. ",
// cr->m_maxAddUrlsPerIpDomPerDay);
sb . safePrintf ( " %s " , pm ) ;
}
else {
sb . safePrintf ( " Error adding url(s): <b>%s[%i]</b> " ,
mstrerror ( g_errno ) , g_errno ) ;
//pm = msg;
//rb.safePrintf("Error adding url(s): %s[%i]",
// mstrerror(g_errno) , g_errno);
//sb.safePrintf("%s",pm);
}
}
else {
if ( ! g_conf . m_addUrlEnabled ) {
pm = " <font color=#ff0000> "
" Sorry, this feature is temporarily disabled. "
" Please try again later.</font> " ;
if ( url )
log ( " addurls: failed for user at %s: "
" add url is disabled. "
" Enable add url on the "
" Master Controls page and "
" on the Spider Controls page for "
" this collection. " ,
2014-06-12 04:24:33 +04:00
iptoa ( sock - > m_ip ) ) ;
2013-08-03 00:12:24 +04:00
sb . safePrintf ( " %s " , pm ) ;
//rb.safePrintf("Sorry, this feature is temporarily "
// "disabled. Please try again later.");
}
else if ( s_inprogress ) {
pm = " Add url busy. Try again later. " ;
log ( " addurls: Failed for user at %s: "
2014-06-12 04:24:33 +04:00
" busy adding another. " , iptoa ( sock - > m_ip ) ) ;
2013-08-03 00:12:24 +04:00
//rb.safePrintf("Add url busy. Try again later.");
sb . safePrintf ( " %s " , pm ) ;
}
// did they fail the turing test?
else if ( ! st1 - > m_goodAnswer ) {
pm = " <font color=#ff0000> "
" Oops, you did not enter the 4 large letters "
" you see below. Please try again.</font> " ;
//rb.safePrintf("could not add the url"
// " because the turing test"
// " is enabled.");
sb . safePrintf ( " %s " , pm ) ;
}
else if ( st1 - > m_msg7 . m_xd . m_indexCodeValid & &
st1 - > m_msg7 . m_xd . m_indexCode ) {
long ic = st1 - > m_msg7 . m_xd . m_indexCode ;
sb . safePrintf ( " <b>Had error injecting url: %s</b> " ,
mstrerror ( ic ) ) ;
}
/*
if ( url & & ! st1 - > m_ufu [ 0 ] & & url [ 0 ] & & printUrl ) {
sprintf ( msg , " <u>%s</u> added to spider "
" queue "
" successfully " , url ) ;
//rb.safePrintf("%s added to spider "
// "queue successfully", url );
}
else if ( st1 - > m_ufu [ 0 ] ) {
sprintf ( msg , " urls in <u>%s</u> "
" added to spider queue "
" successfully " , st1 - > m_ufu ) ;
//rb.safePrintf("urls in %s added to spider "
// "queue successfully", url );
}
*/
else {
//rb.safePrintf("Add the url you want:");
// avoid hitting browser page cache
unsigned long rand32 = rand ( ) ;
// in the mime to 0 seconds!
sb . safePrintf ( " <b>Url successfully added. "
2014-01-18 23:09:30 +04:00
" <a href=/search?rand=%lu& "
" c=%s&q=url%%3A " ,
rand32 ,
coll ) ;
2013-08-03 00:12:24 +04:00
sb . urlEncode ( url ) ;
2014-08-04 18:17:13 +04:00
sb . safePrintf ( " >Check it</a> " // or "
//"<a href=http://www.gigablast."
//"com/seo?u=");
//sb.urlEncode(url);
//sb.safePrintf(">SEO it</a>"
" . "
" </b> " ) ;
2013-08-03 00:12:24 +04:00
}
//pm = msg;
//url = "http://";
//else
// pm = "Don't forget to <a href=/gigaboost.html>"
// "Gigaboost</a> your URL.";
}
// store it
sb . safePrintf ( " <b>%s</b> " , pm ) ;
// clear g_errno, if any, so our reply send goes through
g_errno = 0 ;
// nuke state
2014-03-12 19:28:42 +04:00
mdelete ( st1 , sizeof ( State1i ) , " PageAddUrl " ) ;
2013-08-03 00:12:24 +04:00
delete ( st1 ) ;
// this reply should be loaded from the ajax loader so use a cache
// time of 1 hour so it does not re-inject the url if you hit the
// back button
2014-06-12 04:24:33 +04:00
g_httpServer . sendDynamicPage ( sock ,
2013-08-03 00:12:24 +04:00
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
}
// we get like 100k submissions a day!!!
static HashTable s_htable ;
static bool s_init = false ;
static long s_lastTime = 0 ;
bool canSubmit ( unsigned long h , long now , long maxAddUrlsPerIpDomPerDay ) {
// . sometimes no limit
// . 0 means no limit because if they don't want any submission they
// can just turn off add url and we want to avoid excess
// troubleshooting for why a url can't be added
if ( maxAddUrlsPerIpDomPerDay < = 0 ) return true ;
// init the table
if ( ! s_init ) {
s_htable . set ( 50000 ) ;
s_init = true ;
}
// clean out table every 24 hours
if ( now - s_lastTime > 24 * 60 * 60 ) {
s_lastTime = now ;
s_htable . clear ( ) ;
}
// . if table almost full clean out ALL slots
// . TODO: just clean out oldest slots
if ( s_htable . getNumSlotsUsed ( ) > 47000 ) s_htable . clear ( ) ;
// . how many times has this IP domain submitted?
// . allow 10 times per day
long n = s_htable . getValue ( h ) ;
// if over 24hr limit then bail
if ( n > = maxAddUrlsPerIpDomPerDay ) return false ;
// otherwise, inc it
n + + ;
// add to table, will replace old values
s_htable . addKey ( h , n ) ;
return true ;
}
void resetPageAddUrl ( ) {
s_htable . reset ( ) ;
}
2014-08-03 21:42:45 +04:00
bool sendPageAdvanced ( TcpSocket * sock , HttpRequest * hr ) {
SafeBuf sb ;
2014-09-02 00:31:13 +04:00
CollectionRec * cr = g_collectiondb . getRec ( hr ) ;
printFrontPageShell ( & sb , " advanced " , cr ) ;
2014-08-03 21:42:45 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
sb . safePrintf ( " <br><br><br> \n " ) ;
// submit to https now
sb . safePrintf ( " <form method=GET "
" action=/search name=f> \n " ) ;
char * coll = " " ;
if ( cr ) coll = cr - > m_coll ;
if ( cr )
sb . safePrintf ( " <input type=hidden name=c value= \" %s \" > " ,
cr - > m_coll ) ;
sb . safePrintf (
" <script type=text/javascript> "
" <!-- "
" function x(){document.f.q.focus();} "
" // --> "
" </script> "
" </head> "
" "
" <body onload=x()> "
//"<form method=get action=/search>"
" <table width=605 border=0 align=center cellpadding=5 cellspacing=3> "
" <tbody> "
" <tr align=left valign=middle> "
" <th colspan=3>Search for...</th> "
" </tr> "
" <tr align=left valign=middle> "
" <td><strong>all</strong> of these words</td> "
2014-08-05 07:27:51 +04:00
" <td><input type=text name=plus size=40 /> "
" </td><td> "
" <div onclick=document.f.submit(); "
" onmouseover= \" "
" this.style.backgroundColor='lightgreen'; "
" this.style.color='black'; \" "
" onmouseout= \" "
" this.style.backgroundColor='green'; "
" this.style.color='white'; \" "
" style=border-radius:28px; "
" cursor:pointer; "
" cursor:hand; "
" border-color:white; "
" border-style:solid; "
" border-width:3px; "
" padding:12px; "
" width:20px; "
" height:20px; "
" display:inline-block; "
" background-color:green;color:white;> "
" <b style=margin-left:-5px;font-size:18px; "
" >GO</b> "
" </div> "
" </td> "
2014-08-03 21:42:45 +04:00
" </tr> "
" <tr align=left valign=middle> "
" <td>this <strong>exact phrase</strong></td> "
" <td colspan=2><input type=text name=quote1 size=40 /></td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>and this <strong>exact phrase</strong></td> "
" <td colspan=2><input type=text name=quote2 size=40 /></td> "
" </tr> "
" <tr align=left valign=middle> "
" <td><strong>any</strong> of these words</td> "
" <td colspan=2><input type=text name=q size=40 /></td> "
" </tr> "
" <tr align=left valign=middle> "
" <td><strong>none</strong> of these words</td> "
" <td colspan=2><input type=text name=minus size=40 /></td> "
" </tr> "
" "
" <tr align=left valign=middle> "
" <td>In this language: "
" </td> "
" <td colspan=2> "
" <select name=gblang> "
" <option value=0>Any</option> "
" <option value=1>English</option> "
" <option value=2>French</option> "
" <option value=3>Spanish</option> "
" <option value=4>Russian</option> "
" <option value=5>Turkish</option> "
" <option value=6>Japanese</option> "
" <option value=7>ChineseTrad</option> "
" <option value=8>ChineseSimp</option> "
" <option value=9>Korean</option> "
" <option value=10>German</option> "
" <option value=11>Dutch</option> "
" <option value=12>Italian</option> "
" <option value=13>Finnish</option> "
" <option value=14>Swedish</option> "
" <option value=15>Norwegian</option> "
" <option value=16>Portuguese</option> "
" <option value=17>Vietnamese</option> "
" <option value=18>Arabic</option> "
" <option value=19>Hebrew</option> "
" <option value=20>Indonesian</option> "
" <option value=21>Greek</option> "
" <option value=22>Thai</option> "
" <option value=23>Hindi</option> "
" <option value=24>Bengala</option> "
" <option value=25>Polish</option> "
" <option value=26>Tagalog</option> "
" </select> "
" </td> "
" </tr> "
" "
" "
" <tr align=left valign=middle> "
" <td>Restrict to this URL</td> "
" <td colspan=2><input type=text name=url size=40 /></td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>Pages that link to this URL</td> "
" <td colspan=2><input type=text name=link size=40 /></td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>Site Clustering</td> "
" <td colspan=2><input type=radio name=sc value=1 checked=checked />yes <input type=radio name=sc value=0 />no</td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>Number of summary excerpts</td> "
" <td colspan=2><input type=radio name=ns value=0 />0 <input type=radio name=ns value=1 />1 <input type=radio name=ns value=2 />2 <input type=radio name=ns value=3 checked=checked />3 <input type=radio name=ns value=4 />4 <input type=radio name=ns value=5 />5</td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>Results per Page</td> "
" <td colspan=2><input type=radio name=n value=10 checked=checked />10 <input type=radio name=n value=20 />20 <input type=radio name=n value=30 />30 <input type=radio name=n value=40 />40 <input type=radio name=n value=50 />50 <input type=radio name=n value=100 />100</td> "
" </tr> "
" <tr align=left valign=middle> "
" <td>Restrict to these Sites</td> "
" <td colspan=2><textarea rows=10 cols=40 name=sites></textarea></td> "
" </tr> "
" </tbody> "
" </table> "
) ;
sb . safePrintf ( " </form> \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
printNav ( sb , hr ) ;
g_httpServer . sendDynamicPage ( sock ,
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
return true ;
}
bool sendPageAbout ( TcpSocket * sock , HttpRequest * hr ) {
SafeBuf sb ;
2014-09-02 00:31:13 +04:00
CollectionRec * cr = g_collectiondb . getRec ( hr ) ;
printFrontPageShell ( & sb , " about " , cr ) ;
2014-08-03 21:42:45 +04:00
2014-08-04 04:25:22 +04:00
sb . safePrintf ( " <br> \n " ) ;
//sb.safePrintf("<br><br><br>\n");
2014-08-03 21:42:45 +04:00
// submit to https now
//sb.safePrintf("<form method=GET "
// "action=/addurl name=f>\n" );
char * coll = " " ;
if ( cr ) coll = cr - > m_coll ;
if ( cr )
sb . safePrintf ( " <input type=hidden name=c value= \" %s \" > " ,
cr - > m_coll ) ;
sb . safePrintf (
" <table width=100%% cellpadding=5 cellspacing=0 border=0> "
" <!--<tr bgcolor=#0340fd> "
" <th colspan=2> "
" <font color=33dcff> "
" About Gigablast</font> "
" </th> "
" </tr> "
" --> "
" <tr> "
" <td> "
" <br> "
" "
" <center> "
" <table width=650px> "
" <tr><td> "
" "
" <p>As of 2013, Gigablast is one of the remaining four search engines in the United States that maintains its own searchable index of over a billion pages. "
" "
" </p> "
" "
" <p>Founded in 2000, <a href=/bio.html>Matt Wells</a> created Gigablast to index up to 200 Billion pages "
" with the least amount of hardware possible. Gigablast provides large-scale, "
" high-performance, real-time information retrieval technology for partner "
" sites. The company offers a variety of features including topic generation "
" and the ability to index multiple document formats. This search delivery "
" mechanism gives a partner \" turn key \" search capability and "
" the capacity to instantly offer search at maximum scalability with minimum "
" cost. "
" Clients range from NASDAQ 100 listed corporations to boutique "
" companies. "
" </p> "
" "
" <p> "
2014-08-31 08:25:14 +04:00
" For more information, contact Matt directly at <br><img src=data:image/gif;base64,R0lGODlhLAEeAIAAAP///wAAACH5BAEAAAAALAAAAAAsAR4AAAL+hI+py+0Po5y02ouz3rz7D4biSJbmiabqyrbuC8fyTNf2jef6zqPB3wsKV7/Ao9gBHpEb5kkJcfqGPenCeikaG1ooBjvyOsAkMdVGPqQl3a0Caq66l3PTegjUAvT1ed1wFxgHqMeQF3eYd9U19ofQ1sjluDj5WMloWNg0qIaVaKSZ0Abp5dcX+sZJeImZSri4h/Q3GpnZmjooS0c3CUmpOoEqijp6a1l82Ol2uipZ2ZwZ+/z6ticdO2x87Cy8mqu9zd0LytldAU4thkwGp5QIrb6VJjg+HW5pjR0PTSnJL/pPmblrXAISjGIPoTxH7Uwpo+YK3kKJwww+bFQvmJn+QsnwQcTl72M/kNhsRfAErBMHJogcqqQIC6K7hROPDaTnjA1DWYrulfu27Fk3lqzsDU1ZUkPKUi6TfntZ8pNIWiYxihPaklQ2qCS7AgXTLufVX6qQLkHoUWbQaGpfcTyF7mBVW2XHScvKdevIpNnArp1Ltl7dlUabOv0b8bA3chsTyt3Lja2hu40Rd9XL96HfvPdArjF352ThtIpFQg3r7W5irNPm0YScOC9q2GlbWpx9ueLjdD8J19QrZfbTthIbWzx+kaTf1sw8umZ1+7XyoiFRSo95tirezMLtEj+4/Pkuul/LZ735ExzVvuLJn687emxg3aVXI3aSHitajEVnn5vX755g7wEoHy7MxRdZgrVZtuAjDQI2WRT7uWdVgVfwMtx7EtbS24QhiVULfaWcQeJuWZhVYooq8jCTYzGFtmKMMtbQYgY1zohjjjf48kVcOv4IpA8w8hJkkUYeiWSSSi7J5AgFAAA7> "
2014-08-03 21:42:45 +04:00
" </p> "
" <br> "
" <center> "
" <iframe width=420 height=315 src=http://www.youtube-nocookie.com/embed/hoUzcU76u3I frameborder=0 allowfullscreen></iframe> "
" </center> "
" </td></tr> "
" </table> "
" </center> "
" </table> "
) ;
sb . safePrintf ( " </form> \n " ) ;
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
printNav ( sb , hr ) ;
g_httpServer . sendDynamicPage ( sock ,
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
return true ;
}
bool sendPageHelp ( TcpSocket * sock , HttpRequest * hr ) {
SafeBuf sb ;
2014-09-02 00:31:13 +04:00
CollectionRec * cr = g_collectiondb . getRec ( hr ) ;
printFrontPageShell ( & sb , " syntax " , cr ) ;
2014-08-03 21:42:45 +04:00
sb . safePrintf ( " <br><br> \n " ) ;
sb . safePrintf ( " <br><br><br> \n " ) ;
// submit to https now
//sb.safePrintf("<form method=GET "
// "action=/addurl name=f>\n" );
// char *coll = "";
// if ( cr ) coll = cr->m_coll;
// if ( cr )
// sb.safePrintf("<input type=hidden name=c value=\"%s\">",
// cr->m_coll);
sb . safePrintf (
" <br> "
2014-08-08 09:02:55 +04:00
" <table width=650px cellpadding=5 cellspacing=0 border=0> "
2014-08-03 21:42:45 +04:00
" "
2014-08-28 09:21:30 +04:00
// yellow/gold bar
" <tr> "
" <td colspan=2 bgcolor=#f3c714> "
" <b> "
" Basic Query Syntax "
" </b> "
" </td> "
" </tr> \n "
2014-08-03 21:42:45 +04:00
" <tr bgcolor=#0340fd> "
" "
2014-08-08 19:04:41 +04:00
" <th><font color=33dcff>Example Query</font></th> "
2014-08-08 09:02:55 +04:00
" <th><font color=33dcff>Description</font></th> "
" </tr> "
" <tr> "
" <td><a href=/search?q=cat+dog>cat dog</a></td> "
2014-08-03 21:42:45 +04:00
" <td>Search results have the word <em>cat</em> and the word <em>dog</em> "
" in them. They could also have <i>cats</i> and <i>dogs</i>.</td> "
" </tr> "
" "
" "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%2Bcat>+cat</a></td> "
" <td>Search results have the word <em>cat</em> in them. If the search results has the word <i>cats</i> then it will not be included. The plus sign indicates an exact match and not to use synonyms, hypernyms or hyponyms or any other form of the word.</td> "
" </tr> "
" "
" "
" <tr> "
" <td height=10><a href=/search?q=mp3+%%22take+five%%22>mp3 \" take five \" </a></td> "
" <td>Search results have the word <em>mp3</em> and the exact phrase <em>take "
" five</em> in them.</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%22john+smith%%22+-%%22bob+dole%%22> \" john smith \" - \" bob dole \" </a></td> "
" <td>Search results have the phrase <em>john smith</em> but NOT the "
" phrase <em>bob dole</em> in them.</td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=bmx+-game>bmx -game</a></td> "
" <td>Search results have the word <em>bmx</em> but not <em>game</em>.</td> "
" </tr> "
2014-08-08 18:48:11 +04:00
// " <tr> "
// " <td><a href=/search?q=inurl%%3Aedu+title%%3Auniversity><b>inurl:</b></a><a href=/search?q=inurl%%3Aedu+title%%3Auniversity>edu <b>title:</b>university</a></td>"
// " <td>Search results have <em>university</em> in their title and <em>edu</em> "
// " in their url.</td>"
// " </tr>"
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=site%%3Awww.ibm.com+%%22big+blue%%22><b>site:</b></a><a href=/search?q=site%%3Awww.ibm.com+%%22big+blue%%22>www.ibm.com \"big blue\"</a></td>"
// " <td>Search results are from the site <em>www.ibm.com</em> and have the phrase "
// " <em>big blue</em> in them.</td>"
// " </tr>"
// " <tr> "
// " <td><a href=/search?q=url%%3Awww.yahoo.com><b>url:</b></a><a href=/search?q=url%%3Awww.yahoo.com&n=10>www.yahoo.com</a></td>"
// " <td>Search result is the single URL www.yahoo.com, if it is indexed.</td>"
// " </tr>"
// " <tr bgcolor=#E1FFFF> "
// " <td><nobr><a href=/search?q=title%%3A%%22the+news%%22+-%%22weather+report%%22><b>title:</b>\"the "
// " news\" -\"weather report\"</a></nobr></td>"
// " <td>Search results have the phrase <em>the news</em> in their title, "
// " and do NOT have the phrase <em>weather report</em> anywhere in their "
// " content.</td>"
// " </tr>"
// " <tr> "
// " <td><a href=/search?q=ip%%3A216.32.120+cars><b>ip:</b></a><a href=/search?q=ip%%3A216.32.120>216.32.120</a></td>"
// " <td>Search results are from the the ip 216.32.120.*.</td>"
// " </tr>"
// ""
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=type%%3Apdf+nutrition><b>type:</b>pdf nutrition</a></td>"
// " <td>Search results are PDF (Portable Document Format) documents that "
// " contain the word <em>nutrition</em>.</td>"
// " </tr>"
// " <tr> "
// " <td><a href=/search?q=type%%3Adoc><b>type:</b>doc</a></td>"
// " <td>Search results are Microsoft Word documents.</td>"
// " </tr>"
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=type%%3Axls><b>type:</b>xls</a></td>"
// " <td>Search results are Microsoft Excel documents.</td>"
// " </tr>"
// " <tr> "
// " <td><a href=/search?q=type%%3Appt><b>type:</b>ppt</a></td>"
// " <td>Search results are Microsoft Power Point documents.</td>"
// " </tr>"
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=type%%3Aps><b>type:</b>ps</a></td>"
// " <td>Search results are Postscript documents.</td>"
// " </tr>"
// " <tr> "
// " <td><a href=/search?q=type%%3Atext><b>type:</b>text</a></td>"
// " <td>Search results are plain text documents.</td>"
// " </tr>"
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=filetype%%3Apdf><b>filetype:</b>pdf</a></td>"
// " <td>Search results are PDF documents.</td>"
// " </tr>"
// ""
// ""
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=link%%3Awww.yahoo.com><b>link:</b>www.yahoo.com</a></td>"
// " <td>All the pages that link to www.yahoo.com.</td>"
// " </tr>"
// ""
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=sitelink%%3Awww.yahoo.com><b>sitelink:</b>www.yahoo.com</a></td>"
// " <td>All the pages that link to any page on www.yahoo.com.</td>"
// " </tr>"
// ""
// " <tr bgcolor=#E1FFFF> "
// " <td><a href=/search?q=ext%%3Atxt><b>ext:</b>txt</a></td>"
// " <td>All the pages whose url ends in the .txt extension.</td>"
// " </tr>"
// ""
// ""
2014-08-08 09:02:55 +04:00
) ;
sb . safePrintf (
// spacer
//"<tr><td><br></td><td></td></tr>"
//"<tr bgcolor=#0340fd>"
// "<td><font color=33dcff><b>Special Query</b>"
// "</font></td>"
//"<td><font color=33dcff><b>Description</b></font></td>"
// "</tr>"
2014-08-08 18:48:11 +04:00
" <tr bgcolor=#E1FFFF> "
2014-08-08 19:04:41 +04:00
" <td>cat | dog</td><td> "
" Match documents that have cat and dog in them, but "
" do not allow cat to affect the ranking score, only "
" dog. This is called a <i>query refinement</i>. "
" </td></tr> \n "
" <tr bgcolor=#ffFFFF> "
2014-08-08 18:48:11 +04:00
" <td>document.title:paper</td><td> "
2014-08-08 09:02:55 +04:00
" That query will match a JSON document like "
" <i> "
" { \" document \" :{ \" title \" : \" This is a good paper. \" "
" }}</i> or, alternatively, an XML document like <i> "
2014-08-08 19:04:41 +04:00
2014-08-08 09:02:55 +04:00
) ;
sb . htmlEncode ( " <document><title>This is a good paper "
" </title></document> " ) ;
sb . safePrintf ( " </i></td></tr> \n " ) ;
2014-08-08 18:48:11 +04:00
char * bg1 = " #E1FFFF " ;
char * bg2 = " #ffffff " ;
2014-08-08 19:04:41 +04:00
char * bgcolor = bg1 ;
2014-08-08 18:48:11 +04:00
2014-08-08 09:02:55 +04:00
// table of the query keywords
long n = getNumFieldCodes ( ) ;
for ( long i = 0 ; i < n ; i + + ) {
// get field #i
QueryField * f = & g_fields [ i ] ;
2014-08-08 18:48:11 +04:00
if ( g_fields [ i ] . m_flag & QTF_HIDE ) continue ;
2014-08-28 09:10:27 +04:00
// new table?
if ( g_fields [ i ] . m_flag & QTF_BEGINNEWTABLE ) {
sb . safePrintf ( " </table> "
2014-08-28 09:21:30 +04:00
" <br> "
" <br> "
2014-08-28 09:10:27 +04:00
" <br> "
" <table width=650px "
" cellpadding=5 cellspacing=0 border=0> "
2014-08-28 09:21:30 +04:00
// yellow/gold bar
2014-08-28 09:10:27 +04:00
" <tr> "
" <td colspan=2 bgcolor=#f3c714> "
2014-08-28 09:21:30 +04:00
" <b> "
2014-08-28 09:10:27 +04:00
" %s "
2014-08-28 09:21:30 +04:00
" </b> "
2014-08-28 09:10:27 +04:00
" </td> "
" </tr> \n "
" <tr bgcolor=#0340fd> "
" <th><font color=33dcff> "
" Example Query</font></th> "
" <th><font color=33dcff> "
" Description</font></th> "
" </tr> \n "
, g_fields [ i ] . m_title
) ;
}
2014-08-08 09:02:55 +04:00
// print it out
char * d = f - > desc ;
// fix table internal cell bordering
2014-08-08 18:48:11 +04:00
if ( ! d | | d [ 0 ] = = ' \0 ' ) d = " " ;
sb . safePrintf ( " <tr bgcolor=%s> "
" <td><nobr><a href= \" /search?q= "
, bgcolor ) ;
sb . urlEncode ( f - > example ) ;
sb . safePrintf ( " \" > " ) ;
sb . safePrintf ( " %s</a></nobr></td> "
" <td>%s</td></tr> \n " ,
f - > example ,
d ) ;
if ( bgcolor = = bg1 ) bgcolor = bg2 ;
else bgcolor = bg1 ;
2014-08-08 09:02:55 +04:00
}
sb . safePrintf (
2014-08-28 09:21:30 +04:00
// " <tr> "
// " <td style=padding-bottom:12px;> </td>"
// " <td style=padding-bottom:12px;> </td>"
// " </tr>"
// ""
" </table> "
" <br><br><br> "
" <table width=650px "
" cellpadding=5 cellspacing=0 border=0> "
// yellow/gold bar
" <tr> "
" <td colspan=2 bgcolor=#f3c714> "
" <b> "
" Boolean Queries "
" </b> "
" </td> "
" </tr> \n "
2014-08-08 09:02:55 +04:00
2014-08-03 21:42:45 +04:00
" <tr bgcolor=#0340fd> "
" "
2014-08-28 09:21:30 +04:00
" <th><font color=33dcff>Example Query</font></th> "
2014-08-03 21:42:45 +04:00
" <th><font color=33dcff>Description</font></th> "
" "
" </tr> "
" "
" <tr> "
" <td colspan=2 bgcolor=#FFFFCC><center> "
" Note: boolean operators must be in UPPER CASE. "
" </td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=cat+AND+dog>cat AND dog</a></td> "
" <td>Search results have the word <em>cat</em> AND the word <em>dog</em> "
" in them.</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+OR+dog>cat OR dog</a></td> "
" <td>Search results have the word <em>cat</em> OR the word <em>dog</em> "
" in them, but preference is given to results that have both words.</td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=cat+dog+OR+pig>cat dog OR pig</a></td> "
" <td>Search results have the two words <em>cat</em> and <em>dog</em> "
" OR search results have the word <em>pig</em>, but preference is "
" given to results that have all three words. This illustrates how "
" the individual words of one operand are all required for that operand "
" to be true.</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%22cat+dog%%22+OR+pig> \" cat dog \" OR pig</a></td> "
" <td>Search results have the phrase <em> \" cat dog \" </em> in them OR they "
" have the word <em>pig</em>, but preference is given to results that "
" have both.</td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=title%%3A%%22cat+dog%%22+OR+pig>title</a><a href=/search?q=title%%3A%%22cat+dog%%22+OR+pig>: \" cat "
" dog \" OR pig</a></td> "
" <td>Search results have the phrase <em> \" cat dog \" </em> in their title "
" OR they have the word <em>pig</em>, but preference is given to results "
" that have both.</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+OR+dog+OR+pig>cat OR dog OR pig</a></td> "
" <td>Search results need only have one word, <em>cat</em> or <em>dog</em> "
" or <em>pig</em>, but preference is given to results that have the "
" most of the words.</td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=cat+OR+dog+AND+pig>cat OR dog AND pig</a></td> "
" <td>Search results have <em>dog</em> and <em>pig</em>, but they may "
" or may not have <em>cat</em>. Preference is given to results that "
" have all three. To evaluate expressions with more than two operands, "
" as in this case where we have three, you can divide the expression "
" up into sub-expressions that consist of only one operator each. "
" In this case we would have the following two sub-expressions: <em>cat "
" OR dog</em> and <em>dog AND pig</em>. Then, for the original expression "
" to be true, at least one of the sub-expressions that have an OR "
" operator must be true, and, in addition, all of the sub-expressions "
" that have AND operators must be true. Using this logic you can evaluate "
" expressions with more than one boolean operator.</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+AND+NOT+dog>cat AND NOT dog</a></td> "
" <td>Search results have <em>cat</em> but do not have <em>dog</em>.</td> "
" </tr> "
" <tr> "
" <td><a href=/search?q=cat+AND+NOT+%%28dog+OR+pig%%29>cat AND NOT (dog OR pig)</a></td> "
" <td>Search results have <em>cat</em> but do not have <em>dog</em> "
" and do not have <em>pig</em>. When evaluating a boolean expression "
" that contains ()'s you can evaluate the sub-expression in the ()'s "
" first. So if a document has <em>dog</em> or it has <em>pig</em> "
" or it has both, then the expression, <em>(dog OR pig)</em> would "
" be true. So you could, in this case, substitute <em>true</em> for "
" that expression to get the following: <em>cat AND NOT (true) = cat "
" AND false = false</em>. Does anyone actually read this far?</td> "
" </tr> "
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%28cat+OR+dog%%29+AND+NOT+%%28cat+AND+dog%%29>(cat OR dog) AND NOT (cat AND dog)</a></td> "
" <td>Search results have <em>cat</em> or <em>dog</em> but not both.</td> "
" </tr> "
" <tr> "
" <td>left-operand OPERATOR right-operand</td> "
" <td>This is the general format of a boolean expression. The possible "
" operators are: OR and AND. The operands can themselves be boolean "
" expressions and can be optionally enclosed in parentheses. A NOT "
" operator can optionally preceed the left or the right operand.</td> "
" </tr> "
" "
2014-08-08 09:02:55 +04:00
//" </table>"
2014-08-03 21:42:45 +04:00
" "
" "
" "
2014-08-08 09:02:55 +04:00
//"</td></tr>"
//"</table>"
//"<br>"
2014-08-03 21:42:45 +04:00
) ;
2014-08-08 09:02:55 +04:00
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
//sb.safePrintf("<tr><td></td><td></td></tr>\n");
sb . safePrintf ( " </table> " ) ;
2014-08-03 21:42:45 +04:00
//sb.safePrintf("</form>\n");
sb . safePrintf ( " <br> \n " ) ;
sb . safePrintf ( " \n " ) ;
sb . safePrintf ( " <br><br> \n " ) ;
printNav ( sb , hr ) ;
g_httpServer . sendDynamicPage ( sock ,
sb . getBufStart ( ) ,
sb . length ( ) ,
3600 , // cachetime
false , // post?
" text/html " ,
200 , // http status
NULL , // cookie
" UTF-8 " ) ;
return true ;
}