mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Merge branch 'testing' into diffbot-testing
This commit is contained in:
commit
23d26e26ba
@ -1686,6 +1686,7 @@ CollectionRec::CollectionRec() {
|
|||||||
// inits for sortbydatetable
|
// inits for sortbydatetable
|
||||||
m_inProgress = false;
|
m_inProgress = false;
|
||||||
m_msg5 = NULL;
|
m_msg5 = NULL;
|
||||||
|
m_importState = NULL;
|
||||||
// JAB - track which regex parsers have been initialized
|
// JAB - track which regex parsers have been initialized
|
||||||
//log(LOG_DEBUG,"regex: %p initalizing empty parsers", m_pRegExParser);
|
//log(LOG_DEBUG,"regex: %p initalizing empty parsers", m_pRegExParser);
|
||||||
|
|
||||||
|
@ -501,6 +501,7 @@ class CollectionRec {
|
|||||||
char m_enforceNewQuotas ;
|
char m_enforceNewQuotas ;
|
||||||
char m_doIpLookups ; // considered iff using proxy
|
char m_doIpLookups ; // considered iff using proxy
|
||||||
char m_useRobotsTxt ;
|
char m_useRobotsTxt ;
|
||||||
|
char m_forceUseFloaters ;
|
||||||
//char m_restrictDomain ; // say on same domain as seeds?
|
//char m_restrictDomain ; // say on same domain as seeds?
|
||||||
char m_doTuringTest ; // for addurl
|
char m_doTuringTest ; // for addurl
|
||||||
char m_applyFilterToText ; // speeds us up
|
char m_applyFilterToText ; // speeds us up
|
||||||
@ -566,6 +567,8 @@ class CollectionRec {
|
|||||||
long m_numImportInjects;
|
long m_numImportInjects;
|
||||||
class ImportState *m_importState;
|
class ImportState *m_importState;
|
||||||
|
|
||||||
|
SafeBuf m_collectionPasswords;
|
||||||
|
SafeBuf m_collectionIps;
|
||||||
|
|
||||||
// from Conf.h
|
// from Conf.h
|
||||||
long m_posdbMinFilesToMerge ;
|
long m_posdbMinFilesToMerge ;
|
||||||
|
140
Conf.cpp
140
Conf.cpp
@ -88,17 +88,96 @@ bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
bool Conf::isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) {
|
bool isInWhiteSpaceList ( char *p , char *buf ) {
|
||||||
// until we have coll tokens use this...
|
|
||||||
return isRootAdmin ( socket , hr );
|
if ( ! p ) return false;
|
||||||
|
|
||||||
|
char *match = strstr ( buf , p );
|
||||||
|
if ( ! match ) return false;
|
||||||
|
|
||||||
|
long len = gbstrlen(p);
|
||||||
|
|
||||||
|
// ensure book-ended by whitespace
|
||||||
|
if ( match &&
|
||||||
|
(match == buf || is_wspace_a(match[-1])) &&
|
||||||
|
(!match[len] || is_wspace_a(match[len])) )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// no match
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// . may this request administer the collection it refers to?
// . root admins always may; everybody else has to pass the
//   per-collection check in isCollAdmin2()
bool Conf::isCollAdmin ( TcpSocket *sock , HttpRequest *hr ) {

	// root always does
	const bool isRoot = isRootAdmin ( sock , hr );
	if ( isRoot ) return true;

	// get the collection rec from the request
	CollectionRec *rec = g_collectiondb.getRec ( hr , true );

	// no collection rec, no access
	if ( rec == NULL ) return false;

	// defer to the collection's own ip/password lists
	return isCollAdmin2 ( sock , hr , rec );
}
|
||||||
|
|
||||||
|
// . like isCollAdmin() but for an explicitly named collection, "coll"
// . NOTE(review): unlike isCollAdmin() this does not try isRootAdmin()
//   first, so a root admin is subject to the same per-collection check
//   as anyone else here -- confirm callers expect that
bool Conf::isCollAdminForColl ( TcpSocket *sock, HttpRequest *hr, char *coll ){

	// resolve the collection name to its rec
	CollectionRec *rec = g_collectiondb.getRec ( coll );

	// unknown collection means no access, otherwise run the shared check
	return rec ? isCollAdmin2 ( sock , hr , rec ) : false;
}
|
||||||
|
|
||||||
|
// . does this request have admin rights over collection "cr"?
// . shared by isCollAdmin() and isCollAdminForColl()
// . does NOT consult isRootAdmin()
// . returns false for a NULL "cr" and always for the "main" and "dmoz"
//   collections
// . returns true if the collection has neither ips nor passwords set,
//   if the socket's ip is listed in m_collectionIps, or if the request
//   carries a non-empty password (cgi "pwd", cgi "password" or cookie
//   "pwd", tried in that order) listed in m_collectionPasswords
// . an empty list is never scanned and an empty password is treated as
//   missing, so neither can produce a match
bool Conf::isCollAdmin2 ( TcpSocket *sock ,
			  HttpRequest *hr ,
			  CollectionRec *cr ) {

	if ( ! cr ) return false;

	//long page = g_pages.getDynamicPageNumber(hr);

	// never for main or dmoz! must be root!
	if ( strcmp(cr->m_coll,"main")==0 ) return false;
	if ( strcmp(cr->m_coll,"dmoz")==0 ) return false;

	bool haveIps  = ( cr->m_collectionIps      .length() > 0 );
	bool havePwds = ( cr->m_collectionPasswords.length() > 0 );

	// no ips and no passwords configured? then allow them through
	if ( ! haveIps && ! havePwds ) return true;

	// a good ip? only look if the ip list actually has something in it
	if ( haveIps ) {
		char *ipStr  = iptoa(sock->m_ip);
		char *ipList = cr->m_collectionIps.getBufStart();
		if ( isInWhiteSpaceList ( ipStr , ipList ) ) return true;
	}

	// without a password list no password can let them in
	if ( ! havePwds ) return false;

	// if they got the password, let them in. an empty value counts as
	// "not supplied" so we fall through to the next source.
	char *pwd = hr->getString("pwd");
	if ( ! pwd || ! *pwd ) pwd = hr->getString("password");
	if ( ! pwd || ! *pwd ) pwd = hr->getStringFromCookie("pwd");
	if ( ! pwd || ! *pwd ) return false;

	char *pwdList = cr->m_collectionPasswords.getBufStart();
	if ( isInWhiteSpaceList ( pwd , pwdList ) ) return true;

	// the very act of just knowing the collname of a guest account
	// is good enough to update it
	//if ( strncmp ( cr->m_coll , "guest_" , 6 ) == 0 )
	//	return true;

	return false;
}
|
||||||
|
|
||||||
|
|
||||||
// . is user a root administrator?
|
// . is user a root administrator?
|
||||||
// . only need to be from root IP *OR* have password, not both
|
// . only need to be from root IP *OR* have password, not both
|
||||||
bool Conf::isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) {
|
bool Conf::isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) {
|
||||||
|
|
||||||
// totally open access?
|
// totally open access?
|
||||||
if ( m_numConnectIps <= 0 && m_numMasterPwds <= 0 )
|
//if ( m_numConnectIps <= 0 && m_numMasterPwds <= 0 )
|
||||||
|
if ( m_connectIps.length() <= 0 &&
|
||||||
|
m_masterPwds.length() <= 0 )
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// coming from root gets you in
|
// coming from root gets you in
|
||||||
@ -114,7 +193,9 @@ bool Conf::isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) {
|
|||||||
|
|
||||||
bool Conf::hasRootPwd ( HttpRequest *hr ) {
|
bool Conf::hasRootPwd ( HttpRequest *hr ) {
|
||||||
|
|
||||||
if ( m_numMasterPwds == 0 ) return false;
|
//if ( m_numMasterPwds == 0 ) return false;
|
||||||
|
if ( m_masterPwds.length() <= 0 )
|
||||||
|
return false;
|
||||||
|
|
||||||
char *p = hr->getString("pwd");
|
char *p = hr->getString("pwd");
|
||||||
|
|
||||||
@ -124,43 +205,46 @@ bool Conf::hasRootPwd ( HttpRequest *hr ) {
|
|||||||
|
|
||||||
if ( ! p ) return false;
|
if ( ! p ) return false;
|
||||||
|
|
||||||
for ( long i = 0 ; i < m_numMasterPwds ; i++ ) {
|
char *buf = m_masterPwds.getBufStart();
|
||||||
if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue;
|
|
||||||
// we got a match
|
return isInWhiteSpaceList ( p , buf );
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// . check this ip in the list of admin ips
|
// . check this ip in the list of admin ips
|
||||||
bool Conf::isRootIp ( unsigned long ip ) {
|
bool Conf::isRootIp ( unsigned long ip ) {
|
||||||
|
|
||||||
//if ( m_numMasterIps == 0 ) return false;
|
//if ( m_numMasterIps == 0 ) return false;
|
||||||
if ( m_numConnectIps == 0 ) return false;
|
//if ( m_numConnectIps == 0 ) return false;
|
||||||
|
if ( m_connectIps.length() <= 0 ) return false;
|
||||||
|
|
||||||
for ( long i = 0 ; i < m_numConnectIps ; i++ )
|
// for ( long i = 0 ; i < m_numConnectIps ; i++ )
|
||||||
if ( m_connectIps[i] == (long)ip )
|
// if ( m_connectIps[i] == (long)ip )
|
||||||
return true;
|
// return true;
|
||||||
|
|
||||||
//if ( ip == atoip("10.5.0.2",8) ) return true;
|
//if ( ip == atoip("10.5.0.2",8) ) return true;
|
||||||
|
|
||||||
// no match
|
char *p = iptoa(ip);
|
||||||
return false;
|
char *buf = m_connectIps.getBufStart();
|
||||||
|
|
||||||
|
return isInWhiteSpaceList ( p , buf );
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Conf::isConnectIp ( unsigned long ip ) {
|
bool Conf::isConnectIp ( unsigned long ip ) {
|
||||||
for ( long i = 0 ; i < m_numConnectIps ; i++ ) {
|
|
||||||
if ( m_connectIps[i] == (long)ip )
|
return isRootIp(ip);
|
||||||
return true;
|
|
||||||
// . 1.2.3.0 ips mean the whole block
|
// for ( long i = 0 ; i < m_numConnectIps ; i++ ) {
|
||||||
// . the high byte in the long is the Least Signficant Byte
|
// if ( m_connectIps[i] == (long)ip )
|
||||||
if ( (m_connectIps[i] >> 24) == 0 &&
|
// return true;
|
||||||
(m_connectIps[i] & 0x00ffffff) ==
|
// // . 1.2.3.0 ips mean the whole block
|
||||||
((long)ip & 0x00ffffff) )
|
// // . the high byte in the long is the Least Signficant Byte
|
||||||
return true;
|
// if ( (m_connectIps[i] >> 24) == 0 &&
|
||||||
}
|
// (m_connectIps[i] & 0x00ffffff) ==
|
||||||
|
// ((long)ip & 0x00ffffff) )
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
// no match
|
// no match
|
||||||
return false;
|
//return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// . set all member vars to their default values
|
// . set all member vars to their default values
|
||||||
|
15
Conf.h
15
Conf.h
@ -50,6 +50,10 @@ class Conf {
|
|||||||
Conf();
|
Conf();
|
||||||
|
|
||||||
bool isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
|
bool isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
|
||||||
|
bool isCollAdminForColl (TcpSocket *sock, HttpRequest *hr,char *coll );
|
||||||
|
bool isCollAdmin2 (TcpSocket *socket , HttpRequest *hr,
|
||||||
|
class CollectionRec *cr) ;
|
||||||
|
|
||||||
|
|
||||||
bool isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
|
bool isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
|
||||||
//bool isMasterAdmin ( class TcpSocket *s , class HttpRequest *r );
|
//bool isMasterAdmin ( class TcpSocket *s , class HttpRequest *r );
|
||||||
@ -686,14 +690,17 @@ class Conf {
|
|||||||
// programmer reminders.
|
// programmer reminders.
|
||||||
bool m_logReminders;
|
bool m_logReminders;
|
||||||
|
|
||||||
long m_numMasterPwds;
|
//long m_numMasterPwds;
|
||||||
char m_masterPwds[MAX_MASTER_PASSWORDS][PASSWORD_MAX_LEN];
|
//char m_masterPwds[MAX_MASTER_PASSWORDS][PASSWORD_MAX_LEN];
|
||||||
|
SafeBuf m_masterPwds;
|
||||||
|
|
||||||
//long m_numMasterIps;
|
//long m_numMasterIps;
|
||||||
//long m_masterIps[MAX_MASTER_IPS];
|
//long m_masterIps[MAX_MASTER_IPS];
|
||||||
|
|
||||||
// these are the new master ips
|
// these are the new master ips
|
||||||
long m_numConnectIps;
|
//long m_numConnectIps;
|
||||||
long m_connectIps [ MAX_CONNECT_IPS ];
|
//long m_connectIps [ MAX_CONNECT_IPS ];
|
||||||
|
SafeBuf m_connectIps;
|
||||||
|
|
||||||
// should we generate similarity/content vector for titleRecs lacking?
|
// should we generate similarity/content vector for titleRecs lacking?
|
||||||
// this takes a ~100+ ms, very expensive, so it is just meant for
|
// this takes a ~100+ ms, very expensive, so it is just meant for
|
||||||
|
32
File.cpp
32
File.cpp
@ -618,27 +618,41 @@ bool File::closeLeastUsed () {
|
|||||||
long File::getFileSize ( ) {
|
long File::getFileSize ( ) {
|
||||||
|
|
||||||
// allow the substitution of another filename
|
// allow the substitution of another filename
|
||||||
struct stat stats;
|
//struct stat stats;
|
||||||
|
|
||||||
stats.st_size = 0;
|
//stats.st_size = 0;
|
||||||
|
|
||||||
int status = stat ( m_filename , &stats );
|
//int status = stat ( m_filename , &stats );
|
||||||
|
|
||||||
|
FILE *fd = fopen ( m_filename , "r" );
|
||||||
|
if ( ! fd ) {
|
||||||
|
log("disk: error getFileSize(%s) : %s",
|
||||||
|
m_filename , strerror(g_errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fseek(fd,0,SEEK_END);
|
||||||
|
long fileSize = ftell ( fd );
|
||||||
|
|
||||||
|
fclose ( fd );
|
||||||
|
|
||||||
|
return fileSize;
|
||||||
|
|
||||||
// return the size if the status was ok
|
// return the size if the status was ok
|
||||||
if ( status == 0 ) return stats.st_size;
|
//if ( status == 0 ) return stats.st_size;
|
||||||
|
|
||||||
// copy errno to g_errno
|
// copy errno to g_errno
|
||||||
g_errno = errno;
|
//g_errno = errno;
|
||||||
|
|
||||||
// return 0 and reset g_errno if it just does not exist
|
// return 0 and reset g_errno if it just does not exist
|
||||||
if ( g_errno == ENOENT ) { g_errno = 0; return 0; }
|
//if ( g_errno == ENOENT ) { g_errno = 0; return 0; }
|
||||||
|
|
||||||
// resource temporarily unavailable (for newer libc)
|
// resource temporarily unavailable (for newer libc)
|
||||||
if ( g_errno == EAGAIN ) { g_errno = 0; return 0; }
|
//if ( g_errno == EAGAIN ) { g_errno = 0; return 0; }
|
||||||
|
|
||||||
// log & return -1 on any other error
|
// log & return -1 on any other error
|
||||||
log("disk: error getFileSize(%s) : %s",m_filename , strerror(g_errno));
|
//log("disk: error getFileSize(%s) : %s",m_filename,strerror(g_errno));
|
||||||
return -1;
|
//return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// . return 0 on error
|
// . return 0 on error
|
||||||
|
@ -432,7 +432,6 @@ bool HashTableX::load ( char *dir , char *filename , SafeBuf *fillBuf ) {
|
|||||||
|
|
||||||
// both return false and set g_errno on error, true otherwise
|
// both return false and set g_errno on error, true otherwise
|
||||||
bool HashTableX::load ( char *dir, char *filename, char **tbuf, long *tsize ) {
|
bool HashTableX::load ( char *dir, char *filename, char **tbuf, long *tsize ) {
|
||||||
reset();
|
|
||||||
File f;
|
File f;
|
||||||
f.set ( dir , filename );
|
f.set ( dir , filename );
|
||||||
if ( ! f.doesExist() ) return false;
|
if ( ! f.doesExist() ) return false;
|
||||||
@ -447,10 +446,27 @@ bool HashTableX::load ( char *dir, char *filename, char **tbuf, long *tsize ) {
|
|||||||
off += 4;
|
off += 4;
|
||||||
if ( ! f.read ( &numSlotsUsed , 4 , off ) ) return false;
|
if ( ! f.read ( &numSlotsUsed , 4 , off ) ) return false;
|
||||||
off += 4;
|
off += 4;
|
||||||
if ( ! f.read ( &m_ks , 4 , off ) ) return false;
|
long ks;
|
||||||
|
if ( ! f.read ( &ks , 4 , off ) ) return false;
|
||||||
off += 4;
|
off += 4;
|
||||||
if ( ! f.read ( &m_ds , 4 , off ) ) return false;
|
long ds;
|
||||||
|
if ( ! f.read ( &ds , 4 , off ) ) return false;
|
||||||
off += 4;
|
off += 4;
|
||||||
|
|
||||||
|
// bogus key size?
|
||||||
|
if ( ks <= 0 ) {
|
||||||
|
log("htable: reading hashtable from %s%s: "
|
||||||
|
"bogus keysize of %li",
|
||||||
|
dir,filename,ks );
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// just in case m_ks was already set, call reset() down here
|
||||||
|
reset();
|
||||||
|
|
||||||
|
m_ks = ks;
|
||||||
|
m_ds = ds;
|
||||||
|
|
||||||
if ( ! setTableSize ( numSlots , NULL , 0 ) ) return false;
|
if ( ! setTableSize ( numSlots , NULL , 0 ) ) return false;
|
||||||
if ( ! f.read ( m_keys , numSlots * m_ks , off ) ) return false;
|
if ( ! f.read ( m_keys , numSlots * m_ks , off ) ) return false;
|
||||||
off += numSlots * m_ks;
|
off += numSlots * m_ks;
|
||||||
|
@ -893,11 +893,11 @@ bool HttpRequest::set ( char *origReq , long origReqLen , TcpSocket *sock ) {
|
|||||||
|
|
||||||
|
|
||||||
// connectips/adminips
|
// connectips/adminips
|
||||||
for ( long i = 0 ; i < g_conf.m_numConnectIps ; i++ ) {
|
// for ( long i = 0 ; i < g_conf.m_numConnectIps ; i++ ) {
|
||||||
if ( sock->m_ip != g_conf.m_connectIps[i] ) continue;
|
// if ( sock->m_ip != g_conf.m_connectIps[i] ) continue;
|
||||||
m_isLocal = true;
|
// m_isLocal = true;
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
|
|
||||||
// roadrunner ip
|
// roadrunner ip
|
||||||
// if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0)
|
// if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0)
|
||||||
|
@ -1194,8 +1194,8 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
|
|||||||
//if ( ! strncmp ( path ,"/help.html", pathLen ) )
|
//if ( ! strncmp ( path ,"/help.html", pathLen ) )
|
||||||
// return sendPageAbout ( s , r , path );
|
// return sendPageAbout ( s , r , path );
|
||||||
|
|
||||||
if ( ! strncmp ( path ,"/adv.html", pathLen ) )
|
//if ( ! strncmp ( path ,"/adv.html", pathLen ) )
|
||||||
return sendPageAdvanced ( s , r );
|
// return sendPageAdvanced ( s , r );
|
||||||
|
|
||||||
//if ( ! strncmp ( path ,"/about.html", pathLen ) )
|
//if ( ! strncmp ( path ,"/about.html", pathLen ) )
|
||||||
// return sendPageAbout ( s , r );
|
// return sendPageAbout ( s , r );
|
||||||
@ -1208,6 +1208,9 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
|
|||||||
if ( ! strncmp ( path ,"/widgets.html", pathLen ) )
|
if ( ! strncmp ( path ,"/widgets.html", pathLen ) )
|
||||||
return sendPageWidgets ( s , r );
|
return sendPageWidgets ( s , r );
|
||||||
|
|
||||||
|
if ( ! strncmp ( path ,"/adv.html", pathLen ) )
|
||||||
|
return sendPagePretty ( s , r,"adv.html","advanced");
|
||||||
|
|
||||||
// who uses gigablast?
|
// who uses gigablast?
|
||||||
if ( ! strncmp ( path ,"/users.html", pathLen ) )
|
if ( ! strncmp ( path ,"/users.html", pathLen ) )
|
||||||
return sendPagePretty ( s , r,"users.html","users"); // special
|
return sendPagePretty ( s , r,"users.html","users"); // special
|
||||||
|
14
HttpServer.h
14
HttpServer.h
@ -190,10 +190,12 @@ class HttpServer {
|
|||||||
//header to reflect the new size and encoding
|
//header to reflect the new size and encoding
|
||||||
TcpSocket *unzipReply(TcpSocket* s);
|
TcpSocket *unzipReply(TcpSocket* s);
|
||||||
|
|
||||||
float getCompressionRatio()
|
float getCompressionRatio() {
|
||||||
{return (float)m_uncompressedBytes/m_bytesDownloaded;}
|
if ( m_bytesDownloaded )
|
||||||
|
return (float)m_uncompressedBytes/m_bytesDownloaded;
|
||||||
|
else
|
||||||
|
return 0.0;
|
||||||
|
};
|
||||||
|
|
||||||
//this is for low priority requests which come in while we are
|
//this is for low priority requests which come in while we are
|
||||||
//in a quickpoll
|
//in a quickpoll
|
||||||
@ -225,8 +227,8 @@ class HttpServer {
|
|||||||
void *states[MAX_DOWNLOADS];
|
void *states[MAX_DOWNLOADS];
|
||||||
tcp_callback_t callbacks[MAX_DOWNLOADS];
|
tcp_callback_t callbacks[MAX_DOWNLOADS];
|
||||||
|
|
||||||
long m_bytesDownloaded;
|
long long m_bytesDownloaded;
|
||||||
long m_uncompressedBytes;
|
long long m_uncompressedBytes;
|
||||||
|
|
||||||
//QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
|
//QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
|
||||||
//long m_lastSlotUsed;
|
//long m_lastSlotUsed;
|
||||||
|
9
Makefile
9
Makefile
@ -2,6 +2,7 @@ SHELL = /bin/bash
|
|||||||
|
|
||||||
CC=g++
|
CC=g++
|
||||||
|
|
||||||
|
# remove dlstubs.o for CYGWIN
|
||||||
OBJS = UdpSlot.o Rebalance.o \
|
OBJS = UdpSlot.o Rebalance.o \
|
||||||
Msg13.o Mime.o IndexReadInfo.o \
|
Msg13.o Mime.o IndexReadInfo.o \
|
||||||
PageGet.o PageHosts.o PageIndexdb.o \
|
PageGet.o PageHosts.o PageIndexdb.o \
|
||||||
@ -96,7 +97,7 @@ CPPFLAGS = -m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-
|
|||||||
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||||
# use this for compiling on CYGWIN: (only for 32bit cygwin right now and
|
# use this for compiling on CYGWIN: (only for 32bit cygwin right now and
|
||||||
# you have to install the packages that have these libs.
|
# you have to install the packages that have these libs.
|
||||||
#LIBS= -lz -lm -lpthread -lssl -lcrypto -iconv -lz
|
#LIBS= -lz -lm -lpthread -lssl -lcrypto -liconv
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
@ -567,10 +568,10 @@ master-rpm:
|
|||||||
# deb-master
|
# deb-master
|
||||||
master-deb:
|
master-deb:
|
||||||
# need to change in changelog too!! dont' forget!!!
|
# need to change in changelog too!! dont' forget!!!
|
||||||
git archive --format=tar --prefix=gb-1.14/ master > ../gb_1.14.orig.tar
|
git archive --format=tar --prefix=gb-1.16/ master > ../gb_1.16.orig.tar
|
||||||
rm -rf debian
|
rm -rf debian
|
||||||
# change "-p gb_1.0" to "-p gb_1.1" to update version for example
|
# change "-p gb_1.0" to "-p gb_1.1" to update version for example
|
||||||
dh_make -e gigablast@mail.com -p gb_1.14 -f ../gb_1.14.orig.tar
|
dh_make -e gigablast@mail.com -p gb_1.16 -f ../gb_1.16.orig.tar
|
||||||
# zero this out, it is just filed with the .txt files erroneously and it'll
|
# zero this out, it is just filed with the .txt files erroneously and it'll
|
||||||
# try to automatiicaly install in /usr/docs/
|
# try to automatiicaly install in /usr/docs/
|
||||||
rm debian/docs
|
rm debian/docs
|
||||||
@ -595,7 +596,7 @@ master-deb:
|
|||||||
# upload den
|
# upload den
|
||||||
scp gb*.deb gk268:/w/html/
|
scp gb*.deb gk268:/w/html/
|
||||||
# alien it
|
# alien it
|
||||||
sudo alien --to-rpm gb_1.14-1_i386.deb
|
sudo alien --to-rpm gb_1.16-1_i386.deb
|
||||||
# upload rpm
|
# upload rpm
|
||||||
scp gb*.rpm gk268:/w/html/
|
scp gb*.rpm gk268:/w/html/
|
||||||
|
|
||||||
|
@ -732,9 +732,13 @@ void downloadTheDocForReals2 ( Msg13Request *r ) {
|
|||||||
// user can turn off proxy use with this switch
|
// user can turn off proxy use with this switch
|
||||||
if ( ! g_conf.m_useProxyIps ) useProxies = false;
|
if ( ! g_conf.m_useProxyIps ) useProxies = false;
|
||||||
|
|
||||||
|
// for diffbot turn ON if use robots is off
|
||||||
|
if ( r->m_forceUseFloaters ) useProxies = true;
|
||||||
|
|
||||||
// we gotta have some proxy ips that we can use
|
// we gotta have some proxy ips that we can use
|
||||||
if ( ! g_conf.m_proxyIps.hasDigits() ) useProxies = false;
|
if ( ! g_conf.m_proxyIps.hasDigits() ) useProxies = false;
|
||||||
|
|
||||||
|
|
||||||
// we did not need a spider proxy ip so send this reuest to a host
|
// we did not need a spider proxy ip so send this reuest to a host
|
||||||
// to download the url
|
// to download the url
|
||||||
if ( ! useProxies ) {
|
if ( ! useProxies ) {
|
||||||
|
1
Msg13.h
1
Msg13.h
@ -97,6 +97,7 @@ public:
|
|||||||
long m_isSquidProxiedUrl:1;
|
long m_isSquidProxiedUrl:1;
|
||||||
|
|
||||||
long m_foundInCache:1;
|
long m_foundInCache:1;
|
||||||
|
long m_forceUseFloaters:1;
|
||||||
|
|
||||||
//long m_testParserEnabled:1;
|
//long m_testParserEnabled:1;
|
||||||
//long m_testSpiderEnabled:1;
|
//long m_testSpiderEnabled:1;
|
||||||
|
42
Msg40.cpp
42
Msg40.cpp
@ -105,6 +105,7 @@ Msg40::Msg40() {
|
|||||||
m_numPrintedSoFar = 0;
|
m_numPrintedSoFar = 0;
|
||||||
m_lastChunk = false;
|
m_lastChunk = false;
|
||||||
m_didSummarySkip = false;
|
m_didSummarySkip = false;
|
||||||
|
m_omitCount = 0;
|
||||||
//m_numGigabitInfos = 0;
|
//m_numGigabitInfos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,6 +160,9 @@ bool Msg40::getResults ( SearchInput *si ,
|
|||||||
bool forward ,
|
bool forward ,
|
||||||
void *state ,
|
void *state ,
|
||||||
void (* callback) ( void *state ) ) {
|
void (* callback) ( void *state ) ) {
|
||||||
|
|
||||||
|
m_omitCount = 0;
|
||||||
|
|
||||||
// warning
|
// warning
|
||||||
//if ( ! si->m_coll2 ) log(LOG_LOGIC,"net: NULL collection. msg40.");
|
//if ( ! si->m_coll2 ) log(LOG_LOGIC,"net: NULL collection. msg40.");
|
||||||
if ( si->m_collnumBuf.length() < (long)sizeof(collnum_t) )
|
if ( si->m_collnumBuf.length() < (long)sizeof(collnum_t) )
|
||||||
@ -2404,6 +2408,9 @@ bool Msg40::gotSummary ( ) {
|
|||||||
// how many docids are visible? (unfiltered)
|
// how many docids are visible? (unfiltered)
|
||||||
//long visible = m_filterStats[CR_OK];
|
//long visible = m_filterStats[CR_OK];
|
||||||
|
|
||||||
|
|
||||||
|
m_omitCount = 0;
|
||||||
|
|
||||||
// count how many are visible!
|
// count how many are visible!
|
||||||
long visible = 0;
|
long visible = 0;
|
||||||
// loop over each clusterLevel and set it
|
// loop over each clusterLevel and set it
|
||||||
@ -2412,6 +2419,8 @@ bool Msg40::gotSummary ( ) {
|
|||||||
char *level = &m_msg3a.m_clusterLevels[i];
|
char *level = &m_msg3a.m_clusterLevels[i];
|
||||||
// on CR_OK
|
// on CR_OK
|
||||||
if ( *level == CR_OK ) visible++;
|
if ( *level == CR_OK ) visible++;
|
||||||
|
// otherwise count as omitted
|
||||||
|
else m_omitCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
// do we got enough search results now?
|
// do we got enough search results now?
|
||||||
@ -2464,10 +2473,16 @@ bool Msg40::gotSummary ( ) {
|
|||||||
|
|
||||||
// if we do not have enough visible, try to get more
|
// if we do not have enough visible, try to get more
|
||||||
if ( visible < m_docsToGetVisible && m_msg3a.m_moreDocIdsAvail &&
|
if ( visible < m_docsToGetVisible && m_msg3a.m_moreDocIdsAvail &&
|
||||||
|
// do not spin too long in this!
|
||||||
|
// TODO: fix this better somehow later
|
||||||
|
m_docsToGet <= 1000 &&
|
||||||
// doesn't work on multi-coll just yet, it cores
|
// doesn't work on multi-coll just yet, it cores
|
||||||
m_numCollsToSearch == 1 ) {
|
m_numCollsToSearch == 1 ) {
|
||||||
// can it cover us?
|
// can it cover us?
|
||||||
long need = m_msg3a.m_docsToGet + 20;
|
//long need = m_msg3a.m_docsToGet + 20;
|
||||||
|
long need = m_docsToGet + 20;
|
||||||
|
// increase by 25 percent as well
|
||||||
|
need *= 1.25;
|
||||||
// note it
|
// note it
|
||||||
log("msg40: too many summaries invisible. getting more "
|
log("msg40: too many summaries invisible. getting more "
|
||||||
"docids from msg3a merge and getting summaries. "
|
"docids from msg3a merge and getting summaries. "
|
||||||
@ -2479,20 +2494,31 @@ bool Msg40::gotSummary ( ) {
|
|||||||
m_numReplies, m_numRequests);
|
m_numReplies, m_numRequests);
|
||||||
// get more
|
// get more
|
||||||
//m_docsToGet = need;
|
//m_docsToGet = need;
|
||||||
// merge more
|
|
||||||
m_msg3a.m_docsToGet = need;
|
// get more!
|
||||||
m_msg3a.mergeLists();
|
//m_msg3a.m_docsToGet = need;
|
||||||
// rellaoc the msg20 array
|
m_docsToGet = need;
|
||||||
if ( ! reallocMsg20Buf() ) return true;
|
|
||||||
// reset this before launch
|
// reset this before launch
|
||||||
m_numReplies = 0;
|
m_numReplies = 0;
|
||||||
m_numRequests = 0;
|
m_numRequests = 0;
|
||||||
// reprocess all!
|
// reprocess all!
|
||||||
m_lastProcessedi = -1;
|
m_lastProcessedi = -1;
|
||||||
|
// let's do it all from the top!
|
||||||
|
return getDocIds ( true ) ;
|
||||||
|
|
||||||
|
|
||||||
|
//m_msg3a.mergeLists();
|
||||||
|
// realloc the msg20 array
|
||||||
|
//if ( ! reallocMsg20Buf() ) return true;
|
||||||
|
// reset this before launch
|
||||||
|
//m_numReplies = 0;
|
||||||
|
//m_numRequests = 0;
|
||||||
|
// reprocess all!
|
||||||
|
//m_lastProcessedi = -1;
|
||||||
// now launch!
|
// now launch!
|
||||||
if ( ! launchMsg20s ( true ) ) return false;
|
//if ( ! launchMsg20s ( true ) ) return false;
|
||||||
// all done, call callback
|
// all done, call callback
|
||||||
return true;
|
//return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
1
Msg40.h
1
Msg40.h
@ -220,6 +220,7 @@ class Msg40 {
|
|||||||
HashTableX m_facetTextTable;
|
HashTableX m_facetTextTable;
|
||||||
SafeBuf m_facetTextBuf;
|
SafeBuf m_facetTextBuf;
|
||||||
bool m_firstTime;
|
bool m_firstTime;
|
||||||
|
long m_omitCount;
|
||||||
|
|
||||||
bool printFacetTables ( class SafeBuf *sb ) ;
|
bool printFacetTables ( class SafeBuf *sb ) ;
|
||||||
bool printFacetsForTable ( SafeBuf *sb , QueryTerm *qt );
|
bool printFacetsForTable ( SafeBuf *sb , QueryTerm *qt );
|
||||||
|
@ -245,6 +245,11 @@ class Multicast {
|
|||||||
long m_hack32;
|
long m_hack32;
|
||||||
long long m_hack64;
|
long long m_hack64;
|
||||||
|
|
||||||
|
// more hack stuff used by PageInject.cpp
|
||||||
|
long m_hackFileId;
|
||||||
|
long long m_hackFileOff;
|
||||||
|
class ImportState *m_importState;
|
||||||
|
|
||||||
// hacky crunk use by seo pipeline in xmldoc.cpp
|
// hacky crunk use by seo pipeline in xmldoc.cpp
|
||||||
//void *m_hackxd;
|
//void *m_hackxd;
|
||||||
//void *m_hackHost;
|
//void *m_hackHost;
|
||||||
|
@ -81,6 +81,9 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
|
|||||||
char *action = r->getString("action",NULL);
|
char *action = r->getString("action",NULL);
|
||||||
char *addColl = r->getString("addcoll",NULL);
|
char *addColl = r->getString("addcoll",NULL);
|
||||||
|
|
||||||
|
// add our ip to the list
|
||||||
|
//char *ips = r->getString("collips",NULL);
|
||||||
|
//char *pwds = r->getString("collpwd",NULL);
|
||||||
|
|
||||||
|
|
||||||
char buf [ 64*1024 ];
|
char buf [ 64*1024 ];
|
||||||
@ -88,7 +91,7 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
|
|||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// CLOUD SEARCH ENGIEN SUPPORT - GIGABOT ERRORS
|
// CLOUD SEARCH ENGINE SUPPORT - GIGABOT ERRORS
|
||||||
//
|
//
|
||||||
|
|
||||||
SafeBuf gtmp;
|
SafeBuf gtmp;
|
||||||
@ -239,6 +242,41 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
|
|||||||
"</tr>"
|
"</tr>"
|
||||||
, LIGHT_BLUE
|
, LIGHT_BLUE
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// collection pwds
|
||||||
|
p.safePrintf(
|
||||||
|
"<tr bgcolor=#%s>"
|
||||||
|
"<td><b>collection passwords"
|
||||||
|
"</b>"
|
||||||
|
"<br><font size=1>List of white space separated "
|
||||||
|
"passwords allowed to adminster collection."
|
||||||
|
"</font>"
|
||||||
|
"</td>\n"
|
||||||
|
"<td><input type=text name=collpwd "
|
||||||
|
"size=60>"
|
||||||
|
"</td>"
|
||||||
|
"</tr>"
|
||||||
|
, LIGHT_BLUE
|
||||||
|
);
|
||||||
|
|
||||||
|
// ips box for security
|
||||||
|
p.safePrintf(
|
||||||
|
"<tr bgcolor=#%s>"
|
||||||
|
"<td><b>collection ips"
|
||||||
|
"</b>"
|
||||||
|
|
||||||
|
"<br><font size=1>List of white space separated "
|
||||||
|
"IPs allowed to adminster collection."
|
||||||
|
"</font>"
|
||||||
|
|
||||||
|
"</td>\n"
|
||||||
|
"<td><input type=text name=collips "
|
||||||
|
"size=60>"
|
||||||
|
"</td>"
|
||||||
|
"</tr>"
|
||||||
|
, LIGHT_BLUE
|
||||||
|
);
|
||||||
|
|
||||||
// now list collections from which to copy the config
|
// now list collections from which to copy the config
|
||||||
//p.safePrintf (
|
//p.safePrintf (
|
||||||
// "<tr><td><b>copy configuration from this "
|
// "<tr><td><b>copy configuration from this "
|
||||||
|
@ -622,6 +622,8 @@ bool processLoop ( void *state ) {
|
|||||||
// do not show header for json object display
|
// do not show header for json object display
|
||||||
if ( xd->m_contentType == CT_JSON )
|
if ( xd->m_contentType == CT_JSON )
|
||||||
includeHeader = false;
|
includeHeader = false;
|
||||||
|
if ( xd->m_contentType == CT_XML )
|
||||||
|
includeHeader = false;
|
||||||
|
|
||||||
if ( format == FORMAT_XML ) includeHeader = false;
|
if ( format == FORMAT_XML ) includeHeader = false;
|
||||||
if ( format == FORMAT_JSON ) includeHeader = false;
|
if ( format == FORMAT_JSON ) includeHeader = false;
|
||||||
@ -868,6 +870,10 @@ bool processLoop ( void *state ) {
|
|||||||
// calculate bufLen
|
// calculate bufLen
|
||||||
//long bufLen = p - buf;
|
//long bufLen = p - buf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
MDW: return the xml page as is now. 9/28/2014
|
||||||
|
|
||||||
long ct = xd->m_contentType;
|
long ct = xd->m_contentType;
|
||||||
|
|
||||||
// now filter the entire buffer to escape out the xml tags
|
// now filter the entire buffer to escape out the xml tags
|
||||||
@ -890,6 +896,7 @@ bool processLoop ( void *state ) {
|
|||||||
//bufLen = newbuf.length();
|
//bufLen = newbuf.length();
|
||||||
sb->stealBuf ( &newbuf );
|
sb->stealBuf ( &newbuf );
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// now encapsulate it in html head/tail and send it off
|
// now encapsulate it in html head/tail and send it off
|
||||||
// sendErr:
|
// sendErr:
|
||||||
|
@ -234,10 +234,10 @@ skipReplaceHost:
|
|||||||
"<b>mem used</a></td>"
|
"<b>mem used</a></td>"
|
||||||
|
|
||||||
"<td><a href=\"/admin/hosts?c=%s&sort=10\">"
|
"<td><a href=\"/admin/hosts?c=%s&sort=10\">"
|
||||||
"<b>cpu</a></td>"
|
"<b>cpu used</a></td>"
|
||||||
|
|
||||||
"<td><a href=\"/admin/hosts?c=%s&sort=17\">"
|
"<td><a href=\"/admin/hosts?c=%s&sort=17\">"
|
||||||
"<b>disk</a></td>"
|
"<b>disk used</a></td>"
|
||||||
|
|
||||||
"<td><a href=\"/admin/hosts?c=%s&sort=14\">"
|
"<td><a href=\"/admin/hosts?c=%s&sort=14\">"
|
||||||
"<b>max ping1</a></td>"
|
"<b>max ping1</a></td>"
|
||||||
@ -1224,13 +1224,13 @@ skipReplaceHost:
|
|||||||
"</tr>\n"
|
"</tr>\n"
|
||||||
|
|
||||||
"<tr class=poo>"
|
"<tr class=poo>"
|
||||||
"<td>cpu usage</td>"
|
"<td>cpu used</td>"
|
||||||
"<td>Percentage of cpu resources in use by the gb process."
|
"<td>Percentage of cpu resources in use by the gb process."
|
||||||
"</td>"
|
"</td>"
|
||||||
"</tr>\n"
|
"</tr>\n"
|
||||||
|
|
||||||
"<tr class=poo>"
|
"<tr class=poo>"
|
||||||
"<td>disk usage</td>"
|
"<td>disk used</td>"
|
||||||
"<td>Percentage of disk in use. When this gets close to "
|
"<td>Percentage of disk in use. When this gets close to "
|
||||||
"100%% you need to do something."
|
"100%% you need to do something."
|
||||||
"</td>"
|
"</td>"
|
||||||
|
289
PageInject.cpp
289
PageInject.cpp
@ -418,21 +418,67 @@ bool Msg7::inject ( char *coll ,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// returns false if would block
|
// returns false if would block
|
||||||
bool Msg7::injectTitleRec ( void *state ,
|
// bool Msg7::injectTitleRec ( void *state ,
|
||||||
void (*callback)(void *state) ,
|
// void (*callback)(void *state) ,
|
||||||
CollectionRec *cr ) {
|
// CollectionRec *cr ) {
|
||||||
m_state = state;
|
|
||||||
m_callback = callback;
|
|
||||||
|
static void sendReply ( UdpSlot *slot ) {
|
||||||
|
|
||||||
|
if ( g_errno )
|
||||||
|
g_udpServer.sendErrorReply(slot,g_errno);
|
||||||
|
else
|
||||||
|
g_udpServer.sendReply_ass(NULL,0,NULL,0,slot);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// when XmlDoc::inject() complets it calls this
|
||||||
|
void doneInjectingWrapper10 ( void *state ) {
|
||||||
|
XmlDoc *xd = (XmlDoc *)state;
|
||||||
|
UdpSlot *slot = (UdpSlot *)xd->m_slot;
|
||||||
|
long err = g_errno;
|
||||||
|
mdelete ( xd, sizeof(XmlDoc) , "PageInject" );
|
||||||
|
delete (xd);
|
||||||
|
g_errno = err;
|
||||||
|
sendReply ( slot );
|
||||||
|
}
|
||||||
|
|
||||||
|
void handleRequest7 ( UdpSlot *slot , long netnice ) {
|
||||||
|
|
||||||
|
//m_state = state;
|
||||||
|
//m_callback = callback;
|
||||||
|
|
||||||
// shortcut
|
// shortcut
|
||||||
XmlDoc *xd = &m_xd;
|
XmlDoc *xd;
|
||||||
|
try { xd = new (XmlDoc); }
|
||||||
|
catch ( ... ) {
|
||||||
|
g_errno = ENOMEM;
|
||||||
|
log("PageInject: import failed: new(%i): %s",
|
||||||
|
(int)sizeof(XmlDoc),mstrerror(g_errno));
|
||||||
|
sendReply(slot);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
mnew ( xd, sizeof(XmlDoc) , "PageInject" );
|
||||||
|
|
||||||
xd->reset();
|
//xd->reset();
|
||||||
|
char *titleRec = slot->m_readBuf;
|
||||||
|
long titleRecSize = slot->m_readBufSize;
|
||||||
|
|
||||||
|
long collnum = *(long *)titleRec;
|
||||||
|
|
||||||
|
titleRec += 4;
|
||||||
|
titleRecSize -= 4;
|
||||||
|
|
||||||
|
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||||
|
if ( ! cr ) {
|
||||||
|
sendReply(slot);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// if injecting a titlerec from an import operation use set2()
|
// if injecting a titlerec from an import operation use set2()
|
||||||
//if ( m_sbuf.length() > 0 ) {
|
//if ( m_sbuf.length() > 0 ) {
|
||||||
xd->set2 ( m_sbuf.getBufStart() ,
|
xd->set2 ( titleRec,//m_sbuf.getBufStart() ,
|
||||||
m_sbuf.length() ,
|
titleRecSize,//m_sbuf.length() ,
|
||||||
cr->m_coll ,
|
cr->m_coll ,
|
||||||
NULL, // pbuf
|
NULL, // pbuf
|
||||||
MAX_NICENESS ,
|
MAX_NICENESS ,
|
||||||
@ -442,14 +488,20 @@ bool Msg7::injectTitleRec ( void *state ,
|
|||||||
// call this when done indexing
|
// call this when done indexing
|
||||||
//xd->m_masterState = this;
|
//xd->m_masterState = this;
|
||||||
//xd->m_masterLoop = doneInjectingWrapper9;
|
//xd->m_masterLoop = doneInjectingWrapper9;
|
||||||
xd->m_state = this;
|
xd->m_state = xd;//this;
|
||||||
xd->m_callback1 = doneInjectingWrapper9;
|
xd->m_callback1 = doneInjectingWrapper10;
|
||||||
xd->m_isImporting = true;
|
xd->m_isImporting = true;
|
||||||
xd->m_isImportingValid = true;
|
xd->m_isImportingValid = true;
|
||||||
|
// hack this
|
||||||
|
xd->m_slot = slot;
|
||||||
// then index it
|
// then index it
|
||||||
if ( ! xd->indexDoc() )
|
if ( ! xd->indexDoc() )
|
||||||
return false;
|
// return if would block
|
||||||
return true;
|
return;
|
||||||
|
|
||||||
|
// all done?
|
||||||
|
//return true;
|
||||||
|
sendReply ( slot );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -795,7 +847,7 @@ class ImportState {
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
// available msg7s to use
|
// available msg7s to use
|
||||||
class Msg7 **m_ptrs;
|
class Multicast *m_ptrs;
|
||||||
long m_numPtrs;
|
long m_numPtrs;
|
||||||
|
|
||||||
// collection we are importing INTO
|
// collection we are importing INTO
|
||||||
@ -811,7 +863,7 @@ public:
|
|||||||
bool m_loadedPlaceHolder;
|
bool m_loadedPlaceHolder;
|
||||||
long long m_bfFileSize;
|
long long m_bfFileSize;
|
||||||
|
|
||||||
class Msg7 *getAvailMsg7();
|
class Multicast *getAvailMulticast();// Msg7();
|
||||||
|
|
||||||
void saveFileBookMark ( );//class Msg7 *msg7 );
|
void saveFileBookMark ( );//class Msg7 *msg7 );
|
||||||
|
|
||||||
@ -837,14 +889,11 @@ ImportState::ImportState () {
|
|||||||
|
|
||||||
void ImportState::reset() {
|
void ImportState::reset() {
|
||||||
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
||||||
Msg7 *msg7 = m_ptrs[i];
|
Multicast *mcast = &m_ptrs[i];
|
||||||
if ( ! msg7 ) continue;
|
mcast->destructor();
|
||||||
msg7->reset();
|
|
||||||
mdelete ( msg7, sizeof(Msg7) , "PageInject" );
|
|
||||||
delete (msg7);
|
|
||||||
//m_ptrs[i] = NULL;
|
//m_ptrs[i] = NULL;
|
||||||
}
|
}
|
||||||
mfree ( m_ptrs , MAXINJECTSOUT * sizeof(Msg7 *) , "ism7f" );
|
mfree ( m_ptrs , MAXINJECTSOUT * sizeof(Multicast) , "ism7f" );
|
||||||
m_ptrs = NULL;
|
m_ptrs = NULL;
|
||||||
m_numPtrs = 0;
|
m_numPtrs = 0;
|
||||||
m_fileOffset = 0LL;
|
m_fileOffset = 0LL;
|
||||||
@ -868,6 +917,8 @@ bool resumeImports ( ) {
|
|||||||
if ( s_tried ) return true;
|
if ( s_tried ) return true;
|
||||||
s_tried = true;
|
s_tried = true;
|
||||||
|
|
||||||
|
if ( g_hostdb.m_hostId != 0 ) return true;
|
||||||
|
|
||||||
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||||
if ( ! cr ) continue;
|
if ( ! cr ) continue;
|
||||||
@ -1016,7 +1067,7 @@ bool ImportState::setCurrentTitleFileAndOffset ( ) {
|
|||||||
return true;//&m_bf;
|
return true;//&m_bf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gotMsg7ReplyWrapper ( void *state ) ;
|
void gotMulticastReplyWrapper ( void *state , void *state2 ) ;
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
@ -1036,7 +1087,7 @@ bool ImportState::importLoop ( ) {
|
|||||||
|
|
||||||
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
|
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
|
||||||
|
|
||||||
if ( ! cr ) {
|
if ( ! cr || g_hostdb.m_hostId != 0 ) {
|
||||||
// if coll was deleted!
|
// if coll was deleted!
|
||||||
log("import: collnum %li deleted while importing into",
|
log("import: collnum %li deleted while importing into",
|
||||||
(long)m_collnum);
|
(long)m_collnum);
|
||||||
@ -1059,6 +1110,20 @@ bool ImportState::importLoop ( ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( ! cr->m_importEnabled ) {
|
||||||
|
// wait for all to return
|
||||||
|
if ( out > 0 ) return false;
|
||||||
|
// then delete it
|
||||||
|
log("import: collnum %li import loop disabled",
|
||||||
|
(long)m_collnum);
|
||||||
|
mdelete ( this, sizeof(ImportState) , "impstate");
|
||||||
|
delete (this);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// scan each titledb file scanning titledb0001.dat first,
|
// scan each titledb file scanning titledb0001.dat first,
|
||||||
// titledb0003.dat second etc.
|
// titledb0003.dat second etc.
|
||||||
|
|
||||||
@ -1082,16 +1147,23 @@ bool ImportState::importLoop ( ) {
|
|||||||
|
|
||||||
long long saved = m_fileOffset;
|
long long saved = m_fileOffset;
|
||||||
|
|
||||||
Msg7 *msg7;
|
//Msg7 *msg7;
|
||||||
//GigablastRequest *gr;
|
//GigablastRequest *gr;
|
||||||
SafeBuf *sbuf = NULL;
|
//SafeBuf *sbuf = NULL;
|
||||||
|
|
||||||
long need = 12;
|
long need = 12;
|
||||||
long dataSize = -1;
|
long dataSize = -1;
|
||||||
XmlDoc xd;
|
//XmlDoc xd;
|
||||||
key128_t tkey;
|
key_t tkey;
|
||||||
bool status;
|
bool status;
|
||||||
|
SafeBuf tmp;
|
||||||
|
SafeBuf *sbuf = &tmp;
|
||||||
|
long long docId;
|
||||||
|
long shardNum;
|
||||||
|
long key;
|
||||||
|
Multicast *mcast;
|
||||||
|
char *req;
|
||||||
|
long reqSize;
|
||||||
|
|
||||||
if ( m_fileOffset >= m_bfFileSize ) {
|
if ( m_fileOffset >= m_bfFileSize ) {
|
||||||
log("inject: import: done processing file %li %s",
|
log("inject: import: done processing file %li %s",
|
||||||
@ -1100,7 +1172,7 @@ bool ImportState::importLoop ( ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// read in title rec key and data size
|
// read in title rec key and data size
|
||||||
status = m_bf.read ( &tkey, 12 , m_fileOffset );
|
status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset );
|
||||||
|
|
||||||
//if ( n != 12 ) goto nextFile;
|
//if ( n != 12 ) goto nextFile;
|
||||||
if ( g_errno ) {
|
if ( g_errno ) {
|
||||||
@ -1127,6 +1199,7 @@ bool ImportState::importLoop ( ) {
|
|||||||
m_fileOffset += 4;
|
m_fileOffset += 4;
|
||||||
need += 4;
|
need += 4;
|
||||||
need += dataSize;
|
need += dataSize;
|
||||||
|
need += 4; // collnum, first 4 bytes
|
||||||
if ( dataSize < 0 || dataSize > 500000000 ) {
|
if ( dataSize < 0 || dataSize > 500000000 ) {
|
||||||
log("main: could not scan in titledb rec of "
|
log("main: could not scan in titledb rec of "
|
||||||
"corrupt dataSize of %li. BAILING ENTIRE "
|
"corrupt dataSize of %li. BAILING ENTIRE "
|
||||||
@ -1137,19 +1210,20 @@ bool ImportState::importLoop ( ) {
|
|||||||
//gr = &msg7->m_gr;
|
//gr = &msg7->m_gr;
|
||||||
|
|
||||||
//XmlDoc *xd = getAvailXmlDoc();
|
//XmlDoc *xd = getAvailXmlDoc();
|
||||||
msg7 = getAvailMsg7();
|
//msg7 = getAvailMsg7();
|
||||||
|
mcast = getAvailMulticast();
|
||||||
|
|
||||||
// if none, must have to wait for some to come back to us
|
// if none, must have to wait for some to come back to us
|
||||||
if ( ! msg7 ) {
|
if ( ! mcast ) {
|
||||||
// restore file offset
|
// restore file offset
|
||||||
//m_fileOffset = saved;
|
//m_fileOffset = saved;
|
||||||
// no, must have been a oom or something
|
// no, must have been a oom or something
|
||||||
log("import: import no msg7 available");
|
log("import: import no mcast available");
|
||||||
return true;//false;
|
return true;//false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// this is for holding a compressed titlerec
|
// this is for holding a compressed titlerec
|
||||||
sbuf = &msg7->m_sbuf;//&gr->m_sbuf;
|
//sbuf = &mcast->m_sbuf;//&gr->m_sbuf;
|
||||||
|
|
||||||
// point to start of buf
|
// point to start of buf
|
||||||
sbuf->reset();
|
sbuf->reset();
|
||||||
@ -1157,6 +1231,9 @@ bool ImportState::importLoop ( ) {
|
|||||||
// ensure we have enough room
|
// ensure we have enough room
|
||||||
sbuf->reserve ( need );
|
sbuf->reserve ( need );
|
||||||
|
|
||||||
|
// collnum first 4 bytes
|
||||||
|
sbuf->pushLong( (long)m_collnum );
|
||||||
|
|
||||||
// store title key
|
// store title key
|
||||||
sbuf->safeMemcpy ( &tkey , sizeof(key_t) );
|
sbuf->safeMemcpy ( &tkey , sizeof(key_t) );
|
||||||
|
|
||||||
@ -1175,8 +1252,8 @@ bool ImportState::importLoop ( ) {
|
|||||||
"file. %s. Skipping file %s",
|
"file. %s. Skipping file %s",
|
||||||
mstrerror(g_errno),m_bf.getFilename());
|
mstrerror(g_errno),m_bf.getFilename());
|
||||||
// essentially free up this msg7 now
|
// essentially free up this msg7 now
|
||||||
msg7->m_inUse = false;
|
//msg7->m_inUse = false;
|
||||||
msg7->reset();
|
//msg7->reset();
|
||||||
goto nextFile;
|
goto nextFile;
|
||||||
}
|
}
|
||||||
// advance
|
// advance
|
||||||
@ -1193,8 +1270,8 @@ bool ImportState::importLoop ( ) {
|
|||||||
// we use this so we know where the doc we are injecting
|
// we use this so we know where the doc we are injecting
|
||||||
// was in the foregien titledb file. so we can update our bookmark
|
// was in the foregien titledb file. so we can update our bookmark
|
||||||
// code.
|
// code.
|
||||||
msg7->m_hackFileOff = saved;//m_fileOffset;
|
mcast->m_hackFileOff = saved;//m_fileOffset;
|
||||||
msg7->m_hackFileId = m_bfFileId;
|
mcast->m_hackFileId = m_bfFileId;
|
||||||
|
|
||||||
//
|
//
|
||||||
// inject a title rec buf this time, we are doing an import
|
// inject a title rec buf this time, we are doing an import
|
||||||
@ -1243,21 +1320,55 @@ bool ImportState::importLoop ( ) {
|
|||||||
//
|
//
|
||||||
//m_fileOffset += need;
|
//m_fileOffset += need;
|
||||||
|
|
||||||
|
// get docid from key
|
||||||
|
docId = g_titledb.getDocIdFromKey ( &tkey );
|
||||||
|
|
||||||
|
// get shard that holds the titlerec for it
|
||||||
|
shardNum = g_hostdb.getShardNumFromDocId ( docId );
|
||||||
|
|
||||||
|
// for selecting which host in the shard receives it
|
||||||
|
key = (long)docId;
|
||||||
|
|
||||||
|
|
||||||
m_numOut++;
|
m_numOut++;
|
||||||
|
|
||||||
// then index it. master callback will be called
|
// then index it. master callback will be called
|
||||||
//if ( ! xd->index() ) return false;
|
//if ( ! xd->index() ) return false;
|
||||||
|
|
||||||
// TODO: make this forward the request to an appropriate host!!
|
// TODO: make this forward the request to an appropriate host!!
|
||||||
// . gr->m_sbuf is set to the titlerec so this should handle that
|
// . gr->m_sbuf is set to the titlerec so this should handle that
|
||||||
// and use XmlDoc::set4() or whatever
|
// and use XmlDoc::set4() or whatever
|
||||||
if ( msg7->injectTitleRec ( msg7 , // state
|
// if ( msg7->injectTitleRec ( msg7 , // state
|
||||||
gotMsg7ReplyWrapper , // callback
|
// gotMsg7ReplyWrapper , // callback
|
||||||
cr )) {
|
// cr )) {
|
||||||
// it didn't block somehow...
|
// // it didn't block somehow...
|
||||||
msg7->m_inUse = false;
|
// msg7->m_inUse = false;
|
||||||
msg7->gotMsg7Reply();
|
// msg7->gotMsg7Reply();
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
req = sbuf->getBufStart();
|
||||||
|
reqSize = sbuf->length();
|
||||||
|
|
||||||
|
if ( reqSize != need ) { char *xx=NULL;*xx=0 ; }
|
||||||
|
|
||||||
|
// do not free it, let multicast free it after sending it
|
||||||
|
sbuf->detachBuf();
|
||||||
|
|
||||||
|
|
||||||
|
if ( ! mcast->send ( req ,
|
||||||
|
reqSize ,
|
||||||
|
0x07 ,
|
||||||
|
true , // ownmsg?
|
||||||
|
shardNum,
|
||||||
|
false, // send to whole shard?
|
||||||
|
key , // for selecting host in shard
|
||||||
|
mcast , // state
|
||||||
|
NULL , // state2
|
||||||
|
gotMulticastReplyWrapper ,
|
||||||
|
999999 ) ) { // total timeout in seconds
|
||||||
|
log("import: import mcast had error: %s",mstrerror(g_errno));
|
||||||
|
m_numIn++;
|
||||||
}
|
}
|
||||||
|
|
||||||
goto INJECTLOOP;
|
goto INJECTLOOP;
|
||||||
@ -1288,43 +1399,37 @@ bool ImportState::importLoop ( ) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gotMsg7ReplyWrapper ( void *state ) {
|
void gotMulticastReplyWrapper ( void *state , void *state2 ) {
|
||||||
|
|
||||||
Msg7 *msg7 = (Msg7 *)state;
|
Multicast *mcast = (Multicast *)state;
|
||||||
msg7->gotMsg7Reply();
|
//msg7->gotMsg7Reply();
|
||||||
|
|
||||||
ImportState *is = msg7->m_importState;
|
ImportState *is = mcast->m_importState;
|
||||||
|
|
||||||
if ( ! is->importLoop() ) return;
|
|
||||||
|
|
||||||
log("inject: import is done");
|
|
||||||
|
|
||||||
mdelete ( is, sizeof(ImportState) , "impstate");
|
|
||||||
delete (is);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Msg7::gotMsg7Reply ( ) {
|
|
||||||
|
|
||||||
if ( m_inUse ) { char *xx=NULL;*xx=0; }
|
|
||||||
|
|
||||||
ImportState *is = m_importState;
|
|
||||||
|
|
||||||
is->m_numIn++;
|
is->m_numIn++;
|
||||||
|
|
||||||
log("import: imported %lli docs (off=%lli)",
|
log("import: imported %lli docs (off=%lli)",
|
||||||
is->m_numIn,is->m_fileOffset);
|
is->m_numIn,is->m_fileOffset);
|
||||||
|
|
||||||
// if we were the least far ahead of scanning the files
|
if ( ! is->importLoop() ) return;
|
||||||
// then save our position in case server crashes so we can
|
|
||||||
// resume
|
|
||||||
//is->saveFileBookMark ( this );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// we will be called again when this multicast reply comes in...
|
||||||
|
if ( is->m_numIn < is->m_numOut ) return;
|
||||||
|
|
||||||
|
log("inject: import is done");
|
||||||
|
|
||||||
|
CollectionRec *cr = g_collectiondb.getRec ( is->m_collnum );
|
||||||
|
// signify to qa.cpp that we are done
|
||||||
|
if ( cr ) cr->m_importState = NULL;
|
||||||
|
|
||||||
|
mdelete ( is, sizeof(ImportState) , "impstate");
|
||||||
|
delete (is);
|
||||||
|
}
|
||||||
|
|
||||||
// . return NULL with g_errno set on error
|
// . return NULL with g_errno set on error
|
||||||
// . importLoop() calls this to get a msg7 to inject a doc from the foreign
|
// . importLoop() calls this to get a msg7 to inject a doc from the foreign
|
||||||
// titledb file into our local collection
|
// titledb file into our local collection
|
||||||
Msg7 *ImportState::getAvailMsg7 ( ) {
|
Multicast *ImportState::getAvailMulticast() { // Msg7 ( ) {
|
||||||
|
|
||||||
//static XmlDoc **s_ptrs = NULL;
|
//static XmlDoc **s_ptrs = NULL;
|
||||||
|
|
||||||
@ -1334,11 +1439,11 @@ Msg7 *ImportState::getAvailMsg7 ( ) {
|
|||||||
// each msg7 has an xmldoc doc in it
|
// each msg7 has an xmldoc doc in it
|
||||||
if ( ! m_ptrs ) {
|
if ( ! m_ptrs ) {
|
||||||
long max = (long)MAXINJECTSOUT;
|
long max = (long)MAXINJECTSOUT;
|
||||||
m_ptrs=(Msg7 **)mcalloc(sizeof(Msg7 *)* max,"sxdp");
|
m_ptrs=(Multicast *)mcalloc(sizeof(Multicast)* max,"sxdp");
|
||||||
if ( ! m_ptrs ) return NULL;
|
if ( ! m_ptrs ) return NULL;
|
||||||
m_numPtrs = max;//(long)MAXINJECTSOUT;
|
m_numPtrs = max;//(long)MAXINJECTSOUT;
|
||||||
//for ( long i = 0 ; i < MAXINJECTSOUT ;i++ )
|
for ( long i = 0 ; i < m_numPtrs ;i++ )
|
||||||
// m_ptrs[i].constructor();
|
m_ptrs[i].constructor();
|
||||||
}
|
}
|
||||||
|
|
||||||
// respect the user limit for this coll
|
// respect the user limit for this coll
|
||||||
@ -1351,24 +1456,11 @@ Msg7 *ImportState::getAvailMsg7 ( ) {
|
|||||||
// find one not in use and return it
|
// find one not in use and return it
|
||||||
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
||||||
// point to it
|
// point to it
|
||||||
Msg7 *m7 = m_ptrs[i];
|
Multicast *mcast = &m_ptrs[i];
|
||||||
// if NULL then init it and use it
|
if ( mcast->m_inUse ) continue;
|
||||||
if ( ! m7 ) {
|
//m7->m_inUse = true;
|
||||||
try { m7 = new (Msg7); }
|
mcast->m_importState = this;
|
||||||
catch ( ... ) {
|
return mcast;
|
||||||
g_errno = ENOMEM;
|
|
||||||
log("PageInject: new(%li): %s",
|
|
||||||
(long)sizeof(Msg7),mstrerror(g_errno));
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
mnew ( m7, sizeof(Msg7) , "dmsg7");
|
|
||||||
// assign so we can delete later
|
|
||||||
m_ptrs[i] = m7;
|
|
||||||
}
|
|
||||||
if ( m7->m_inUse ) continue;
|
|
||||||
m7->m_inUse = true;
|
|
||||||
m7->m_importState = this;
|
|
||||||
return m7;
|
|
||||||
}
|
}
|
||||||
// none avail
|
// none avail
|
||||||
g_errno = 0;
|
g_errno = 0;
|
||||||
@ -1376,6 +1468,7 @@ Msg7 *ImportState::getAvailMsg7 ( ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void saveImportStates ( ) {
|
void saveImportStates ( ) {
|
||||||
|
if ( g_hostdb.m_myHost->m_hostId != 0 ) return;
|
||||||
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||||
if ( ! cr ) continue;
|
if ( ! cr ) continue;
|
||||||
@ -1396,22 +1489,20 @@ void ImportState::saveFileBookMark ( ) { //Msg7 *msg7 ) {
|
|||||||
// if there is one outstanding the preceeded us, we can't update
|
// if there is one outstanding the preceeded us, we can't update
|
||||||
// the bookmark just yet.
|
// the bookmark just yet.
|
||||||
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
for ( long i = 0 ; i < m_numPtrs ; i++ ) {
|
||||||
Msg7 *m7 = m_ptrs[i];
|
Multicast *mcast = &m_ptrs[i];
|
||||||
// can be null if never used
|
if ( ! mcast->m_inUse ) continue;
|
||||||
if ( ! m7 ) continue;
|
|
||||||
if ( ! m7->m_inUse ) continue;
|
|
||||||
if ( minOff == -1 ) {
|
if ( minOff == -1 ) {
|
||||||
minOff = m7->m_hackFileOff;
|
minOff = mcast->m_hackFileOff;
|
||||||
minFileId = m7->m_hackFileId;
|
minFileId = mcast->m_hackFileId;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ( m7->m_hackFileId > minFileId )
|
if ( mcast->m_hackFileId > minFileId )
|
||||||
continue;
|
continue;
|
||||||
if ( m7->m_hackFileId == minFileId &&
|
if ( mcast->m_hackFileId == minFileId &&
|
||||||
m7->m_hackFileOff > minOff )
|
mcast->m_hackFileOff > minOff )
|
||||||
continue;
|
continue;
|
||||||
minOff = m7->m_hackFileOff;
|
minOff = mcast->m_hackFileOff;
|
||||||
minFileId = m7->m_hackFileId;
|
minFileId = mcast->m_hackFileId;
|
||||||
}
|
}
|
||||||
|
|
||||||
char fname[256];
|
char fname[256];
|
||||||
|
12
PageInject.h
12
PageInject.h
@ -1,6 +1,8 @@
|
|||||||
#ifndef GBINJECT_H
|
#ifndef GBINJECT_H
|
||||||
#define GBINJECT_H
|
#define GBINJECT_H
|
||||||
|
|
||||||
|
void handleRequest7 ( class UdpSlot *slot , long netnice ) ;
|
||||||
|
|
||||||
bool sendPageInject ( class TcpSocket *s, class HttpRequest *hr );
|
bool sendPageInject ( class TcpSocket *s, class HttpRequest *hr );
|
||||||
|
|
||||||
bool resumeImports ( ) ;
|
bool resumeImports ( ) ;
|
||||||
@ -36,8 +38,8 @@ public:
|
|||||||
void *m_state;
|
void *m_state;
|
||||||
void (* m_callback )(void *state);
|
void (* m_callback )(void *state);
|
||||||
|
|
||||||
long long m_hackFileOff;
|
//long long m_hackFileOff;
|
||||||
long m_hackFileId;
|
//long m_hackFileId;
|
||||||
|
|
||||||
//long m_crawlbotAPI;
|
//long m_crawlbotAPI;
|
||||||
|
|
||||||
@ -63,9 +65,9 @@ public:
|
|||||||
void (*callback)(void *state) );
|
void (*callback)(void *state) );
|
||||||
|
|
||||||
|
|
||||||
bool injectTitleRec ( void *state ,
|
//bool injectTitleRec ( void *state ,
|
||||||
void (*callback)(void *state) ,
|
// void (*callback)(void *state) ,
|
||||||
class CollectionRec *cr );
|
// class CollectionRec *cr );
|
||||||
|
|
||||||
void gotMsg7Reply ();
|
void gotMsg7Reply ();
|
||||||
|
|
||||||
|
@ -171,8 +171,58 @@ void doneReindexing ( void *state ) {
|
|||||||
//
|
//
|
||||||
/////
|
/////
|
||||||
|
|
||||||
|
HttpRequest *hr = &gr->m_hr;
|
||||||
|
|
||||||
|
char format = hr->getReplyFormat();
|
||||||
|
|
||||||
SafeBuf sb;
|
SafeBuf sb;
|
||||||
|
|
||||||
|
|
||||||
|
char *ct = "text/html";
|
||||||
|
if ( format == FORMAT_JSON ) ct = "application/json";
|
||||||
|
if ( format == FORMAT_XML ) ct = "text/xml";
|
||||||
|
|
||||||
|
if ( format == FORMAT_XML ) {
|
||||||
|
sb.safePrintf("<response>\n"
|
||||||
|
"\t<statusCode>0</statusCode>\n"
|
||||||
|
"\t<statusMsg>Success</statusMsg>\n"
|
||||||
|
"\t<matchingResults>%li</matchingResults>\n"
|
||||||
|
"</response>"
|
||||||
|
, st->m_msg1c.m_numDocIdsAdded
|
||||||
|
);
|
||||||
|
g_httpServer.sendDynamicPage ( gr->m_socket,
|
||||||
|
sb.getBufStart(),
|
||||||
|
sb.length(),
|
||||||
|
-1,
|
||||||
|
false,ct);
|
||||||
|
mdelete ( st , sizeof(State13) , "PageTagdb" );
|
||||||
|
delete (st);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if ( format == FORMAT_JSON ) {
|
||||||
|
sb.safePrintf("{\"response\":{\n"
|
||||||
|
"\t\"statusCode\":0,\n"
|
||||||
|
"\t\"statusMsg\":\"Success\",\n"
|
||||||
|
"\t\"matchingResults\":%li\n"
|
||||||
|
"}\n"
|
||||||
|
"}\n"
|
||||||
|
, st->m_msg1c.m_numDocIdsAdded
|
||||||
|
);
|
||||||
|
g_httpServer.sendDynamicPage ( gr->m_socket,
|
||||||
|
sb.getBufStart(),
|
||||||
|
sb.length(),
|
||||||
|
-1,
|
||||||
|
false,ct);
|
||||||
|
mdelete ( st , sizeof(State13) , "PageTagdb" );
|
||||||
|
delete (st);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
g_pages.printAdminTop ( &sb , gr->m_socket , &gr->m_hr );
|
g_pages.printAdminTop ( &sb , gr->m_socket , &gr->m_hr );
|
||||||
|
|
||||||
sb.safePrintf("<style>"
|
sb.safePrintf("<style>"
|
||||||
|
300
PageResults.cpp
300
PageResults.cpp
@ -38,6 +38,9 @@ static void gotState ( void *state ) ;
|
|||||||
static bool gotResults ( void *state ) ;
|
static bool gotResults ( void *state ) ;
|
||||||
|
|
||||||
bool replaceParm ( char *cgi , SafeBuf *newUrl , HttpRequest *hr ) ;
|
bool replaceParm ( char *cgi , SafeBuf *newUrl , HttpRequest *hr ) ;
|
||||||
|
bool replaceParm2 ( char *cgi , SafeBuf *newUrl ,
|
||||||
|
char *oldUrl , long oldUrlLen ) ;
|
||||||
|
|
||||||
|
|
||||||
bool printCSVHeaderRow ( SafeBuf *sb , State0 *st ) ;
|
bool printCSVHeaderRow ( SafeBuf *sb , State0 *st ) ;
|
||||||
|
|
||||||
@ -48,6 +51,8 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps ,
|
|||||||
|
|
||||||
bool printScoresHeader ( SafeBuf *sb ) ;
|
bool printScoresHeader ( SafeBuf *sb ) ;
|
||||||
|
|
||||||
|
bool printMetaContent ( Msg40 *msg40 , long i ,State0 *st, SafeBuf *sb );
|
||||||
|
|
||||||
bool printSingleScore ( SafeBuf *sb , SearchInput *si , SingleScore *ss ,
|
bool printSingleScore ( SafeBuf *sb , SearchInput *si , SingleScore *ss ,
|
||||||
Msg20Reply *mr , Msg40 *msg40 ) ;
|
Msg20Reply *mr , Msg40 *msg40 ) ;
|
||||||
|
|
||||||
@ -2275,6 +2280,18 @@ bool printSearchResultsHeader ( State0 *st ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( si->m_format == FORMAT_XML )
|
||||||
|
sb->safePrintf("\t<numResultsOmitted>%li"
|
||||||
|
"</numResultsOmitted>\n",
|
||||||
|
msg40->m_omitCount);
|
||||||
|
|
||||||
|
if ( si->m_format == FORMAT_JSON )
|
||||||
|
sb->safePrintf("\"numResultsOmitted\":%li,\n",
|
||||||
|
msg40->m_omitCount);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//bool xml = si->m_xml;
|
//bool xml = si->m_xml;
|
||||||
|
|
||||||
|
|
||||||
@ -2531,7 +2548,8 @@ bool printSearchResultsHeader ( State0 *st ) {
|
|||||||
Query qq3;
|
Query qq3;
|
||||||
Query *qq2;
|
Query *qq2;
|
||||||
bool firstIgnored;
|
bool firstIgnored;
|
||||||
bool isAdmin = si->m_isRootAdmin;
|
//bool isAdmin = si->m_isRootAdmin;
|
||||||
|
bool isAdmin = (si->m_isRootAdmin || si->m_isCollAdmin);
|
||||||
if ( si->m_format != FORMAT_HTML ) isAdmin = false;
|
if ( si->m_format != FORMAT_HTML ) isAdmin = false;
|
||||||
|
|
||||||
// otherwise, we had no error
|
// otherwise, we had no error
|
||||||
@ -3012,6 +3030,45 @@ bool printSearchResultsTail ( State0 *st ) {
|
|||||||
args.safePrintf("&sites=%s",si->m_sites);
|
args.safePrintf("&sites=%s",si->m_sites);
|
||||||
|
|
||||||
|
|
||||||
|
if ( si->m_format == FORMAT_HTML &&
|
||||||
|
msg40->m_omitCount ) { // && firstNum == 0 ) {
|
||||||
|
// . add our cgi to the original url
|
||||||
|
// . so if it has &qlang=de and they select &qlang=en
|
||||||
|
// we have to replace it... etc.
|
||||||
|
SafeBuf newUrl;
|
||||||
|
// show banned results
|
||||||
|
replaceParm2 ("sb=1",
|
||||||
|
&newUrl,
|
||||||
|
hr->m_origUrlRequest,
|
||||||
|
hr->m_origUrlRequestLen );
|
||||||
|
// no deduping by summary or content hash etc.
|
||||||
|
SafeBuf newUrl2;
|
||||||
|
replaceParm2("dr=0",&newUrl2,newUrl.getBufStart(),
|
||||||
|
newUrl.length());
|
||||||
|
// and no site clustering
|
||||||
|
SafeBuf newUrl3;
|
||||||
|
replaceParm2 ( "sc=0", &newUrl3 , newUrl2.getBufStart(),
|
||||||
|
newUrl2.length());
|
||||||
|
// start at results #0 again
|
||||||
|
SafeBuf newUrl4;
|
||||||
|
replaceParm2 ( "s=0", &newUrl4 , newUrl3.getBufStart(),
|
||||||
|
newUrl3.length());
|
||||||
|
|
||||||
|
sb->safePrintf("<center>"
|
||||||
|
"<i>"
|
||||||
|
"%li results were omitted because they "
|
||||||
|
"were considered duplicates, banned, <br>"
|
||||||
|
"or "
|
||||||
|
"from the same site as other results. "
|
||||||
|
"<a href=%s>Click here to show all results</a>."
|
||||||
|
"</i>"
|
||||||
|
"</center>"
|
||||||
|
"<br><br>"
|
||||||
|
, msg40->m_omitCount
|
||||||
|
, newUrl4.getBufStart() );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( firstNum > 0 &&
|
if ( firstNum > 0 &&
|
||||||
(si->m_format == FORMAT_HTML ||
|
(si->m_format == FORMAT_HTML ||
|
||||||
si->m_format == FORMAT_WIDGET_IFRAME //||
|
si->m_format == FORMAT_WIDGET_IFRAME //||
|
||||||
@ -3075,7 +3132,9 @@ bool printSearchResultsTail ( State0 *st ) {
|
|||||||
|
|
||||||
// print try this search on...
|
// print try this search on...
|
||||||
// an additional <br> if we had a Next or Prev results link
|
// an additional <br> if we had a Next or Prev results link
|
||||||
if ( sb->length() > remember ) sb->safeMemcpy ("<br>" , 4 );
|
if ( sb->length() > remember &&
|
||||||
|
si->m_format == FORMAT_HTML )
|
||||||
|
sb->safeMemcpy ("<br>" , 4 );
|
||||||
|
|
||||||
//
|
//
|
||||||
// END PRINT PREV 10 NEXT 10 links!
|
// END PRINT PREV 10 NEXT 10 links!
|
||||||
@ -3107,7 +3166,7 @@ bool printSearchResultsTail ( State0 *st ) {
|
|||||||
sb->safePrintf("<input name=c type=hidden value=\"%s\">",coll);
|
sb->safePrintf("<input name=c type=hidden value=\"%s\">",coll);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isAdmin = si->m_isRootAdmin;
|
bool isAdmin = (si->m_isRootAdmin || si->m_isCollAdmin);
|
||||||
if ( si->m_format != FORMAT_HTML ) isAdmin = false;
|
if ( si->m_format != FORMAT_HTML ) isAdmin = false;
|
||||||
|
|
||||||
if ( isAdmin && banSites.length() > 0 )
|
if ( isAdmin && banSites.length() > 0 )
|
||||||
@ -3554,6 +3613,12 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
|
|
||||||
long long d = msg40->getDocId(ix);
|
long long d = msg40->getDocId(ix);
|
||||||
|
|
||||||
|
// do not print if it is a summary dup or had some error
|
||||||
|
// long level = (long)msg40->getClusterLevel(ix);
|
||||||
|
// if ( level != CR_OK &&
|
||||||
|
// level != CR_INDENT )
|
||||||
|
// return true;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if ( si->m_docIdsOnly ) {
|
if ( si->m_docIdsOnly ) {
|
||||||
@ -3618,7 +3683,9 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// just print cached web page?
|
// just print cached web page?
|
||||||
if ( mr->ptr_content ) {
|
if ( mr->ptr_content &&
|
||||||
|
si->m_format == FORMAT_JSON &&
|
||||||
|
strstr(mr->ptr_ubuf,"-diffbotxyz") ) {
|
||||||
|
|
||||||
// for json items separate with \n,\n
|
// for json items separate with \n,\n
|
||||||
if ( si->m_format != FORMAT_HTML && *numPrintedSoFar > 0 )
|
if ( si->m_format != FORMAT_HTML && *numPrintedSoFar > 0 )
|
||||||
@ -3627,8 +3694,11 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
// a dud? just print empty {}'s
|
// a dud? just print empty {}'s
|
||||||
if ( mr->size_content == 1 )
|
if ( mr->size_content == 1 )
|
||||||
sb->safePrintf("{}");
|
sb->safePrintf("{}");
|
||||||
|
// if it's a diffbot object just print it out directly
|
||||||
|
// into the json. it is already json.
|
||||||
else
|
else
|
||||||
sb->safeStrcpy ( mr->ptr_content );
|
sb->safeStrcpy ( mr->ptr_content );
|
||||||
|
|
||||||
|
|
||||||
// . let's hack the spidertime onto the end
|
// . let's hack the spidertime onto the end
|
||||||
// . so when we sort by that using gbsortby:spiderdate
|
// . so when we sort by that using gbsortby:spiderdate
|
||||||
@ -3682,6 +3752,27 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( si->m_format == FORMAT_XML )
|
||||||
|
sb->safePrintf("\t<result>\n" );
|
||||||
|
|
||||||
|
if ( si->m_format == FORMAT_JSON ) {
|
||||||
|
if ( *numPrintedSoFar != 0 ) sb->safePrintf(",\n");
|
||||||
|
sb->safePrintf("\t{\n" );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( mr->ptr_content && si->m_format == FORMAT_XML ) {
|
||||||
|
sb->safePrintf("\t\t<content><![CDATA[" );
|
||||||
|
sb->cdataEncode ( mr->ptr_content );
|
||||||
|
sb->safePrintf("]]></content>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( mr->ptr_content && si->m_format == FORMAT_JSON ) {
|
||||||
|
sb->safePrintf("\t\t\"content\":\"" );
|
||||||
|
sb->jsonEncode ( mr->ptr_content );
|
||||||
|
sb->safePrintf("\",\n");
|
||||||
|
}
|
||||||
|
|
||||||
Highlight hi;
|
Highlight hi;
|
||||||
|
|
||||||
// get the url
|
// get the url
|
||||||
@ -3703,7 +3794,7 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
// indent it if level is 2
|
// indent it if level is 2
|
||||||
bool indent = false;
|
bool indent = false;
|
||||||
|
|
||||||
bool isAdmin = si->m_isRootAdmin;
|
bool isAdmin = (si->m_isRootAdmin || si->m_isCollAdmin);
|
||||||
if ( si->m_format == FORMAT_XML ) isAdmin = false;
|
if ( si->m_format == FORMAT_XML ) isAdmin = false;
|
||||||
|
|
||||||
//unsigned long long lastSiteHash = siteHash;
|
//unsigned long long lastSiteHash = siteHash;
|
||||||
@ -3747,15 +3838,6 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( si->m_format == FORMAT_XML )
|
|
||||||
sb->safePrintf("\t<result>\n" );
|
|
||||||
|
|
||||||
if ( si->m_format == FORMAT_JSON ) {
|
|
||||||
if ( *numPrintedSoFar != 0 ) sb->safePrintf(",\n");
|
|
||||||
sb->safePrintf("\t{\n" );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// the score if admin
|
// the score if admin
|
||||||
/*
|
/*
|
||||||
if ( isAdmin ) {
|
if ( isAdmin ) {
|
||||||
@ -4354,10 +4436,21 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// new line if not xml
|
// new line if not xml. even summary is empty we need it too like
|
||||||
if ( si->m_format == FORMAT_HTML && strLen )
|
// when showing xml docs - MDW 9/28/2014
|
||||||
|
if ( si->m_format == FORMAT_HTML ) // && strLen )
|
||||||
sb->safePrintf("<br>\n");
|
sb->safePrintf("<br>\n");
|
||||||
|
|
||||||
|
|
||||||
|
/////////
|
||||||
|
//
|
||||||
|
// meta tag values for &dt=keywords ...
|
||||||
|
//
|
||||||
|
/////////
|
||||||
|
if ( mr->ptr_dbuf && mr->size_dbuf>1 )
|
||||||
|
printMetaContent ( msg40 , ix,st,sb);
|
||||||
|
|
||||||
|
|
||||||
////////////
|
////////////
|
||||||
//
|
//
|
||||||
// . print DMOZ topics under the summary
|
// . print DMOZ topics under the summary
|
||||||
@ -4678,7 +4771,7 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
if ( isAdmin && si->m_format == FORMAT_HTML ) {
|
if ( si->m_format == FORMAT_HTML ) {
|
||||||
long lang = mr->m_language;
|
long lang = mr->m_language;
|
||||||
if ( lang ) sb->safePrintf(" - %s",getLanguageString(lang));
|
if ( lang ) sb->safePrintf(" - %s",getLanguageString(lang));
|
||||||
uint16_t cc = mr->m_computedCountry;
|
uint16_t cc = mr->m_computedCountry;
|
||||||
@ -4826,7 +4919,8 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
"urls=");
|
"urls=");
|
||||||
sb->urlEncode ( url , gbstrlen(url) , false );
|
sb->urlEncode ( url , gbstrlen(url) , false );
|
||||||
unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
|
unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
|
||||||
sb->safePrintf("&rand64=%llu\">respider</a>\n",rand64);
|
sb->safePrintf("&c=%s&rand64=%llu\">respider</a>\n",
|
||||||
|
coll,rand64);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( si->m_format == FORMAT_HTML ) {
|
if ( si->m_format == FORMAT_HTML ) {
|
||||||
@ -4955,6 +5049,7 @@ bool printResult ( State0 *st, long ix , long *numPrintedSoFar ) {
|
|||||||
coll );//, dbuf );
|
coll );//, dbuf );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( si->m_format == FORMAT_HTML && ( isAdmin || cr->m_isCustomCrawl)){
|
if ( si->m_format == FORMAT_HTML && ( isAdmin || cr->m_isCustomCrawl)){
|
||||||
char *un = "";
|
char *un = "";
|
||||||
long banVal = 1;
|
long banVal = 1;
|
||||||
@ -6308,7 +6403,7 @@ bool printScoresHeader ( SafeBuf *sb ) {
|
|||||||
"<td>spam</td>"
|
"<td>spam</td>"
|
||||||
"<td>inlnkPR</td>" // nlinkSiteRank</td>"
|
"<td>inlnkPR</td>" // nlinkSiteRank</td>"
|
||||||
"<td>termFreq</td>"
|
"<td>termFreq</td>"
|
||||||
"</tr>"
|
"</tr>\n"
|
||||||
);
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -6532,9 +6627,9 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
|
|
||||||
|
|
||||||
sb->safePrintf("<tr>"
|
sb->safePrintf("<tr>"
|
||||||
"<td rowspan=2>%.03f</td>"
|
"<td rowspan=2>%.03f</td>\n"
|
||||||
"<td>%s <font color=orange>%.1f"
|
"<td>%s <font color=orange>%.1f"
|
||||||
"</font></td>"
|
"</font></td\n>"
|
||||||
// wordpos
|
// wordpos
|
||||||
"<td>"
|
"<td>"
|
||||||
"<a href=\"/get?d="
|
"<a href=\"/get?d="
|
||||||
@ -6548,17 +6643,17 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
"hipos=%li&c=%s#hipos\">"
|
"hipos=%li&c=%s#hipos\">"
|
||||||
,(long)ss->m_wordPos
|
,(long)ss->m_wordPos
|
||||||
,si->m_cr->m_coll);
|
,si->m_cr->m_coll);
|
||||||
sb->safePrintf("%li</a></td>"
|
sb->safePrintf("%li</a></td>\n"
|
||||||
"<td>%s <font color=blue>%.1f"
|
"<td>%s <font color=blue>%.1f"
|
||||||
"</font></td>" // syn
|
"</font></td>\n" // syn
|
||||||
|
|
||||||
// wikibigram?/weight
|
// wikibigram?/weight
|
||||||
"<td>%s <font color=green>%.02f</font></td>"
|
"<td>%s <font color=green>%.02f</font></td>\n"
|
||||||
|
|
||||||
//"<td>%li/<font color=green>%f"
|
//"<td>%li/<font color=green>%f"
|
||||||
//"</font></td>" // diversity
|
//"</font></td>" // diversity
|
||||||
"<td>%li <font color=purple>"
|
"<td>%li <font color=purple>"
|
||||||
"%.02f</font></td>" // density
|
"%.02f</font></td>\n" // density
|
||||||
, (long)ss->m_wordPos
|
, (long)ss->m_wordPos
|
||||||
, syn
|
, syn
|
||||||
, sw // synonym weight
|
, sw // synonym weight
|
||||||
@ -6572,7 +6667,7 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
if ( ss->m_hashGroup == HASHGROUP_INLINKTEXT ) {
|
if ( ss->m_hashGroup == HASHGROUP_INLINKTEXT ) {
|
||||||
sb->safePrintf("<td> </td>"
|
sb->safePrintf("<td> </td>"
|
||||||
"<td>%li <font color=red>%.02f"
|
"<td>%li <font color=red>%.02f"
|
||||||
"</font></td>" // wordspam
|
"</font></td>\n" // wordspam
|
||||||
, (long)ss->m_wordSpamRank
|
, (long)ss->m_wordSpamRank
|
||||||
, wsw
|
, wsw
|
||||||
);
|
);
|
||||||
@ -6580,7 +6675,7 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
else {
|
else {
|
||||||
sb->safePrintf("<td>%li <font color=red>%.02f"
|
sb->safePrintf("<td>%li <font color=red>%.02f"
|
||||||
"</font></td>" // wordspam
|
"</font></td>" // wordspam
|
||||||
"<td> </td>"
|
"<td> </td>\n"
|
||||||
, (long)ss->m_wordSpamRank
|
, (long)ss->m_wordSpamRank
|
||||||
, wsw
|
, wsw
|
||||||
);
|
);
|
||||||
@ -6588,8 +6683,8 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
}
|
}
|
||||||
|
|
||||||
sb->safePrintf("<td id=tf>%lli <font color=magenta>"
|
sb->safePrintf("<td id=tf>%lli <font color=magenta>"
|
||||||
"%.02f</font></td>" // termfreq
|
"%.02f</font></td>\n" // termfreq
|
||||||
"</tr>"
|
"</tr>\n"
|
||||||
, tf
|
, tf
|
||||||
, tfw
|
, tfw
|
||||||
);
|
);
|
||||||
@ -6624,7 +6719,7 @@ bool printSingleScore ( SafeBuf *sb ,
|
|||||||
"<font color=magenta>%.02f</font>"
|
"<font color=magenta>%.02f</font>"
|
||||||
//" / ( 3.0 )"
|
//" / ( 3.0 )"
|
||||||
// end formula
|
// end formula
|
||||||
"</td></tr>"
|
"</td></tr>\n"
|
||||||
, ss->m_finalScore
|
, ss->m_finalScore
|
||||||
//, (long)MAXWORDPOS+1
|
//, (long)MAXWORDPOS+1
|
||||||
, hgw
|
, hgw
|
||||||
@ -7298,6 +7393,11 @@ bool printLogoAndSearchBox ( SafeBuf *sb , HttpRequest *hr , long catId ,
|
|||||||
long qlen;
|
long qlen;
|
||||||
char *qstr = hr->getString("q",&qlen,"",NULL);
|
char *qstr = hr->getString("q",&qlen,"",NULL);
|
||||||
sb->htmlEncode ( qstr , qlen , false );
|
sb->htmlEncode ( qstr , qlen , false );
|
||||||
|
|
||||||
|
// if it was an advanced search, this can be empty
|
||||||
|
if ( qlen == 0 && si->m_displayQuery )
|
||||||
|
sb->htmlEncode ( si->m_displayQuery );
|
||||||
|
|
||||||
sb->safePrintf ("\">"
|
sb->safePrintf ("\">"
|
||||||
//"<input type=submit value=\"Search\" border=0>"
|
//"<input type=submit value=\"Search\" border=0>"
|
||||||
|
|
||||||
@ -8677,40 +8777,66 @@ bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) {
|
|||||||
n++;
|
n++;
|
||||||
|
|
||||||
|
|
||||||
|
// family filter
|
||||||
|
s_mi[n].m_menuNum = 8;
|
||||||
|
s_mi[n].m_title = "Family Filter Off";
|
||||||
|
s_mi[n].m_cgi = "ff=0";
|
||||||
|
s_mi[n].m_icon = NULL;
|
||||||
|
n++;
|
||||||
|
|
||||||
|
s_mi[n].m_menuNum = 8;
|
||||||
|
s_mi[n].m_title = "Family Filter On";
|
||||||
|
s_mi[n].m_cgi = "ff=1";
|
||||||
|
s_mi[n].m_icon = NULL;
|
||||||
|
n++;
|
||||||
|
|
||||||
|
// META TAGS
|
||||||
|
s_mi[n].m_menuNum = 9;
|
||||||
|
s_mi[n].m_title = "No Meta Tags";
|
||||||
|
s_mi[n].m_cgi = "dt=";
|
||||||
|
s_mi[n].m_icon = NULL;
|
||||||
|
n++;
|
||||||
|
|
||||||
|
s_mi[n].m_menuNum = 9;
|
||||||
|
s_mi[n].m_title = "Show Meta Tags";
|
||||||
|
s_mi[n].m_cgi = "dt=keywords+description";
|
||||||
|
s_mi[n].m_icon = NULL;
|
||||||
|
n++;
|
||||||
|
|
||||||
|
|
||||||
// ADMIN
|
// ADMIN
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 8;
|
s_mi[n].m_menuNum = 10;
|
||||||
s_mi[n].m_title = "Show Admin View";
|
s_mi[n].m_title = "Show Admin View";
|
||||||
s_mi[n].m_cgi = "admin=1";
|
s_mi[n].m_cgi = "admin=1";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
n++;
|
n++;
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 8;
|
s_mi[n].m_menuNum = 10;
|
||||||
s_mi[n].m_title = "Show User View";
|
s_mi[n].m_title = "Show User View";
|
||||||
s_mi[n].m_cgi = "admin=0";
|
s_mi[n].m_cgi = "admin=0";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
n++;
|
n++;
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 9;
|
s_mi[n].m_menuNum = 11;
|
||||||
s_mi[n].m_title = "Action";
|
s_mi[n].m_title = "Action";
|
||||||
s_mi[n].m_cgi = "";
|
s_mi[n].m_cgi = "";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
n++;
|
n++;
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 9;
|
s_mi[n].m_menuNum = 11;
|
||||||
s_mi[n].m_title = "Respider all results";
|
s_mi[n].m_title = "Respider all results";
|
||||||
s_mi[n].m_cgi = "/admin/reindex";
|
s_mi[n].m_cgi = "/admin/reindex";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
n++;
|
n++;
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 9;
|
s_mi[n].m_menuNum = 11;
|
||||||
s_mi[n].m_title = "Delete all results";
|
s_mi[n].m_title = "Delete all results";
|
||||||
s_mi[n].m_cgi = "/admin/reindex";
|
s_mi[n].m_cgi = "/admin/reindex";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
n++;
|
n++;
|
||||||
|
|
||||||
s_mi[n].m_menuNum = 9;
|
s_mi[n].m_menuNum = 11;
|
||||||
s_mi[n].m_title = "Scrape from google/bing";
|
s_mi[n].m_title = "Scrape from google/bing";
|
||||||
s_mi[n].m_cgi = "/admin/inject";
|
s_mi[n].m_cgi = "/admin/inject";
|
||||||
s_mi[n].m_icon = NULL;
|
s_mi[n].m_icon = NULL;
|
||||||
@ -8729,10 +8855,12 @@ bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) {
|
|||||||
for ( long i = 0 ; i <= s_mi[s_num-1].m_menuNum ; i++ ) {
|
for ( long i = 0 ; i <= s_mi[s_num-1].m_menuNum ; i++ ) {
|
||||||
// after 4 make a new line
|
// after 4 make a new line
|
||||||
if ( i == 5 ) sb->safePrintf("<br><br>");
|
if ( i == 5 ) sb->safePrintf("<br><br>");
|
||||||
|
if ( i == 9 ) sb->safePrintf("<br><br>");
|
||||||
printMenu ( sb , i , hr );
|
printMenu ( sb , i , hr );
|
||||||
}
|
}
|
||||||
|
|
||||||
sb->safePrintf("</div>\n");
|
sb->safePrintf("</div>\n");
|
||||||
|
sb->safePrintf("<br>\n");
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -8829,6 +8957,9 @@ bool printMenu ( SafeBuf *sb , long menuNum , HttpRequest *hr ) {
|
|||||||
//" onmouseout=\""
|
//" onmouseout=\""
|
||||||
//"this.style.display='none';\""
|
//"this.style.display='none';\""
|
||||||
|
|
||||||
|
// if clicking on scrollbar do not hide menu!
|
||||||
|
" onmousedown=\"inmenuclick=1;\" "
|
||||||
|
|
||||||
">"
|
">"
|
||||||
, mi->m_menuNum
|
, mi->m_menuNum
|
||||||
);
|
);
|
||||||
@ -8923,6 +9054,7 @@ bool printMenu ( SafeBuf *sb , long menuNum , HttpRequest *hr ) {
|
|||||||
, frontTag
|
, frontTag
|
||||||
, first->m_title
|
, first->m_title
|
||||||
, backTag
|
, backTag
|
||||||
|
// print triangle
|
||||||
,0xe2
|
,0xe2
|
||||||
,0x96
|
,0x96
|
||||||
,0xbc
|
,0xbc
|
||||||
@ -8937,6 +9069,15 @@ bool replaceParm ( char *cgi , SafeBuf *newUrl , HttpRequest *hr ) {
|
|||||||
// get original request url. this is not \0 terminated
|
// get original request url. this is not \0 terminated
|
||||||
char *src = hr->m_origUrlRequest;
|
char *src = hr->m_origUrlRequest;
|
||||||
long srcLen = hr->m_origUrlRequestLen;
|
long srcLen = hr->m_origUrlRequestLen;
|
||||||
|
return replaceParm2 ( cgi ,newUrl, src, srcLen );
|
||||||
|
}
|
||||||
|
|
||||||
|
bool replaceParm2 ( char *cgi , SafeBuf *newUrl ,
|
||||||
|
char *oldUrl , long oldUrlLen ) {
|
||||||
|
|
||||||
|
char *src = oldUrl;
|
||||||
|
long srcLen = oldUrlLen;
|
||||||
|
|
||||||
char *srcEnd = src + srcLen;
|
char *srcEnd = src + srcLen;
|
||||||
|
|
||||||
char *equal = strstr(cgi,"=");
|
char *equal = strstr(cgi,"=");
|
||||||
@ -8985,3 +9126,90 @@ bool replaceParm ( char *cgi , SafeBuf *newUrl , HttpRequest *hr ) {
|
|||||||
if ( ! newUrl->nullTerm() ) return false;
|
if ( ! newUrl->nullTerm() ) return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print the content of the meta tags the user asked to have displayed
// (the whitespace-separated names in si->m_displayMetas, e.g. the value of
// the "dt" cgi parm such as "keywords description") for search result #i.
//
// . msg40 holds the per-result Msg20 replies; result i's reply carries the
//   tag contents in ptr_dbuf/size_dbuf
// . NOTE(review): the buffer is treated as one \0-terminated value per
//   requested name, in the same order as the names. that layout is implied
//   by the "+ 1" pointer advance and the terminator check below -- confirm
//   against the code that fills ptr_dbuf.
// . output goes to "sb" as <display> tags (xml), "display.<name>" members
//   (json) or a colored line (html), depending on si->m_format
// . a name may be followed by ":<integer>" (max length); that suffix is
//   skipped here
// . always returns true
bool printMetaContent ( Msg40 *msg40 , long i , State0 *st, SafeBuf *sb ) {
	SearchInput *si = &st->m_si;
	// names of the requested meta tags
	char *pp = si->m_displayMetas;
	// nothing requested? then there is nothing to print
	if ( ! pp ) return true;
	char *ppend = pp + gbstrlen(pp);
	Msg20 *m = msg40->m_msg20[i];
	Msg20Reply *mr = m->m_r;
	char *dbuf    = mr->ptr_dbuf;
	// exclude the final \0 from the length
	long  dbufLen = mr->size_dbuf-1;
	// bail on an empty buffer
	if ( ! dbuf || dbufLen <= 0 ) return true;
	// . assure last byte of dbuf is \0
	// . this insures the scans below can never run off the end
	// . it cannot change while we loop, so check it just once
	if ( dbuf [ dbufLen ] != '\0' ) {
		log(LOG_LOGIC,"query: Meta tag buffer has no \\0.");
		return true;
	}
	// one past the last content byte. using "dbufLen-1" here would
	// silently drop a final value that is only one byte long.
	char *dbufEnd = dbuf + dbufLen;
	char *dptr    = dbuf;
	// loop over the names of the requested meta tags
	while ( pp < ppend && dptr < dbufEnd ) {
		// skip initial spaces
		while ( pp < ppend && is_wspace_a(*pp) ) pp++;
		// break if done
		if ( ! *pp ) break;
		// that's the start of the meta tag name
		char *ss = pp;
		// . find end of that meta tag name
		// . can end in :<integer> -- specifies max len
		while ( pp < ppend && ! is_wspace_a(*pp) &&
			*pp != ':' ) pp++;
		// save current char
		char  c  = *pp;
		char *cp = pp;
		// NULL terminate the name. restored below.
		*pp++ = '\0';
		// if ':' was specified, skip the rest
		if ( c == ':' ) while ( pp < ppend && ! is_wspace_a(*pp)) pp++;
		// full length of this tag's content, up to its \0, but
		// never past the end of the buffer
		long maxLen  = dbufEnd - dptr;
		long fullLen = 0;
		while ( fullLen < maxLen && dptr[fullLen] ) fullLen++;
		// . how much of it we actually print
		// . cut it short at the first char that could break
		//   out of the markup we wrap it in
		long ddlen = fullLen;
		for ( long ti = 0; ti < fullLen; ti++ ) {
			if ( dptr[ti] == '"'  ||
			     dptr[ti] == '>'  ||
			     dptr[ti] == '<'  ||
			     dptr[ti] == '\r' ||
			     dptr[ti] == '\n' ) {
				ddlen = ti;
				break;
			}
		}
		if ( ddlen > 0 ) {
			// . ship it out
			// . the tag name comes straight from the request so
			//   it must be encoded, never printed with a raw %s
			if ( si->m_format == FORMAT_XML ) {
				sb->safePrintf ( "\t\t<display name=\"" );
				sb->htmlEncode ( ss );
				sb->safePrintf ( "\"><![CDATA[" );
				sb->cdataEncode ( dptr, ddlen );
				sb->safePrintf ( "]]></display>\n" );
			}
			else if ( si->m_format == FORMAT_JSON ) {
				sb->safePrintf ( "\t\t\"display." );
				sb->jsonEncode ( ss );
				sb->safePrintf ( "\":\"" );
				sb->jsonEncode ( dptr, ddlen );
				sb->safePrintf ( "\",\n");
			}
			// otherwise, print as a colored html line
			else {
				sb->safePrintf("<font color=#c62939><b>");
				sb->htmlEncode ( ss );
				sb->safePrintf("</b>: ");
				sb->safeMemcpy ( dptr, ddlen );
				sb->safePrintf ( "</font><br>" );
			}
		}
		// restore tag name buffer
		*cp = c;
		// . point to next content of tag to display
		// . skip the WHOLE value and its \0, not just the part we
		//   printed, otherwise a value cut short above would leave
		//   its tail to be shown under the next tag's name
		dptr += fullLen + 1;
	}
	return true;
}
|
||||||
|
164
PageRoot.cpp
164
PageRoot.cpp
@ -654,10 +654,10 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
|
|||||||
|
|
||||||
{"SEARCH","/"},
|
{"SEARCH","/"},
|
||||||
|
|
||||||
// {"IMAGES","/?searchtype=images"},
|
{"DISCUSSIONS","/?searchtype=discussions"},
|
||||||
// {"PRODUCTS","/?searchtype=products"},
|
{"PRODUCTS","/?searchtype=products"},
|
||||||
// {"ARTICLES","/?searchtype=articles"},
|
{"ARTICLES","/?searchtype=articles"},
|
||||||
// {"DISCUSSIONS","/?searchtype=discussions"},
|
{"IMAGES","/?searchtype=images"},
|
||||||
|
|
||||||
{"DIRECTORY","/Top"},
|
{"DIRECTORY","/Top"},
|
||||||
{"ADVANCED","/adv.html"},
|
{"ADVANCED","/adv.html"},
|
||||||
@ -679,7 +679,7 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
|
|||||||
// first the nav column
|
// first the nav column
|
||||||
//
|
//
|
||||||
sb->safePrintf(
|
sb->safePrintf(
|
||||||
"<TD bgcolor=#f3c714 " // yellow/gold
|
"<TD bgcolor=#%s " // f3c714 " // yellow/gold
|
||||||
"valign=top "
|
"valign=top "
|
||||||
"style=\"width:210px;"
|
"style=\"width:210px;"
|
||||||
"border-right:3px solid blue;"
|
"border-right:3px solid blue;"
|
||||||
@ -699,6 +699,7 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
|
|||||||
"width:100px;"
|
"width:100px;"
|
||||||
"height:100px;"
|
"height:100px;"
|
||||||
"\">"
|
"\">"
|
||||||
|
, GOLD
|
||||||
, coll
|
, coll
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -707,7 +708,8 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
|
|||||||
"height=57 src=/computer2.png>");
|
"height=57 src=/computer2.png>");
|
||||||
else
|
else
|
||||||
sb->safePrintf("<br style=line-height:10px;>"
|
sb->safePrintf("<br style=line-height:10px;>"
|
||||||
"<img width=54 height=79 src=/rocket.jpg>"
|
"<img border=0 "
|
||||||
|
"width=54 height=79 src=/rocket.jpg>"
|
||||||
);
|
);
|
||||||
|
|
||||||
sb->safePrintf ( "</div>"
|
sb->safePrintf ( "</div>"
|
||||||
@ -725,6 +727,10 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
|
|||||||
|
|
||||||
if ( isSearchResultsPage && i >= 5 ) break;
|
if ( isSearchResultsPage && i >= 5 ) break;
|
||||||
|
|
||||||
|
if ( i >= 1 && i <= 4 &&
|
||||||
|
cr->m_diffbotApiUrl.length()<= 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
char delim = '?';
|
char delim = '?';
|
||||||
if ( strstr ( mi[i].m_url,"?") ) delim = '&';
|
if ( strstr ( mi[i].m_url,"?") ) delim = '&';
|
||||||
|
|
||||||
@ -1042,14 +1048,16 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
|
|
||||||
// put search box in a box
|
// put search box in a box
|
||||||
sb.safePrintf("<div style="
|
sb.safePrintf("<div style="
|
||||||
"background-color:#fcc714;"
|
"background-color:#%s;"//fcc714;"
|
||||||
"border-style:solid;"
|
"border-style:solid;"
|
||||||
"border-width:3px;"
|
"border-width:3px;"
|
||||||
"border-color:blue;"
|
"border-color:blue;"
|
||||||
//"background-color:blue;"
|
//"background-color:blue;"
|
||||||
"padding:20px;"
|
"padding:20px;"
|
||||||
"border-radius:20px;"
|
"border-radius:20px;"
|
||||||
">");
|
">"
|
||||||
|
,GOLD
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
sb.safePrintf("<input name=q type=text "
|
sb.safePrintf("<input name=q type=text "
|
||||||
@ -1113,8 +1121,79 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
sb.safePrintf("\n");
|
sb.safePrintf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// always the option to add event guru to their list of
|
||||||
|
// search engine in their browser
|
||||||
|
sb.safePrintf("<br>"
|
||||||
|
//"<br>"
|
||||||
|
|
||||||
|
"<script>\n"
|
||||||
|
"function addEngine() {\n"
|
||||||
|
"if (window.external && "
|
||||||
|
"('AddSearchProvider' in window.external)) {\n"
|
||||||
|
// Firefox 2 and IE 7, OpenSearch
|
||||||
|
"window.external.AddSearchProvider('http://"
|
||||||
|
"www.gigablast.com/searchbar.xml');\n"
|
||||||
|
"}\n"
|
||||||
|
"else if (window.sidebar && ('addSearchEngine' "
|
||||||
|
"in window.sidebar)) {\n"
|
||||||
|
// Firefox <= 1.5, Sherlock
|
||||||
|
"window.sidebar.addSearchEngine('http://"
|
||||||
|
"www.gigablast.com/searchbar.xml',"
|
||||||
|
//"example.com/search-plugin.src',"
|
||||||
|
"'http://www.gigablast.com/rocket.jpg'," //guru.png
|
||||||
|
"'Search Plugin', '');\n"
|
||||||
|
"}\n"
|
||||||
|
"else {"
|
||||||
|
// No search engine support (IE 6, Opera, etc).
|
||||||
|
"alert('No search engine support');\n"
|
||||||
|
"}\n"
|
||||||
|
// do not ask again if they tried to add it
|
||||||
|
// meta cookie should store this
|
||||||
|
//"document.getElementById('addedse').value='1';\n"
|
||||||
|
// NEVER ask again! permanent cookie
|
||||||
|
"document.cookie = 'didse=3';"
|
||||||
|
// make it invisible again
|
||||||
|
//"var e = document.getElementById('addse');\n"
|
||||||
|
//"e.style.display = 'none';\n"
|
||||||
|
"}\n"
|
||||||
|
|
||||||
|
|
||||||
|
"</script>\n"
|
||||||
|
|
||||||
|
|
||||||
|
"<center>"
|
||||||
|
"<a onclick='addEngine();' style="
|
||||||
|
"cursor:pointer;"
|
||||||
|
"cursor:hand;"
|
||||||
|
"color:blue;"
|
||||||
|
">"
|
||||||
|
|
||||||
|
"<img height=16 width=16 border=0 src=/rocket16.png>"
|
||||||
|
|
||||||
|
"<font color=#505050>"
|
||||||
|
"%c%c%c "
|
||||||
|
"</font>"
|
||||||
|
|
||||||
|
" "
|
||||||
|
|
||||||
|
"Add Gigablast to your browser's "
|
||||||
|
"search engines"
|
||||||
|
"</a>"
|
||||||
|
"</center>"
|
||||||
|
"<br>"
|
||||||
|
"<br>"
|
||||||
|
|
||||||
|
// print triangle
|
||||||
|
,0xe2
|
||||||
|
,0x96
|
||||||
|
,0xbc
|
||||||
|
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
// print any red boxes we might need to
|
// print any red boxes we might need to
|
||||||
if ( printRedBox2 ( &sb , true ) )
|
if ( printRedBox2 ( &sb , sock , r ) ) // true ) )
|
||||||
sb.safePrintf("<br>\n");
|
sb.safePrintf("<br>\n");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1280,9 +1359,6 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// begin new stuff
|
// begin new stuff
|
||||||
//
|
//
|
||||||
@ -1352,6 +1428,7 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
|
|
||||||
sb.safePrintf("</div>");
|
sb.safePrintf("</div>");
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
sb.safePrintf("<div class=grad style=\"border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:280px;height:280px;display:inline-block;z-index:105;color:black;margin-left:-50px;position:absolute;margin-top:50px;background-color:lightgray;\">");
|
sb.safePrintf("<div class=grad style=\"border-radius:200px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:280px;height:280px;display:inline-block;z-index:105;color:black;margin-left:-50px;position:absolute;margin-top:50px;background-color:lightgray;\">");
|
||||||
|
|
||||||
@ -1378,6 +1455,7 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
sb.safePrintf("</div>");
|
sb.safePrintf("</div>");
|
||||||
|
|
||||||
sb.safePrintf("</div>");
|
sb.safePrintf("</div>");
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
sb.safePrintf("<div class=grad style=\"border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:240px;height:240px;display:inline-block;z-index:110;color:black;margin-left:-240px;position:absolute;margin-top:230px;background-color:lightgray;\">");
|
sb.safePrintf("<div class=grad style=\"border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:240px;height:240px;display:inline-block;z-index:110;color:black;margin-left:-240px;position:absolute;margin-top:230px;background-color:lightgray;\">");
|
||||||
@ -1399,16 +1477,49 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
|
|||||||
|
|
||||||
sb.safePrintf("</div>");
|
sb.safePrintf("</div>");
|
||||||
|
|
||||||
|
//
|
||||||
|
// donate with paypal bubble
|
||||||
|
//
|
||||||
|
|
||||||
|
sb.safePrintf("<div class=grad style=\"border-radius:300px;border-color:blue;border-style:solid;border-width:3px;padding:12px;width:180px;height:180px;display:inline-block;z-index:120;color:black;margin-left:10px;position:absolute;margin-top:270px;background-color:lightgray;\">");
|
||||||
|
|
||||||
|
sb.safePrintf("<br>");
|
||||||
|
sb.safePrintf("<b>");
|
||||||
|
sb.safePrintf("<font style=font-size:18px;margin-left:40px;>");
|
||||||
|
sb.safePrintf("Contribute");
|
||||||
|
sb.safePrintf("</font>");
|
||||||
|
sb.safePrintf("<br>");
|
||||||
|
sb.safePrintf("<br>");
|
||||||
|
sb.safePrintf("</b>");
|
||||||
|
|
||||||
|
sb.safePrintf("<div style=margin-left:15px;margin-right:5px;>");
|
||||||
|
|
||||||
|
|
||||||
//sb.safePrintf("</TD></TR></TABLE></body></html>");
|
sb.safePrintf(
|
||||||
|
|
||||||
|
"Help Gigablast development with PayPal."
|
||||||
|
"<br>"
|
||||||
|
"<br>"
|
||||||
|
// BEGIN PAYPAL DONATE BUTTON
|
||||||
|
"<form action=\"https://www.paypal.com/cgi-bin/webscr\" method=\"post\" target=\"_top\">"
|
||||||
|
"<input type=\"hidden\" name=\"cmd\" value=\"_donations\">"
|
||||||
|
"<input type=\"hidden\" name=\"business\" value=\"2SFSFLUY3KS9Y\">"
|
||||||
|
"<input type=\"hidden\" name=\"lc\" value=\"US\">"
|
||||||
|
"<input type=\"hidden\" name=\"item_name\" value=\"Gigablast, Inc.\">"
|
||||||
|
"<input type=\"hidden\" name=\"currency_code\" value=\"USD\">"
|
||||||
|
"<input type=\"hidden\" name=\"bn\" value=\"PP-DonationsBF:btn_donateCC_LG.gif:NonHosted\">"
|
||||||
|
"<input type=\"image\" src=\"https://www.paypalobjects.com/en_US/i/btn/btn_donateCC_LG.gif\" border=\"0\" name=\"submit\" alt=\"PayPal - The safer, easier way to pay online!\" height=47 width=147>"
|
||||||
|
"<img alt=\"\" border=\"0\" src=\"https://www.paypalobjects.com/en_US/i/scr/pixel.gif\" width=\"1\" height=\"1\">"
|
||||||
|
"</form>"
|
||||||
|
// END PAYPAL BUTTON
|
||||||
|
"</center></div></center>"
|
||||||
|
//"</td>\n"
|
||||||
|
);
|
||||||
|
|
||||||
//
|
//
|
||||||
// end new stuff
|
// end new stuff
|
||||||
//
|
//
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
sb.safePrintf("\n");
|
sb.safePrintf("\n");
|
||||||
sb.safePrintf("\n");
|
sb.safePrintf("\n");
|
||||||
@ -1466,14 +1577,16 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
|
|||||||
|
|
||||||
// put search box in a box
|
// put search box in a box
|
||||||
sb.safePrintf("<div style="
|
sb.safePrintf("<div style="
|
||||||
"background-color:#fcc714;"
|
"background-color:#%s;" // fcc714;"
|
||||||
"border-style:solid;"
|
"border-style:solid;"
|
||||||
"border-width:3px;"
|
"border-width:3px;"
|
||||||
"border-color:blue;"
|
"border-color:blue;"
|
||||||
//"background-color:blue;"
|
//"background-color:blue;"
|
||||||
"padding:20px;"
|
"padding:20px;"
|
||||||
"border-radius:20px;"
|
"border-radius:20px;"
|
||||||
">");
|
">"
|
||||||
|
, GOLD
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
sb.safePrintf("<input name=urls type=text "
|
sb.safePrintf("<input name=urls type=text "
|
||||||
@ -1637,14 +1750,16 @@ bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
|
|||||||
|
|
||||||
// put search box in a box
|
// put search box in a box
|
||||||
sb.safePrintf("<div style="
|
sb.safePrintf("<div style="
|
||||||
"background-color:#fcc714;"
|
"background-color:#%s;" // fcc714;"
|
||||||
"border-style:solid;"
|
"border-style:solid;"
|
||||||
"border-width:3px;"
|
"border-width:3px;"
|
||||||
"border-color:blue;"
|
"border-color:blue;"
|
||||||
//"background-color:blue;"
|
//"background-color:blue;"
|
||||||
"padding:20px;"
|
"padding:20px;"
|
||||||
"border-radius:20px;"
|
"border-radius:20px;"
|
||||||
">");
|
">"
|
||||||
|
,GOLD
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
sb.safePrintf("<input name=q type=text "
|
sb.safePrintf("<input name=q type=text "
|
||||||
@ -2627,7 +2742,7 @@ void resetPageAddUrl ( ) {
|
|||||||
s_htable.reset();
|
s_htable.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
bool sendPageAdvanced ( TcpSocket *sock , HttpRequest *hr ) {
|
bool sendPageAdvanced ( TcpSocket *sock , HttpRequest *hr ) {
|
||||||
|
|
||||||
SafeBuf sb;
|
SafeBuf sb;
|
||||||
@ -2802,7 +2917,7 @@ bool sendPageAdvanced ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
||||||
|
|
||||||
@ -2833,7 +2948,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
|
|
||||||
// yellow/gold bar
|
// yellow/gold bar
|
||||||
"<tr>"
|
"<tr>"
|
||||||
"<td colspan=2 bgcolor=#f3c714>"
|
"<td colspan=2 bgcolor=#%s>" // f3c714>"
|
||||||
"<b>"
|
"<b>"
|
||||||
"Basic Query Syntax"
|
"Basic Query Syntax"
|
||||||
"</b>"
|
"</b>"
|
||||||
@ -2946,6 +3061,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
// " </tr>"
|
// " </tr>"
|
||||||
// ""
|
// ""
|
||||||
// ""
|
// ""
|
||||||
|
, GOLD
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@ -3002,7 +3118,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
"cellpadding=5 cellspacing=0 border=0>"
|
"cellpadding=5 cellspacing=0 border=0>"
|
||||||
// yellow/gold bar
|
// yellow/gold bar
|
||||||
"<tr>"
|
"<tr>"
|
||||||
"<td colspan=2 bgcolor=#f3c714>"
|
"<td colspan=2 bgcolor=#%s>"//f3c714>"
|
||||||
"<b>"
|
"<b>"
|
||||||
"%s"
|
"%s"
|
||||||
"</b>"
|
"</b>"
|
||||||
@ -3014,6 +3130,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
"<th><font color=33dcff>"
|
"<th><font color=33dcff>"
|
||||||
"Description</font></th>"
|
"Description</font></th>"
|
||||||
"</tr>\n"
|
"</tr>\n"
|
||||||
|
, GOLD
|
||||||
, g_fields[i].m_title
|
, g_fields[i].m_title
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -3055,7 +3172,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
|
|
||||||
// yellow/gold bar
|
// yellow/gold bar
|
||||||
"<tr>"
|
"<tr>"
|
||||||
"<td colspan=2 bgcolor=#f3c714>"
|
"<td colspan=2 bgcolor=#%s>" // f3c714>"
|
||||||
"<b>"
|
"<b>"
|
||||||
"Boolean Queries"
|
"Boolean Queries"
|
||||||
"</b>"
|
"</b>"
|
||||||
@ -3160,6 +3277,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
//"</td></tr>"
|
//"</td></tr>"
|
||||||
//"</table>"
|
//"</table>"
|
||||||
//"<br>"
|
//"<br>"
|
||||||
|
, GOLD
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -983,6 +983,19 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
"<td colspan=2 class=hdrow>"
|
"<td colspan=2 class=hdrow>"
|
||||||
"<center><b>Network</b></td></tr>\n"
|
"<center><b>Network</b></td></tr>\n"
|
||||||
|
|
||||||
|
"<tr class=poo><td><b>http server "
|
||||||
|
"bytes downloaded</b>"
|
||||||
|
"</td><td>%llu</td></tr>\n"
|
||||||
|
|
||||||
|
"<tr class=poo><td><b>http server "
|
||||||
|
"bytes downloaded (uncompressed)</b>"
|
||||||
|
"</td><td>%llu</td></tr>\n"
|
||||||
|
|
||||||
|
"<tr class=poo><td><b>http server "
|
||||||
|
"compression ratio</b>"
|
||||||
|
"</td><td>%.02f</td></tr>\n"
|
||||||
|
|
||||||
|
|
||||||
"<tr class=poo><td><b>ip1 bytes/packets in</b>"
|
"<tr class=poo><td><b>ip1 bytes/packets in</b>"
|
||||||
"</td><td>%llu / %llu</td></tr>\n"
|
"</td><td>%llu / %llu</td></tr>\n"
|
||||||
|
|
||||||
@ -1007,6 +1020,11 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
|
|
||||||
,
|
,
|
||||||
TABLE_STYLE,
|
TABLE_STYLE,
|
||||||
|
|
||||||
|
g_httpServer.m_bytesDownloaded,
|
||||||
|
g_httpServer.m_uncompressedBytes,
|
||||||
|
g_httpServer.getCompressionRatio(),
|
||||||
|
|
||||||
g_udpServer.m_eth0BytesIn,
|
g_udpServer.m_eth0BytesIn,
|
||||||
g_udpServer.m_eth0PacketsIn,
|
g_udpServer.m_eth0PacketsIn,
|
||||||
|
|
||||||
@ -1030,6 +1048,15 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
p.safePrintf (
|
p.safePrintf (
|
||||||
"\t<networkStats>\n"
|
"\t<networkStats>\n"
|
||||||
|
|
||||||
|
"\t\t<httpServerBytesDownloaded>%llu"
|
||||||
|
"</httpServerBytesDownloaded>\n"
|
||||||
|
|
||||||
|
"\t\t<httpServerBytesDownloadedUncompressed>%llu"
|
||||||
|
"</httpServerBytesDownloadedUncompressed>\n"
|
||||||
|
|
||||||
|
"\t\t<httpServerCompressionRatio>%.02f"
|
||||||
|
"</httpServerCompressionRatio>\n"
|
||||||
|
|
||||||
"\t\t<ip1BytesIn>%llu</ip1BytesIn>\n"
|
"\t\t<ip1BytesIn>%llu</ip1BytesIn>\n"
|
||||||
"\t\t<ip1PacketsIn>%llu</ip1PacketsIn>\n"
|
"\t\t<ip1PacketsIn>%llu</ip1PacketsIn>\n"
|
||||||
|
|
||||||
@ -1053,6 +1080,12 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
"\t</networkStats>\n"
|
"\t</networkStats>\n"
|
||||||
|
|
||||||
,
|
,
|
||||||
|
|
||||||
|
g_httpServer.m_bytesDownloaded,
|
||||||
|
g_httpServer.m_uncompressedBytes,
|
||||||
|
g_httpServer.getCompressionRatio(),
|
||||||
|
|
||||||
|
|
||||||
g_udpServer.m_eth0BytesIn,
|
g_udpServer.m_eth0BytesIn,
|
||||||
g_udpServer.m_eth0PacketsIn,
|
g_udpServer.m_eth0PacketsIn,
|
||||||
|
|
||||||
@ -1075,6 +1108,12 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
p.safePrintf (
|
p.safePrintf (
|
||||||
"\t\"networkStats\":{\n"
|
"\t\"networkStats\":{\n"
|
||||||
|
|
||||||
|
|
||||||
|
"\t\t\"httpServerBytesDownloaded\":%llu,\n"
|
||||||
|
"\t\t\"httpServerBytesDownloadedUncompressed\""
|
||||||
|
":%llu,\n"
|
||||||
|
"\t\t\"httpServerCompressionRatio\":%.02f,\n"
|
||||||
|
|
||||||
"\t\t\"ip1BytesIn\":%llu,\n"
|
"\t\t\"ip1BytesIn\":%llu,\n"
|
||||||
"\t\t\"ip1PacketsIn\":%llu,\n"
|
"\t\t\"ip1PacketsIn\":%llu,\n"
|
||||||
|
|
||||||
@ -1098,6 +1137,12 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
"\t},\n"
|
"\t},\n"
|
||||||
|
|
||||||
,
|
,
|
||||||
|
|
||||||
|
g_httpServer.m_bytesDownloaded,
|
||||||
|
g_httpServer.m_uncompressedBytes,
|
||||||
|
g_httpServer.getCompressionRatio(),
|
||||||
|
|
||||||
|
|
||||||
g_udpServer.m_eth0BytesIn,
|
g_udpServer.m_eth0BytesIn,
|
||||||
g_udpServer.m_eth0PacketsIn,
|
g_udpServer.m_eth0PacketsIn,
|
||||||
|
|
||||||
|
451
Pages.cpp
451
Pages.cpp
@ -41,17 +41,20 @@ class WebPage {
|
|||||||
// otherwise you'll get a malformed error when running
|
// otherwise you'll get a malformed error when running
|
||||||
static long s_numPages = 0;
|
static long s_numPages = 0;
|
||||||
static WebPage s_pages[] = {
|
static WebPage s_pages[] = {
|
||||||
|
|
||||||
|
/*
|
||||||
// dummy pages
|
// dummy pages
|
||||||
{ PAGE_NOHOSTLINKS , "nohostlinks", 0, "host links", 0, 0,
|
{ PAGE_NOHOSTLINKS , "nohostlinks", 0, "host links", 0, 0,
|
||||||
"dummy page - if set in the users row then host links will not be "
|
"dummy page - if set in the users row then host links will not be "
|
||||||
" shown",
|
" shown",
|
||||||
NULL, 0 ,NULL,NULL,PG_NOAPI},
|
NULL, 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_ADMIN , "colladmin", 0, "master=0", 0, 0,
|
{ PAGE_ADMIN , "colladmin", 0, "master=0", 0, 0,
|
||||||
"dummy page - if set in the users row then user will have master=0 and "
|
"dummy page - if set in the users row then user will have master=0 and "
|
||||||
" collection links will be highlighted in red",
|
" collection links will be highlighted in red",
|
||||||
NULL, 0 ,NULL,NULL,PG_NOAPI},
|
NULL, 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
|
|
||||||
//{ PAGE_QUALITY , "quality", 0, "quality", 0, 0,
|
//{ PAGE_QUALITY , "quality", 0, "quality", 0, 0,
|
||||||
// "dummy page - if set in the users row then \"Quality Control\""
|
// "dummy page - if set in the users row then \"Quality Control\""
|
||||||
@ -61,14 +64,19 @@ static WebPage s_pages[] = {
|
|||||||
"dummy page - if set in the users row then page function is"
|
"dummy page - if set in the users row then page function is"
|
||||||
" called directly and not through g_parms.setFromRequest",
|
" called directly and not through g_parms.setFromRequest",
|
||||||
NULL, 0 ,NULL,NULL,PG_NOAPI},
|
NULL, 0 ,NULL,NULL,PG_NOAPI},
|
||||||
|
*/
|
||||||
|
|
||||||
// publicly accessible pages
|
// publicly accessible pages
|
||||||
{ PAGE_ROOT , "index.html" , 0 , "root" , 0 , 0 ,
|
{ PAGE_ROOT , "index.html" , 0 , "root" , 0 , 0 ,
|
||||||
"search page to query",
|
"search page to query",
|
||||||
sendPageRoot , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageRoot , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_RESULTS , "search" , 0 , "search" , 0 , 0 ,
|
{ PAGE_RESULTS , "search" , 0 , "search" , 0 , 0 ,
|
||||||
"search results page",
|
"search results page",
|
||||||
sendPageResults, 0 ,NULL,NULL,0},
|
sendPageResults, 0 ,NULL,NULL,
|
||||||
|
0},
|
||||||
|
|
||||||
//{ PAGE_WIDGET , "widget" , 0 , "widget" , 0 , 0 ,
|
//{ PAGE_WIDGET , "widget" , 0 , "widget" , 0 , 0 ,
|
||||||
// "widget page",
|
// "widget page",
|
||||||
// sendPageWidget, 0 ,NULL,NULL,PG_NOAPI},
|
// sendPageWidget, 0 ,NULL,NULL,PG_NOAPI},
|
||||||
@ -77,25 +85,33 @@ static WebPage s_pages[] = {
|
|||||||
// api use PAGE_ADDURL2 which is /admin/addurl. so we set PG_NOAPI here
|
// api use PAGE_ADDURL2 which is /admin/addurl. so we set PG_NOAPI here
|
||||||
{ PAGE_ADDURL , "addurl" , 0 , "add url" , 0 , 0 ,
|
{ PAGE_ADDURL , "addurl" , 0 , "add url" , 0 , 0 ,
|
||||||
"Page where you can add url for spidering",
|
"Page where you can add url for spidering",
|
||||||
sendPageAddUrl, 0 ,NULL,NULL,PG_NOAPI},
|
sendPageAddUrl, 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_GET , "get" , 0 , "get" , 0 , 0 ,
|
{ PAGE_GET , "get" , 0 , "get" , 0 , 0 ,
|
||||||
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
|
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
|
||||||
"gets cached web page",
|
"gets cached web page",
|
||||||
sendPageGet , 0 ,NULL,NULL,0},
|
sendPageGet , 0 ,NULL,NULL,
|
||||||
|
0},
|
||||||
|
|
||||||
{ PAGE_LOGIN , "login" , 0 , "login" , 0 , 0 ,
|
{ PAGE_LOGIN , "login" , 0 , "login" , 0 , 0 ,
|
||||||
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
|
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
|
||||||
"login",
|
"login",
|
||||||
sendPageLogin, 0 ,NULL,NULL,PG_NOAPI},
|
sendPageLogin, 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_DIRECTORY , "dir" , 0 , "directory" , 0 , 0 ,
|
{ PAGE_DIRECTORY , "dir" , 0 , "directory" , 0 , 0 ,
|
||||||
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
|
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
|
||||||
"directory",
|
"directory",
|
||||||
// until api is ready, take this out of the menu
|
// until api is ready, take this out of the menu
|
||||||
sendPageDirectory , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageDirectory , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_REPORTSPAM , "reportspam" , 0 , "report spam" , 0 , 0 ,
|
{ PAGE_REPORTSPAM , "reportspam" , 0 , "report spam" , 0 , 0 ,
|
||||||
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY | USER_CLIENT,
|
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY | USER_CLIENT
|
||||||
"report spam",
|
"report spam",
|
||||||
sendPageReportSpam , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageReportSpam , 0 ,NULL,NULL,PG_NOAPI},
|
||||||
|
|
||||||
//{ PAGE_WORDVECTOR, "vec" , 0 , "word vectors" , 0 , 1 ,
|
//{ PAGE_WORDVECTOR, "vec" , 0 , "word vectors" , 0 , 1 ,
|
||||||
// //USER_PUBLIC | USER_MASTER | USER_ADMIN ,
|
// //USER_PUBLIC | USER_MASTER | USER_ADMIN ,
|
||||||
// "word vectors",
|
// "word vectors",
|
||||||
@ -103,115 +119,142 @@ static WebPage s_pages[] = {
|
|||||||
|
|
||||||
// use post now for the "site list" which can be big
|
// use post now for the "site list" which can be big
|
||||||
{ PAGE_BASIC_SETTINGS, "admin/settings", 0 , "settings",1, M_POST ,
|
{ PAGE_BASIC_SETTINGS, "admin/settings", 0 , "settings",1, M_POST ,
|
||||||
"basic settings", sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
"basic settings", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_BASIC_STATUS, "admin/status", 0 , "status",1, 0 ,
|
{ PAGE_BASIC_STATUS, "admin/status", 0 , "status",1, 0 ,
|
||||||
"basic status", sendPageBasicStatus , 0 ,NULL,NULL,PG_STATUS},
|
"basic status", sendPageBasicStatus , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_COLLADMIN},
|
||||||
|
|
||||||
//{ PAGE_BASIC_DIFFBOT, "admin/diffbot", 0 , "diffbot",1, 0 ,
|
//{ PAGE_BASIC_DIFFBOT, "admin/diffbot", 0 , "diffbot",1, 0 ,
|
||||||
// "Basic diffbot page.", sendPageBasicDiffbot , 0 ,NULL,NULL,PG_NOAPI},
|
// "Basic diffbot page.", sendPageBasicDiffbot , 0 ,
|
||||||
{ PAGE_BASIC_SECURITY, "admin/security", 0 , "security",1, 0 ,
|
//NULL,NULL,PG_NOAPI},
|
||||||
"basic security", sendPageGeneric , 0 ,NULL,NULL,0},
|
|
||||||
|
{ PAGE_BASIC_SECURITY,
|
||||||
|
"admin/collectionpasswords", 0,"collection passwords",0,0,
|
||||||
|
"passwords", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
|
PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_BASIC_SEARCH, "", 0 , "search",1, 0 ,
|
{ PAGE_BASIC_SEARCH, "", 0 , "search",1, 0 ,
|
||||||
"basic search", sendPageRoot , 0 ,NULL,NULL,PG_NOAPI},
|
"basic search", sendPageRoot , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
|
|
||||||
{ PAGE_HOSTS , "admin/hosts" , 0 , "hosts" , 0 , 0 ,
|
{ PAGE_HOSTS , "admin/hosts" , 0 , "hosts" , 0 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY,
|
//USER_MASTER | USER_PROXY,
|
||||||
"hosts status",
|
"hosts status", sendPageHosts , 0 ,NULL,NULL,
|
||||||
sendPageHosts , 0 ,NULL,NULL,PG_STATUS},
|
PG_STATUS|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_MASTER , "admin/master" , 0 , "master controls" , 1 , 0 ,
|
{ PAGE_MASTER , "admin/master" , 0 , "master controls" , 1 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY ,
|
//USER_MASTER | USER_PROXY ,
|
||||||
"master controls",
|
"master controls", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
sendPageGeneric , 0 ,NULL,NULL,0},
|
PG_ROOTADMIN},
|
||||||
|
|
||||||
// use POST for html head/tail and page root html. might be large.
|
// use POST for html head/tail and page root html. might be large.
|
||||||
{ PAGE_SEARCH , "admin/search" , 0 , "search controls" ,1,M_POST,
|
{ PAGE_SEARCH , "admin/search" , 0 , "search controls" ,1,M_POST,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"search controls",
|
"search controls", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
sendPageGeneric , 0 ,NULL,NULL,0},
|
0},
|
||||||
|
|
||||||
// use post now for the "site list" which can be big
|
// use post now for the "site list" which can be big
|
||||||
{ PAGE_SPIDER , "admin/spider" , 0 , "spider controls" ,1,M_POST,
|
{ PAGE_SPIDER , "admin/spider" , 0 , "spider controls" ,1,M_POST,
|
||||||
//USER_ADMIN | USER_MASTER | USER_PROXY ,
|
//USER_ADMIN | USER_MASTER | USER_PROXY ,
|
||||||
"spider controls",
|
"spider controls", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
sendPageGeneric , 0 ,NULL,NULL,0},
|
PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_SPIDERPROXIES,"admin/proxies" , 0 , "proxies" , 1 , 0,
|
{ PAGE_SPIDERPROXIES,"admin/proxies" , 0 , "proxies" , 1 , 0,
|
||||||
"proxies", sendPageGeneric , 0,NULL,NULL,0 } ,
|
"proxies", sendPageGeneric , 0,NULL,NULL,
|
||||||
|
PG_ROOTADMIN } ,
|
||||||
|
|
||||||
{ PAGE_LOG , "admin/log" , 0 , "log controls" , 1 , 0 ,
|
{ PAGE_LOG , "admin/log" , 0 , "log controls" , 1 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY,
|
//USER_MASTER | USER_PROXY,
|
||||||
"log controls",
|
"log controls", sendPageGeneric , 0 ,NULL,NULL,
|
||||||
sendPageGeneric , 0 ,NULL,NULL,0},
|
PG_ROOTADMIN},
|
||||||
{ PAGE_SECURITY, "admin/security2", 0 , "security" , 1 , 0 ,
|
|
||||||
|
{ PAGE_ROOTPASSWORDS, "admin/rootpasswords",
|
||||||
|
0 , "root passwords" , 1 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY ,
|
//USER_MASTER | USER_PROXY ,
|
||||||
"advanced security",
|
"root passwords",
|
||||||
sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageGeneric , 0 ,NULL,NULL,
|
||||||
|
PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_ADDCOLL , "admin/addcoll" , 0 , "add collection" , 1 , 0 ,
|
{ PAGE_ADDCOLL , "admin/addcoll" , 0 , "add collection" , 1 , 0 ,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"add a new collection",
|
"add a new collection",
|
||||||
sendPageAddColl , 0 ,NULL,NULL,0},
|
sendPageAddColl , 0 ,NULL,NULL,
|
||||||
|
PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_DELCOLL , "admin/delcoll" , 0 , "delete collections" , 1 ,0,
|
{ PAGE_DELCOLL , "admin/delcoll" , 0 , "delete collections" , 1 ,0,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"delete a collection",
|
"delete a collection",
|
||||||
sendPageDelColl , 0 ,NULL,NULL,0},
|
sendPageDelColl , 0 ,NULL,NULL,
|
||||||
|
PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_CLONECOLL, "admin/clonecoll" , 0 , "clone collection" , 1 ,0,
|
{ PAGE_CLONECOLL, "admin/clonecoll" , 0 , "clone collection" , 1 ,0,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"clone one collection's settings to another",
|
"clone one collection's settings to another",
|
||||||
sendPageCloneColl , 0 ,NULL,NULL,0},
|
sendPageCloneColl , 0 ,NULL,NULL,
|
||||||
|
PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
|
{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
|
||||||
//USER_MASTER ,
|
|
||||||
"repair data",
|
"repair data",
|
||||||
sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
//USER_MASTER ,
|
||||||
// { PAGE_SITES , "admin/sites", 0 , "site list" , 1 , 1,
|
sendPageGeneric , 0 ,NULL,NULL,
|
||||||
// "what sites can be spidered",
|
PG_ROOTADMIN },
|
||||||
// sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI}, // sendPageBasicSettings
|
|
||||||
{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 ,M_POST,
|
{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 ,M_POST,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
|
||||||
"prioritize urls for spidering",
|
"prioritize urls for spidering",
|
||||||
// until we get this working, set PG_NOAPI
|
sendPageGeneric , 0 ,NULL,NULL,
|
||||||
sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
PG_NOAPI|PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0,M_MULTI ,
|
{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0,M_MULTI ,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"inject url in the index here",
|
"inject url in the index here",
|
||||||
sendPageInject , 2 } ,
|
sendPageInject , 2 ,NULL,NULL,
|
||||||
|
0} ,
|
||||||
|
|
||||||
// this is the addurl page the the admin!
|
// this is the addurl page the the admin!
|
||||||
{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
|
{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
|
||||||
"add url page for admin",
|
"add url page for admin",
|
||||||
sendPageAddUrl2 , 0 ,NULL,NULL,0},
|
sendPageAddUrl2 , 0 ,NULL,NULL,
|
||||||
|
PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_REINDEX , "admin/reindex" , 0 , "query reindex" , 0 , 0 ,
|
{ PAGE_REINDEX , "admin/reindex" , 0 , "query reindex" , 0 , 0 ,
|
||||||
//USER_ADMIN | USER_MASTER,
|
//USER_ADMIN | USER_MASTER,
|
||||||
"query delete/reindex",
|
"query delete/reindex",
|
||||||
sendPageReindex , 0 ,NULL,NULL,0},
|
sendPageReindex , 0 ,NULL,NULL,
|
||||||
|
PG_COLLADMIN},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// master admin pages
|
// master admin pages
|
||||||
{ PAGE_STATS , "admin/stats" , 0 , "stats" , 0 , 0 ,
|
{ PAGE_STATS , "admin/stats" , 0 , "stats" , 0 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY ,
|
//USER_MASTER | USER_PROXY ,
|
||||||
"general statistics",
|
"general statistics",
|
||||||
sendPageStats , 0 ,NULL,NULL,PG_STATUS},
|
sendPageStats , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_GRAPH , "admin/graph" , 0 , "graph" , 0 , 0 ,
|
{ PAGE_GRAPH , "admin/graph" , 0 , "graph" , 0 , 0 ,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"query stats graph",
|
"query stats graph",
|
||||||
sendPageGraph , 2 /*niceness*/ ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPageGraph , 2 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_PERF , "admin/perf" , 0 , "performance" , 0 , 0 ,
|
{ PAGE_PERF , "admin/perf" , 0 , "performance" , 0 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY ,
|
//USER_MASTER | USER_PROXY ,
|
||||||
"function performance graph",
|
"function performance graph",
|
||||||
sendPagePerf , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPagePerf , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_SOCKETS , "admin/sockets" , 0 , "sockets" , 0 , 0 ,
|
{ PAGE_SOCKETS , "admin/sockets" , 0 , "sockets" , 0 , 0 ,
|
||||||
//USER_MASTER | USER_PROXY,
|
//USER_MASTER | USER_PROXY,
|
||||||
"sockets",
|
"sockets",
|
||||||
sendPageSockets , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPageSockets , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 ,
|
{ PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 ,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"logview",
|
"logview",
|
||||||
sendPageLogView , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPageLogView , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
// { PAGE_SYNC , "master/sync" , 0 , "sync" , 0 , 0 ,
|
// { PAGE_SYNC , "master/sync" , 0 , "sync" , 0 , 0 ,
|
||||||
// //USER_MASTER ,
|
// //USER_MASTER ,
|
||||||
// "sync",
|
// "sync",
|
||||||
@ -220,19 +263,21 @@ static WebPage s_pages[] = {
|
|||||||
{ PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , M_POST ,
|
{ PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , M_POST ,
|
||||||
//USER_MASTER | USER_PROXY ,
|
//USER_MASTER | USER_PROXY ,
|
||||||
"autobanned ips",
|
"autobanned ips",
|
||||||
sendPageAutoban , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageAutoban , 0 ,NULL,NULL,
|
||||||
/*
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
{ PAGE_SPIDERLOCKS,"admin/spiderlocks" , 0 , "spider locks" , 0 , 0 ,
|
|
||||||
USER_MASTER , sendPageSpiderLocks , 0 ,NULL,NULL,PG_NOAPI},
|
|
||||||
*/
|
|
||||||
{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
|
{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"profiler",
|
"profiler",
|
||||||
sendPageProfiler , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageProfiler , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
|
{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"threads",
|
"threads",
|
||||||
sendPageThreads , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPageThreads , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
//{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 ,
|
//{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 ,
|
||||||
// //USER_MASTER ,
|
// //USER_MASTER ,
|
||||||
// "thesaurus",
|
// "thesaurus",
|
||||||
@ -246,38 +291,51 @@ static WebPage s_pages[] = {
|
|||||||
// sendPageOverview , 0 ,NULL,NULL,PG_NOAPI},
|
// sendPageOverview , 0 ,NULL,NULL,PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_QA , "admin/qa" , 0 , "qa" , 0 , 0 ,
|
{ PAGE_QA , "admin/qa" , 0 , "qa" , 0 , 0 ,
|
||||||
"quality assurance", sendPageQA , 0 ,NULL,NULL,PG_NOAPI},
|
"quality assurance",
|
||||||
|
sendPageQA , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_IMPORT , "admin/import" , 0 , "import" , 0 , 0 ,
|
{ PAGE_IMPORT , "admin/import" , 0 , "import" , 0 , 0 ,
|
||||||
"import documents from another cluster",
|
"import documents from another cluster",
|
||||||
sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageGeneric , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_API , "admin/api" , 0 , "api" , 0 , 0 ,
|
{ PAGE_API , "admin/api" , 0 , "api" , 0 , 0 ,
|
||||||
//USER_MASTER | USER_ADMIN ,
|
//USER_MASTER | USER_ADMIN ,
|
||||||
"api", sendPageAPI , 0 ,NULL,NULL,PG_NOAPI},
|
"api",
|
||||||
|
sendPageAPI , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_RULES , "admin/siterules", 0 , "site rules", 1, M_POST,
|
{ PAGE_RULES , "admin/siterules", 0 , "site rules", 1, M_POST,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"site rules",
|
"site rules",
|
||||||
sendPageGeneric , 0,NULL,NULL,PG_NOAPI},
|
sendPageGeneric , 0,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_INDEXDB , "admin/indexdb" , 0 , "indexdb" , 0 , 0,
|
{ PAGE_INDEXDB , "admin/indexdb" , 0 , "indexdb" , 0 , 0,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"indexdb",
|
"indexdb",
|
||||||
sendPageIndexdb , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageIndexdb , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_TITLEDB , "admin/titledb" , 0 , "titledb" , 0 , 0,
|
{ PAGE_TITLEDB , "admin/titledb" , 0 , "titledb" , 0 , 0,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"titledb",
|
"titledb",
|
||||||
sendPageTitledb , 2,NULL,NULL,PG_NOAPI},
|
sendPageTitledb , 2,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
// 1 = usePost
|
// 1 = usePost
|
||||||
|
|
||||||
{ PAGE_CRAWLBOT , "crawlbot" , 0 , "crawlbot" , 1 , 0,
|
{ PAGE_CRAWLBOT , "crawlbot" , 0 , "crawlbot" , 1 , 0,
|
||||||
"simplified spider controls",
|
"simplified spider controls",
|
||||||
sendPageCrawlbot , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageCrawlbot , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_SPIDERDB , "admin/spiderdb" , 0 , "spider queue" , 0 , 0 ,
|
{ PAGE_SPIDERDB , "admin/spiderdb" , 0 , "spider queue" , 0 , 0 ,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"spider queue",
|
"spider queue",
|
||||||
sendPageSpiderdb , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
|
sendPageSpiderdb , 0 ,NULL,NULL,
|
||||||
|
PG_STATUS|PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
//{ PAGE_PRIORITIES, "admin/priorities" , 0 , "priority controls",1,1,
|
//{ PAGE_PRIORITIES, "admin/priorities" , 0 , "priority controls",1,1,
|
||||||
// //USER_ADMIN | USER_MASTER ,
|
// //USER_ADMIN | USER_MASTER ,
|
||||||
// "spider priorities",
|
// "spider priorities",
|
||||||
@ -290,33 +348,45 @@ static WebPage s_pages[] = {
|
|||||||
#ifndef CYGWIN
|
#ifndef CYGWIN
|
||||||
{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
|
{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
|
||||||
"SEO info",
|
"SEO info",
|
||||||
sendPageSEO , 2 ,NULL,NULL,PG_NOAPI},
|
sendPageSEO , 2 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
#else
|
#else
|
||||||
{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
|
{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
|
||||||
"SEO info",
|
"SEO info",
|
||||||
sendPageResults , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageResults , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
{ PAGE_ACCESS , "admin/access" , 0 , "access" , 1 , M_POST,
|
{ PAGE_ACCESS , "admin/access" , 0 , "access" , 1 , M_POST,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"access password, ip, admin ips etc. all goes in here",
|
"access password, ip, admin ips etc. all goes in here",
|
||||||
sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageGeneric , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
{ PAGE_SEARCHBOX , "admin/searchbox", 0 , "search" , 0 , 0 ,
|
{ PAGE_SEARCHBOX , "admin/searchbox", 0 , "search" , 0 , 0 ,
|
||||||
//USER_ADMIN | USER_MASTER ,
|
//USER_ADMIN | USER_MASTER ,
|
||||||
"search box",
|
"search box",
|
||||||
sendPageResults , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageResults , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI},
|
||||||
|
|
||||||
{ PAGE_PARSER , "admin/parser" , 0 , "parser" , 0,M_POST,
|
{ PAGE_PARSER , "admin/parser" , 0 , "parser" , 0,M_POST,
|
||||||
//USER_MASTER ,
|
//USER_MASTER ,
|
||||||
"page parser",
|
"page parser",
|
||||||
sendPageParser , 2 ,NULL,NULL,PG_NOAPI},
|
sendPageParser , 2 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_SITEDB , "admin/tagdb" , 0 , "tagdb" , 0 , M_POST,
|
{ PAGE_SITEDB , "admin/tagdb" , 0 , "tagdb" , 0 , M_POST,
|
||||||
//USER_MASTER | USER_ADMIN,
|
//USER_MASTER | USER_ADMIN,
|
||||||
"add/remove/get tags for sites/urls",
|
"add/remove/get tags for sites/urls",
|
||||||
sendPageTagdb , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageTagdb , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_COLLADMIN},
|
||||||
|
|
||||||
{ PAGE_CATDB , "admin/catdb" , 0 , "catdb" , 0,M_POST,
|
{ PAGE_CATDB , "admin/catdb" , 0 , "catdb" , 0,M_POST,
|
||||||
//USER_MASTER | USER_ADMIN,
|
//USER_MASTER | USER_ADMIN,
|
||||||
"catdb",
|
"catdb",
|
||||||
sendPageCatdb , 0 ,NULL,NULL,PG_NOAPI},
|
sendPageCatdb , 0 ,NULL,NULL,
|
||||||
|
PG_NOAPI|PG_ROOTADMIN},
|
||||||
|
|
||||||
//{ PAGE_LOGIN2 , "admin/login" , 0 , "login" , 0 , 0,
|
//{ PAGE_LOGIN2 , "admin/login" , 0 , "login" , 0 , 0,
|
||||||
// //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
|
// //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
|
||||||
//"login link - also logoffs user",
|
//"login link - also logoffs user",
|
||||||
@ -525,7 +595,11 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
|||||||
//Host *h = g_hostdb.m_myHost;
|
//Host *h = g_hostdb.m_myHost;
|
||||||
|
|
||||||
// now use this...
|
// now use this...
|
||||||
bool isAdmin = g_conf.isRootAdmin ( s , r );
|
bool isRootAdmin = g_conf.isRootAdmin ( s , r );
|
||||||
|
|
||||||
|
|
||||||
|
CollectionRec *cr = g_collectiondb.getRec ( r , true );
|
||||||
|
|
||||||
|
|
||||||
////////////////////
|
////////////////////
|
||||||
////////////////////
|
////////////////////
|
||||||
@ -534,10 +608,14 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
|||||||
//
|
//
|
||||||
////////////////////
|
////////////////////
|
||||||
////////////////////
|
////////////////////
|
||||||
if ( ! publicPage && ! isAdmin )
|
|
||||||
return sendPageLogin ( s , r );
|
|
||||||
|
|
||||||
if ( page == PAGE_CRAWLBOT && ! isAdmin )
|
// no longer, we let anyone snoop around to check out the gui
|
||||||
|
//char guest = r->getLong("guest",0);
|
||||||
|
|
||||||
|
//if ( ! publicPage && ! isRootAdmin && ! guest )
|
||||||
|
// return sendPageLogin ( s , r );
|
||||||
|
|
||||||
|
if ( page == PAGE_CRAWLBOT && ! isRootAdmin )
|
||||||
log("pages: accessing a crawlbot page without admin privs. "
|
log("pages: accessing a crawlbot page without admin privs. "
|
||||||
"no parms can be changed.");
|
"no parms can be changed.");
|
||||||
|
|
||||||
@ -655,6 +733,39 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// CLOUD SEARCH ENGINE SUPPORT
|
||||||
|
//
|
||||||
|
// if not the root admin only all user to change settings, etc.
|
||||||
|
// if the collection rec is a guest collection. i.e. in the cloud.
|
||||||
|
//
|
||||||
|
//bool isRootAdmin = g_conf.isRootAdmin(sock,hr);
|
||||||
|
bool isRootColl = false;
|
||||||
|
if ( cr && strcmp(cr->m_coll,"main")==0 ) isRootColl = true;
|
||||||
|
if ( cr && strcmp(cr->m_coll,"dmoz")==0 ) isRootColl = true;
|
||||||
|
if ( cr && strcmp(cr->m_coll,"demo")==0 ) isRootColl = true;
|
||||||
|
// the main,dmoz and demo collections are root admin only
|
||||||
|
// if ( ! isRootAdmin && isRootColl ) {
|
||||||
|
// g_errno = ENOPERM;
|
||||||
|
// return log("parms: root admin can only change main/dmoz/demo"
|
||||||
|
// " collections.");
|
||||||
|
// }
|
||||||
|
// just knowing the collection name is enough for a cloud user to
|
||||||
|
// modify the collection's parms. however, to modify the master
|
||||||
|
// controls or stuff in g_conf, you have to be root admin.
|
||||||
|
if ( ! g_conf.m_allowCloudUsers && ! isRootAdmin ) {
|
||||||
|
//g_errno = ENOPERM;
|
||||||
|
//return log("parms: permission denied for user");
|
||||||
|
return sendPageLogin ( s , r );
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// get safebuf stored in TcpSocket class
|
// get safebuf stored in TcpSocket class
|
||||||
SafeBuf *parmList = &s->m_handyBuf;
|
SafeBuf *parmList = &s->m_handyBuf;
|
||||||
|
|
||||||
@ -668,13 +779,12 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
|||||||
////////
|
////////
|
||||||
|
|
||||||
// . convert http request to list of parmdb records
|
// . convert http request to list of parmdb records
|
||||||
// . will only add parm recs we have permission to modify
|
// . will only add parm recs we have permission to modify!!!
|
||||||
// . if no collection supplied will just return true with no g_errno
|
// . if no collection supplied will just return true with no g_errno
|
||||||
if ( isAdmin &&
|
if ( //isRootAdmin &&
|
||||||
! g_parms.convertHttpRequestToParmList ( r, parmList, page, s))
|
! g_parms.convertHttpRequestToParmList ( r, parmList, page, s))
|
||||||
return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));
|
return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));
|
||||||
|
|
||||||
|
|
||||||
// . add parmList using Parms::m_msg4 to all hosts!
|
// . add parmList using Parms::m_msg4 to all hosts!
|
||||||
// . returns true and sets g_errno on error
|
// . returns true and sets g_errno on error
|
||||||
// . returns false if would block
|
// . returns false if would block
|
||||||
@ -682,7 +792,7 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
|||||||
// . so then doneBroadcastingParms() is called when all hosts
|
// . so then doneBroadcastingParms() is called when all hosts
|
||||||
// have received the updated parms, unless a host is dead,
|
// have received the updated parms, unless a host is dead,
|
||||||
// in which case he should sync up when he comes back up
|
// in which case he should sync up when he comes back up
|
||||||
if ( isAdmin &&
|
if ( //isCollAdmin &&
|
||||||
! g_parms.broadcastParmList ( parmList ,
|
! g_parms.broadcastParmList ( parmList ,
|
||||||
s , // state is socket i guess
|
s , // state is socket i guess
|
||||||
doneBroadcastingParms ) )
|
doneBroadcastingParms ) )
|
||||||
@ -960,6 +1070,10 @@ bool printTopNavButton ( char *text,
|
|||||||
"border-style:solid;"
|
"border-style:solid;"
|
||||||
//"margin-bottom:-3px;"
|
//"margin-bottom:-3px;"
|
||||||
"border-color:blue;"
|
"border-color:blue;"
|
||||||
|
// fix for msie. no this is bad for firefox
|
||||||
|
//"padding-bottom:7px;"
|
||||||
|
// fix msie this way:
|
||||||
|
"border-bottom-width:4px;"
|
||||||
"border-bottom-color:white;"
|
"border-bottom-color:white;"
|
||||||
//"overflow-y:hidden;"
|
//"overflow-y:hidden;"
|
||||||
//"overflow-x:hidden;"
|
//"overflow-x:hidden;"
|
||||||
@ -1190,7 +1304,7 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
//
|
//
|
||||||
// first the nav column
|
// first the nav column
|
||||||
//
|
//
|
||||||
sb->safePrintf("<TD bgcolor=#f3c714 " // yellow/gold
|
sb->safePrintf("<TD bgcolor=#%s "//f3c714 " // yellow/gold
|
||||||
"valign=top "
|
"valign=top "
|
||||||
"style=\""
|
"style=\""
|
||||||
"width:210px;"
|
"width:210px;"
|
||||||
@ -1213,13 +1327,15 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
"height:100px;"
|
"height:100px;"
|
||||||
"\">"
|
"\">"
|
||||||
"<br style=line-height:10px;>"
|
"<br style=line-height:10px;>"
|
||||||
"<img width=54 height=79 alt=HOME src=/rocket.jpg>"
|
"<img width=54 height=79 alt=HOME border=0 "
|
||||||
|
"src=/rocket.jpg>"
|
||||||
"</div>"
|
"</div>"
|
||||||
"</a>"
|
"</a>"
|
||||||
"</center>"
|
"</center>"
|
||||||
|
|
||||||
"<br>"
|
"<br>"
|
||||||
"<br>"
|
"<br>"
|
||||||
|
, GOLD
|
||||||
,coll
|
,coll
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -1302,6 +1418,27 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
// collection navbar
|
// collection navbar
|
||||||
status&=printCollectionNavBar ( sb, page , username, coll,pwd, qs,s,r);
|
status&=printCollectionNavBar ( sb, page , username, coll,pwd, qs,s,r);
|
||||||
|
|
||||||
|
// count the statuses
|
||||||
|
long emptyCount = 0;
|
||||||
|
long doneCount = 0;
|
||||||
|
long activeCount = 0;
|
||||||
|
long pauseCount = 0;
|
||||||
|
for (long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||||
|
CollectionRec *cc = g_collectiondb.m_recs[i];
|
||||||
|
if ( ! cc ) continue;
|
||||||
|
CrawlInfo *ci = &cc->m_globalCrawlInfo;
|
||||||
|
if ( cc->m_spideringEnabled &&
|
||||||
|
! ci->m_hasUrlsReadyToSpider &&
|
||||||
|
ci->m_urlsHarvested )
|
||||||
|
emptyCount++;
|
||||||
|
else if ( ! ci->m_hasUrlsReadyToSpider )
|
||||||
|
doneCount++;
|
||||||
|
else if (cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider )
|
||||||
|
activeCount++;
|
||||||
|
else if (!cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider)
|
||||||
|
pauseCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
sb->safePrintf("</div>");
|
sb->safePrintf("</div>");
|
||||||
|
|
||||||
@ -1313,21 +1450,27 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
);
|
);
|
||||||
sb->safePrintf(
|
sb->safePrintf(
|
||||||
"<font color=black>"
|
"<font color=black>"
|
||||||
"●</font> spider is done"
|
"●</font> spider is done (%li)"
|
||||||
"<br>"
|
"<br>"
|
||||||
|
|
||||||
"<font color=orange>"
|
"<font color=orange>"
|
||||||
"●</font> spider is paused"
|
"●</font> spider is paused (%li)"
|
||||||
"<br>"
|
"<br>"
|
||||||
|
|
||||||
"<font color=green>"
|
"<font color=green>"
|
||||||
"●</font> spider is active"
|
"●</font> spider is active (%li)"
|
||||||
"<br>"
|
"<br>"
|
||||||
|
|
||||||
"<font color=gray>"
|
"<font color=gray>"
|
||||||
"●</font> spider queue is empty"
|
"●</font> spider queue empty (%li)"
|
||||||
"<br>"
|
"<br>"
|
||||||
"</div>"
|
"</div>"
|
||||||
|
|
||||||
|
,doneCount
|
||||||
|
,pauseCount
|
||||||
|
,activeCount
|
||||||
|
,emptyCount
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@ -1339,14 +1482,31 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
//
|
//
|
||||||
|
|
||||||
// the controls will go here
|
// the controls will go here
|
||||||
sb->safePrintf("<TD valign=top >"
|
sb->safePrintf("<TD valign=top>"
|
||||||
|
|
||||||
|
// MDW 9/27/2014: tried to fix that blue border
|
||||||
|
// in MSIE but could not easily make it go away.
|
||||||
|
// seems like the table cell truncates the div's
|
||||||
|
// left border below even if i put a z-index:1000;
|
||||||
|
// on there.
|
||||||
|
|
||||||
|
// "style="
|
||||||
|
// "border-color:green;"
|
||||||
|
// "border-left-width:3px;"
|
||||||
|
// "border-style:solid;"
|
||||||
|
// "margin-left:-30px;"
|
||||||
|
// ">"
|
||||||
|
|
||||||
"<div style=\"padding-left:20px;"
|
"<div style=\"padding-left:20px;"
|
||||||
|
|
||||||
"margin-left:-3px;"
|
"margin-left:-3px;"
|
||||||
|
|
||||||
"border-color:#f3c714;"
|
"border-color:#%s;"//f3c714;"
|
||||||
"border-width:3px;"
|
"border-width:3px;"
|
||||||
"border-left-width:3px;"
|
// make this from 3px to 4px for msie
|
||||||
|
"border-left-width:4px;"
|
||||||
|
// another msie fix:
|
||||||
|
//"position:absolute;"
|
||||||
"border-top-width:0px;"
|
"border-top-width:0px;"
|
||||||
"border-right-width:0px;"
|
"border-right-width:0px;"
|
||||||
"border-bottom-color:blue;"
|
"border-bottom-color:blue;"
|
||||||
@ -1354,8 +1514,11 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
"border-style:solid;"
|
"border-style:solid;"
|
||||||
"padding:4px;"
|
"padding:4px;"
|
||||||
|
|
||||||
"background-color:#f3c714;\" " // yellow/gold
|
"background-color:#%s;\" "//f3c714;\" " // yellow/gold
|
||||||
"id=prepane>");
|
"id=prepane>"
|
||||||
|
, GOLD
|
||||||
|
, GOLD
|
||||||
|
);
|
||||||
|
|
||||||
// logout link on far right
|
// logout link on far right
|
||||||
sb->safePrintf("<div align=right "
|
sb->safePrintf("<div align=right "
|
||||||
@ -1392,7 +1555,7 @@ bool Pages::printAdminTop (SafeBuf *sb ,
|
|||||||
//sb->safePrintf ("</td></tr></table><br/>\n");//<br/>\n");
|
//sb->safePrintf ("</td></tr></table><br/>\n");//<br/>\n");
|
||||||
|
|
||||||
SafeBuf mb;
|
SafeBuf mb;
|
||||||
bool added = printRedBox ( &mb );
|
bool added = printRedBox ( &mb , s , r );
|
||||||
|
|
||||||
// print emergency msg box
|
// print emergency msg box
|
||||||
if ( added )
|
if ( added )
|
||||||
@ -2229,6 +2392,7 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
|
|||||||
if ( i == PAGE_API ) continue;
|
if ( i == PAGE_API ) continue;
|
||||||
if ( i == PAGE_SEARCHBOX ) continue;
|
if ( i == PAGE_SEARCHBOX ) continue;
|
||||||
if ( i == PAGE_TITLEDB ) continue;
|
if ( i == PAGE_TITLEDB ) continue;
|
||||||
|
if ( i == PAGE_IMPORT ) continue;
|
||||||
// move these links to the coll nav bar on the left
|
// move these links to the coll nav bar on the left
|
||||||
if ( i == PAGE_ADDCOLL ) continue;
|
if ( i == PAGE_ADDCOLL ) continue;
|
||||||
if ( i == PAGE_DELCOLL ) continue;
|
if ( i == PAGE_DELCOLL ) continue;
|
||||||
@ -2511,7 +2675,7 @@ bool Pages::printCollectionNavBar ( SafeBuf *sb ,
|
|||||||
|
|
||||||
// every other coll in a darker div
|
// every other coll in a darker div
|
||||||
if ( (row % 2) == 0 )
|
if ( (row % 2) == 0 )
|
||||||
sb->safePrintf("</div>");
|
sb->safePrintf("</div>\n");
|
||||||
else
|
else
|
||||||
sb->safePrintf("<br>\n");
|
sb->safePrintf("<br>\n");
|
||||||
}
|
}
|
||||||
@ -3170,7 +3334,8 @@ bool printApiForPage ( SafeBuf *sb , long PAGENUM , CollectionRec *cr ) {
|
|||||||
"<td>STRING</td>"
|
"<td>STRING</td>"
|
||||||
"<td>output format</td>"
|
"<td>output format</td>"
|
||||||
"<td>html</td>"
|
"<td>html</td>"
|
||||||
"<td>Display output in this format.</td>"
|
"<td>Display output in this format. Can be "
|
||||||
|
"<i>html</i>, <i>json</i> or <i>xml</i>.</td>"
|
||||||
"</tr>"
|
"</tr>"
|
||||||
, blues[count%2]
|
, blues[count%2]
|
||||||
, count
|
, count
|
||||||
@ -3243,7 +3408,7 @@ bool printApiForPage ( SafeBuf *sb , long PAGENUM , CollectionRec *cr ) {
|
|||||||
|
|
||||||
// dup page fix. so we should 'masterpwd' and 'masterip'
|
// dup page fix. so we should 'masterpwd' and 'masterip'
|
||||||
// in the list now.
|
// in the list now.
|
||||||
if ( pageNum == PAGE_SECURITY ) pageNum = PAGE_BASIC_SECURITY;
|
//if ( pageNum ==PAGE_SECURITY ) pageNum = PAGE_BASIC_SECURITY;
|
||||||
|
|
||||||
|
|
||||||
if ( pageNum != PAGENUM ) continue;
|
if ( pageNum != PAGENUM ) continue;
|
||||||
@ -3629,7 +3794,9 @@ bool sendPageLogin ( TcpSocket *socket , HttpRequest *hr ) {
|
|||||||
"<input type=submit value=ok border=0 onclick=\""
|
"<input type=submit value=ok border=0 onclick=\""
|
||||||
"document.cookie='pwd='+document.getElementById('ppp')"
|
"document.cookie='pwd='+document.getElementById('ppp')"
|
||||||
".value+"
|
".value+"
|
||||||
"';expires=0';"
|
// fix so cookies work for msie. expires= is wrong i guess.
|
||||||
|
//"';expires=9999999';"
|
||||||
|
"';max-age=9999999';"
|
||||||
"\"></td>"
|
"\"></td>"
|
||||||
"</tr></table>"
|
"</tr></table>"
|
||||||
"</center>"
|
"</center>"
|
||||||
@ -3649,10 +3816,10 @@ bool sendPageLogin ( TcpSocket *socket , HttpRequest *hr ) {
|
|||||||
NULL);// cookie
|
NULL);// cookie
|
||||||
}
|
}
|
||||||
|
|
||||||
bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage ) {
|
bool printRedBox2 ( SafeBuf *sb , TcpSocket *sock , HttpRequest *hr ) {
|
||||||
SafeBuf mb;
|
SafeBuf mb;
|
||||||
// return false if no red box
|
// return false if no red box
|
||||||
if ( ! printRedBox ( &mb , isRootWebPage ) ) return false;
|
if ( ! printRedBox ( &mb , sock , hr ) ) return false;
|
||||||
// otherwise, print it
|
// otherwise, print it
|
||||||
sb->safeStrcpy ( mb.getBufStart() );
|
sb->safeStrcpy ( mb.getBufStart() );
|
||||||
// return true since we printed one
|
// return true since we printed one
|
||||||
@ -3660,7 +3827,7 @@ bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// emergency message box
|
// emergency message box
|
||||||
bool printRedBox ( SafeBuf *mb , bool isRootWebPage ) {
|
bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
|
||||||
|
|
||||||
PingServer *ps = &g_pingServer;
|
PingServer *ps = &g_pingServer;
|
||||||
|
|
||||||
@ -3685,12 +3852,14 @@ bool printRedBox ( SafeBuf *mb , bool isRootWebPage ) {
|
|||||||
|
|
||||||
mb->safePrintf("<div style=max-width:500px;>");
|
mb->safePrintf("<div style=max-width:500px;>");
|
||||||
|
|
||||||
|
long page = g_pages.getDynamicPageNumber ( hr );
|
||||||
|
|
||||||
// are we just starting off? give them a little help.
|
// are we just starting off? give them a little help.
|
||||||
CollectionRec *cr = g_collectiondb.getRec("main");
|
CollectionRec *crm = g_collectiondb.getRec("main");
|
||||||
if ( g_collectiondb.m_numRecs == 1 &&
|
if ( g_collectiondb.m_numRecs == 1 &&
|
||||||
cr &&
|
crm &&
|
||||||
isRootWebPage &&
|
page == PAGE_ROOT && // isRootWebPage &&
|
||||||
cr->m_globalCrawlInfo.m_pageDownloadAttempts == 0 ) {
|
crm->m_globalCrawlInfo.m_pageDownloadAttempts == 0 ) {
|
||||||
if ( adds ) mb->safePrintf("<br>");
|
if ( adds ) mb->safePrintf("<br>");
|
||||||
adds++;
|
adds++;
|
||||||
mb->safePrintf("%s",box);
|
mb->safePrintf("%s",box);
|
||||||
@ -3702,18 +3871,36 @@ bool printRedBox ( SafeBuf *mb , bool isRootWebPage ) {
|
|||||||
mb->safePrintf("%s",boxEnd);
|
mb->safePrintf("%s",boxEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( isRootWebPage ) {
|
if ( page == PAGE_ROOT ) { // isRootWebPage ) {
|
||||||
mb->safePrintf("</div>");
|
mb->safePrintf("</div>");
|
||||||
return (bool)adds;
|
return (bool)adds;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( g_conf.m_numConnectIps == 0 && g_conf.m_numMasterPwds == 0 ) {
|
if ( g_conf.m_masterPwds.length() == 0 ) {
|
||||||
if ( adds ) mb->safePrintf("<br>");
|
if ( adds ) mb->safePrintf("<br>");
|
||||||
adds++;
|
adds++;
|
||||||
mb->safePrintf("%s",box);
|
mb->safePrintf("%s",box);
|
||||||
mb->safePrintf("URGENT. Please specify a password "
|
mb->safePrintf("URGENT. Please specify a ROOT password "
|
||||||
"or IP address in the "
|
"or IP address in the "
|
||||||
"<a href=/admin/security>security</a> "
|
"<a href=/admin/rootpassword>root "
|
||||||
|
"password</a> "
|
||||||
|
"table. Right now anybody might be able "
|
||||||
|
"to access the Gigablast admin controls.");
|
||||||
|
mb->safePrintf("%s",boxEnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
CollectionRec *cr = g_collectiondb.getRec ( hr );
|
||||||
|
|
||||||
|
if ( cr &&
|
||||||
|
cr->m_collectionPasswords.length() == 0 &&
|
||||||
|
cr->m_collectionIps.length() == 0 ) {
|
||||||
|
if ( adds ) mb->safePrintf("<br>");
|
||||||
|
adds++;
|
||||||
|
mb->safePrintf("%s",box);
|
||||||
|
mb->safePrintf("URGENT. Please specify a COLLECTION password "
|
||||||
|
"or IP address in the "
|
||||||
|
"<a href=/admin/collectionpasswords>"
|
||||||
|
"password</a> "
|
||||||
"table. Right now anybody might be able "
|
"table. Right now anybody might be able "
|
||||||
"to access the Gigablast admin controls.");
|
"to access the Gigablast admin controls.");
|
||||||
mb->safePrintf("%s",boxEnd);
|
mb->safePrintf("%s",boxEnd);
|
||||||
@ -3792,6 +3979,42 @@ bool printRedBox ( SafeBuf *mb , bool isRootWebPage ) {
|
|||||||
mb->safePrintf("%s",boxEnd);
|
mb->safePrintf("%s",boxEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
WebPage *wp = g_pages.getPage(page);
|
||||||
|
|
||||||
|
if ( wp &&
|
||||||
|
(wp->m_pgflags & (PG_ROOTADMIN|PG_COLLADMIN)) &&
|
||||||
|
! g_conf.isRootAdmin(sock,hr) &&
|
||||||
|
! g_conf.isCollAdmin(sock,hr) ) {
|
||||||
|
if ( adds ) mb->safePrintf("<br>");
|
||||||
|
adds++;
|
||||||
|
mb->safePrintf("%s",box);
|
||||||
|
|
||||||
|
char *ff = "admin/settings";
|
||||||
|
if ( wp ) ff = wp->m_filename;
|
||||||
|
|
||||||
|
mb->safePrintf("You have no write access to these "
|
||||||
|
"controls. Please enter the collection or "
|
||||||
|
"root password to get access: "
|
||||||
|
|
||||||
|
"<form method=GET action=\"/%s\" name=xyz>"
|
||||||
|
|
||||||
|
"<input type=password id=ppp name=xpwd size=20>"
|
||||||
|
|
||||||
|
"<input type=submit value=ok "
|
||||||
|
"border=0 onclick=\""
|
||||||
|
"document.cookie='pwd='+"
|
||||||
|
"document.getElementById('ppp')"
|
||||||
|
".value+"
|
||||||
|
"';max-age=9999999';"
|
||||||
|
"\">"
|
||||||
|
|
||||||
|
"</form>"
|
||||||
|
, ff );
|
||||||
|
|
||||||
|
mb->safePrintf("%s",boxEnd);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( ps->m_numHostsDead ) {
|
if ( ps->m_numHostsDead ) {
|
||||||
if ( adds ) mb->safePrintf("<br>");
|
if ( adds ) mb->safePrintf("<br>");
|
||||||
adds++;
|
adds++;
|
||||||
|
23
Pages.h
23
Pages.h
@ -5,8 +5,13 @@
|
|||||||
#ifndef _PAGES_H_
|
#ifndef _PAGES_H_
|
||||||
#define _PAGES_H_
|
#define _PAGES_H_
|
||||||
|
|
||||||
bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage = false ) ;
|
bool printRedBox2 ( SafeBuf *sb ,
|
||||||
bool printRedBox ( SafeBuf *mb , bool isRootWebPage = false ) ;
|
class TcpSocket *sock ,
|
||||||
|
class HttpRequest *hr );
|
||||||
|
|
||||||
|
bool printRedBox ( SafeBuf *mb ,
|
||||||
|
class TcpSocket *sock ,
|
||||||
|
class HttpRequest *hr );
|
||||||
|
|
||||||
// for PageEvents.cpp and Accessdb.cpp
|
// for PageEvents.cpp and Accessdb.cpp
|
||||||
//#define RESULTSWIDTHSTR "550px"
|
//#define RESULTSWIDTHSTR "550px"
|
||||||
@ -17,6 +22,8 @@ bool printRedBox ( SafeBuf *mb , bool isRootWebPage = false ) ;
|
|||||||
#include "SafeBuf.h"
|
#include "SafeBuf.h"
|
||||||
#include "PageCrawlBot.h" // sendPageCrawlBot()
|
#include "PageCrawlBot.h" // sendPageCrawlBot()
|
||||||
|
|
||||||
|
#define GOLD "f3c734"
|
||||||
|
|
||||||
#define LIGHTER_BLUE "e8e8ff"
|
#define LIGHTER_BLUE "e8e8ff"
|
||||||
#define LIGHT_BLUE "d0d0e0"
|
#define LIGHT_BLUE "d0d0e0"
|
||||||
#define DARK_BLUE "c0c0f0"
|
#define DARK_BLUE "c0c0f0"
|
||||||
@ -102,6 +109,8 @@ bool sendPageQA ( TcpSocket *sock , HttpRequest *hr ) ;
|
|||||||
// values for WebPage::m_flags
|
// values for WebPage::m_flags
|
||||||
#define PG_NOAPI 0x01
|
#define PG_NOAPI 0x01
|
||||||
#define PG_STATUS 0x02
|
#define PG_STATUS 0x02
|
||||||
|
#define PG_COLLADMIN 0x04
|
||||||
|
#define PG_ROOTADMIN 0x08
|
||||||
|
|
||||||
// . description of a dynamic page
|
// . description of a dynamic page
|
||||||
// . we have a static array of these in Pages.cpp
|
// . we have a static array of these in Pages.cpp
|
||||||
@ -308,13 +317,13 @@ extern class Pages g_pages;
|
|||||||
// . some pages also have urls like /search to mean page=0
|
// . some pages also have urls like /search to mean page=0
|
||||||
enum {
|
enum {
|
||||||
// dummy pages
|
// dummy pages
|
||||||
PAGE_NOHOSTLINKS = 0,
|
//PAGE_NOHOSTLINKS = 0,
|
||||||
PAGE_ADMIN ,
|
//PAGE_ADMIN ,
|
||||||
//PAGE_QUALITY ,
|
//PAGE_QUALITY ,
|
||||||
PAGE_PUBLIC ,
|
//PAGE_PUBLIC ,
|
||||||
|
|
||||||
// public pages
|
// public pages
|
||||||
PAGE_ROOT ,
|
PAGE_ROOT =0,
|
||||||
PAGE_RESULTS ,
|
PAGE_RESULTS ,
|
||||||
//PAGE_WIDGET,
|
//PAGE_WIDGET,
|
||||||
PAGE_ADDURL , // 5
|
PAGE_ADDURL , // 5
|
||||||
@ -339,7 +348,7 @@ enum {
|
|||||||
PAGE_SPIDER ,
|
PAGE_SPIDER ,
|
||||||
PAGE_SPIDERPROXIES ,
|
PAGE_SPIDERPROXIES ,
|
||||||
PAGE_LOG ,
|
PAGE_LOG ,
|
||||||
PAGE_SECURITY , // 19
|
PAGE_ROOTPASSWORDS , // 19
|
||||||
PAGE_ADDCOLL , //20
|
PAGE_ADDCOLL , //20
|
||||||
PAGE_DELCOLL ,
|
PAGE_DELCOLL ,
|
||||||
PAGE_CLONECOLL ,
|
PAGE_CLONECOLL ,
|
||||||
|
475
Parms.cpp
475
Parms.cpp
@ -234,7 +234,7 @@ bool CommandRemoveConnectIpRow ( char *rec ) {
|
|||||||
for ( long i = 0 ; i < g_parms.m_numParms ; i++ ) {
|
for ( long i = 0 ; i < g_parms.m_numParms ; i++ ) {
|
||||||
Parm *m = &g_parms.m_parms[i];
|
Parm *m = &g_parms.m_parms[i];
|
||||||
// parm must be on the security / root passwords page
|
// parm must be on the security / root passwords page
|
||||||
if ( m->m_page != PAGE_SECURITY ) continue;
|
if ( m->m_page != PAGE_ROOTPASSWORDS ) continue;
|
||||||
// must be an array!
|
// must be an array!
|
||||||
if ( ! m->isArray() ) continue;
|
if ( ! m->isArray() ) continue;
|
||||||
// sanity check
|
// sanity check
|
||||||
@ -263,7 +263,7 @@ bool CommandRemovePasswordRow ( char *rec ) {
|
|||||||
for ( long i = 0 ; i < g_parms.m_numParms ; i++ ) {
|
for ( long i = 0 ; i < g_parms.m_numParms ; i++ ) {
|
||||||
Parm *m = &g_parms.m_parms[i];
|
Parm *m = &g_parms.m_parms[i];
|
||||||
// parm must be on the security / root passwords page
|
// parm must be on the security / root passwords page
|
||||||
if ( m->m_page != PAGE_SECURITY ) continue;
|
if ( m->m_page != PAGE_ROOTPASSWORDS ) continue;
|
||||||
// must be an array!
|
// must be an array!
|
||||||
if ( ! m->isArray() ) continue;
|
if ( ! m->isArray() ) continue;
|
||||||
// sanity check
|
// sanity check
|
||||||
@ -1164,11 +1164,14 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r ) {
|
|||||||
|
|
||||||
char format = r->getReplyFormat();
|
char format = r->getReplyFormat();
|
||||||
|
|
||||||
|
char guide = r->getLong("guide",0);
|
||||||
|
|
||||||
//
|
//
|
||||||
// CLOUD SEARCH ENGINE SUPPORT
|
// CLOUD SEARCH ENGINE SUPPORT
|
||||||
//
|
//
|
||||||
char *action = r->getString("action",NULL);
|
char *action = r->getString("action",NULL);
|
||||||
if ( page == PAGE_BASIC_SETTINGS &&
|
if ( page == PAGE_BASIC_SETTINGS &&
|
||||||
|
guide &&
|
||||||
// this is non-null if handling a submit request
|
// this is non-null if handling a submit request
|
||||||
action &&
|
action &&
|
||||||
format == FORMAT_HTML ) {
|
format == FORMAT_HTML ) {
|
||||||
@ -1299,14 +1302,14 @@ bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {
|
|||||||
if ( page == PAGE_LOG ) tt = "Log Controls";
|
if ( page == PAGE_LOG ) tt = "Log Controls";
|
||||||
if ( page == PAGE_MASTER ) tt = "Master Controls";
|
if ( page == PAGE_MASTER ) tt = "Master Controls";
|
||||||
if ( page == PAGE_INJECT ) tt = "Inject Url";
|
if ( page == PAGE_INJECT ) tt = "Inject Url";
|
||||||
if ( page == PAGE_SECURITY ) tt = "Security";
|
if ( page == PAGE_ROOTPASSWORDS ) tt = "Root Passwords";
|
||||||
if ( page == PAGE_ADDURL2 ) tt = "Add Urls";
|
if ( page == PAGE_ADDURL2 ) tt = "Add Urls";
|
||||||
if ( page == PAGE_SPIDER ) tt = "Spider Controls";
|
if ( page == PAGE_SPIDER ) tt = "Spider Controls";
|
||||||
if ( page == PAGE_SEARCH ) tt = "Search Controls";
|
if ( page == PAGE_SEARCH ) tt = "Search Controls";
|
||||||
if ( page == PAGE_ACCESS ) tt = "Access Controls";
|
if ( page == PAGE_ACCESS ) tt = "Access Controls";
|
||||||
if ( page == PAGE_FILTERS ) tt = "Url Filters";
|
if ( page == PAGE_FILTERS ) tt = "Url Filters";
|
||||||
if ( page == PAGE_BASIC_SETTINGS ) tt = "Settings";
|
if ( page == PAGE_BASIC_SETTINGS ) tt = "Settings";
|
||||||
if ( page == PAGE_BASIC_SECURITY ) tt = "Security";
|
if ( page == PAGE_BASIC_SECURITY ) tt = "Collection Passwords";
|
||||||
//if ( page == PAGE_SITES ) tt = "Site List";
|
//if ( page == PAGE_SITES ) tt = "Site List";
|
||||||
//if ( page == PAGE_PRIORITIES ) tt = "Priority Controls";
|
//if ( page == PAGE_PRIORITIES ) tt = "Priority Controls";
|
||||||
//if ( page == PAGE_RULES ) tt = "Site Rules";
|
//if ( page == PAGE_RULES ) tt = "Site Rules";
|
||||||
@ -1329,6 +1332,8 @@ bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {
|
|||||||
if ( format == FORMAT_XML || format == FORMAT_JSON ) {
|
if ( format == FORMAT_XML || format == FORMAT_JSON ) {
|
||||||
char *coll = g_collectiondb.getDefaultColl(r);
|
char *coll = g_collectiondb.getDefaultColl(r);
|
||||||
CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
|
CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
|
||||||
|
bool isRootAdmin = g_conf.isRootAdmin ( s , r );
|
||||||
|
bool isCollAdmin = g_conf.isCollAdmin ( s , r );
|
||||||
g_parms.printParms2 ( sb ,
|
g_parms.printParms2 ( sb ,
|
||||||
page ,
|
page ,
|
||||||
cr ,
|
cr ,
|
||||||
@ -1336,7 +1341,9 @@ bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {
|
|||||||
1 , // long pd , print desc?
|
1 , // long pd , print desc?
|
||||||
false , // isCrawlbot
|
false , // isCrawlbot
|
||||||
format ,
|
format ,
|
||||||
NULL ); // TcpSocket *sock
|
NULL , // TcpSocket *sock
|
||||||
|
isRootAdmin ,
|
||||||
|
isCollAdmin );
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1705,12 +1712,16 @@ bool Parms::printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r) {
|
|||||||
long pd = r->getLong("pd",1);
|
long pd = r->getLong("pd",1);
|
||||||
char *coll = g_collectiondb.getDefaultColl(r);
|
char *coll = g_collectiondb.getDefaultColl(r);
|
||||||
CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
|
CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
|
||||||
|
|
||||||
|
bool isRootAdmin = g_conf.isRootAdmin ( s , r );
|
||||||
|
bool isCollAdmin = g_conf.isCollAdmin ( s , r );
|
||||||
|
|
||||||
//char *coll = r->getString ( "c" );
|
//char *coll = r->getString ( "c" );
|
||||||
//if ( ! coll || ! coll[0] ) coll = "main";
|
//if ( ! coll || ! coll[0] ) coll = "main";
|
||||||
//CollectionRec *cr = g_collectiondb.getRec ( coll );
|
//CollectionRec *cr = g_collectiondb.getRec ( coll );
|
||||||
// if "main" collection does not exist, try another
|
// if "main" collection does not exist, try another
|
||||||
//if ( ! cr ) cr = getCollRecFromHttpRequest ( r );
|
//if ( ! cr ) cr = getCollRecFromHttpRequest ( r );
|
||||||
printParms2 ( sb, page, cr, nc, pd,0,0 , s);
|
printParms2 ( sb, page, cr, nc, pd,0,0 , s,isRootAdmin,isCollAdmin);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1723,7 +1734,9 @@ bool Parms::printParms2 ( SafeBuf* sb ,
|
|||||||
long pd ,
|
long pd ,
|
||||||
bool isCrawlbot ,
|
bool isCrawlbot ,
|
||||||
char format , // bool isJSON ,
|
char format , // bool isJSON ,
|
||||||
TcpSocket *sock ) {
|
TcpSocket *sock ,
|
||||||
|
bool isRootAdmin ,
|
||||||
|
bool isCollAdmin ) {
|
||||||
bool status = true;
|
bool status = true;
|
||||||
s_count = 0;
|
s_count = 0;
|
||||||
// background color
|
// background color
|
||||||
@ -1736,13 +1749,12 @@ bool Parms::printParms2 ( SafeBuf* sb ,
|
|||||||
if ( cr ) coll = cr->m_coll;
|
if ( cr ) coll = cr->m_coll;
|
||||||
|
|
||||||
// page aliases
|
// page aliases
|
||||||
if ( page == PAGE_BASIC_SECURITY )
|
//if ( page == PAGE_BASIC_SECURITY )
|
||||||
page = PAGE_SECURITY;
|
// page = PAGE_ROOTPASSWORDS;
|
||||||
|
|
||||||
GigablastRequest gr;
|
GigablastRequest gr;
|
||||||
g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);
|
g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);
|
||||||
|
|
||||||
|
|
||||||
// find in parms list
|
// find in parms list
|
||||||
for ( long i = 0 ; i < m_numParms ; i++ ) {
|
for ( long i = 0 ; i < m_numParms ; i++ ) {
|
||||||
// get it
|
// get it
|
||||||
@ -1824,7 +1836,7 @@ bool Parms::printParms2 ( SafeBuf* sb ,
|
|||||||
sb->safePrintf ( "%s" , m->m_desc );
|
sb->safePrintf ( "%s" , m->m_desc );
|
||||||
// print users current ip if showing the list
|
// print users current ip if showing the list
|
||||||
// of "Master IPs" for admin access
|
// of "Master IPs" for admin access
|
||||||
if ( m->m_page == PAGE_SECURITY &&
|
if ( m->m_page == PAGE_ROOTPASSWORDS &&
|
||||||
sock &&
|
sock &&
|
||||||
m->m_title &&
|
m->m_title &&
|
||||||
strstr(m->m_title,"IP") )
|
strstr(m->m_title,"IP") )
|
||||||
@ -1852,7 +1864,9 @@ bool Parms::printParms2 ( SafeBuf* sb ,
|
|||||||
bg,nc,pd,
|
bg,nc,pd,
|
||||||
false,
|
false,
|
||||||
isCrawlbot,
|
isCrawlbot,
|
||||||
format);//isJSON);
|
format,
|
||||||
|
isRootAdmin,
|
||||||
|
isCollAdmin);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// if not first in a row, skip it, we printed it already
|
// if not first in a row, skip it, we printed it already
|
||||||
@ -1872,7 +1886,9 @@ bool Parms::printParms2 ( SafeBuf* sb ,
|
|||||||
status &=printParm(sb,NULL,&m_parms[k],k,
|
status &=printParm(sb,NULL,&m_parms[k],k,
|
||||||
newj,jend,(char *)THIS,coll,NULL,
|
newj,jend,(char *)THIS,coll,NULL,
|
||||||
bg,nc,pd, j==size-1,
|
bg,nc,pd, j==size-1,
|
||||||
isCrawlbot,format);//isJSON)
|
isCrawlbot,format,
|
||||||
|
isRootAdmin,
|
||||||
|
isCollAdmin);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// end array table
|
// end array table
|
||||||
@ -1901,7 +1917,9 @@ bool Parms::printParm ( SafeBuf* sb,
|
|||||||
bool lastRow ,
|
bool lastRow ,
|
||||||
bool isCrawlbot ,
|
bool isCrawlbot ,
|
||||||
//bool isJSON ) {
|
//bool isJSON ) {
|
||||||
char format ) {
|
char format ,
|
||||||
|
bool isRootAdmin ,
|
||||||
|
bool isCollAdmin ) {
|
||||||
bool status = true;
|
bool status = true;
|
||||||
// do not print if no permissions
|
// do not print if no permissions
|
||||||
//if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
|
//if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
|
||||||
@ -1961,7 +1979,7 @@ bool Parms::printParm ( SafeBuf* sb,
|
|||||||
page == PAGE_SPIDER ||
|
page == PAGE_SPIDER ||
|
||||||
page == PAGE_SPIDERPROXIES ||
|
page == PAGE_SPIDERPROXIES ||
|
||||||
page == PAGE_FILTERS ||
|
page == PAGE_FILTERS ||
|
||||||
page == PAGE_SECURITY ||
|
page == PAGE_ROOTPASSWORDS ||
|
||||||
page == PAGE_REPAIR ||
|
page == PAGE_REPAIR ||
|
||||||
page == PAGE_LOG ) {
|
page == PAGE_LOG ) {
|
||||||
sb->safePrintf ( "\t\t<currentValue><![CDATA[");
|
sb->safePrintf ( "\t\t<currentValue><![CDATA[");
|
||||||
@ -1994,7 +2012,7 @@ bool Parms::printParm ( SafeBuf* sb,
|
|||||||
page == PAGE_SPIDER ||
|
page == PAGE_SPIDER ||
|
||||||
page == PAGE_SPIDERPROXIES ||
|
page == PAGE_SPIDERPROXIES ||
|
||||||
page == PAGE_FILTERS ||
|
page == PAGE_FILTERS ||
|
||||||
page == PAGE_SECURITY ||
|
page == PAGE_ROOTPASSWORDS ||
|
||||||
page == PAGE_REPAIR ||
|
page == PAGE_REPAIR ||
|
||||||
page == PAGE_LOG ) {
|
page == PAGE_LOG ) {
|
||||||
sb->safePrintf ( "\t\t\"currentValue\":\"");
|
sb->safePrintf ( "\t\t\"currentValue\":\"");
|
||||||
@ -2390,6 +2408,19 @@ bool Parms::printParm ( SafeBuf* sb,
|
|||||||
strcmp(m->m_title,"url filters profile")==0)
|
strcmp(m->m_title,"url filters profile")==0)
|
||||||
// url filters profile drop down "ufp"
|
// url filters profile drop down "ufp"
|
||||||
printDropDownProfile ( sb , "ufp" , cr );//*s );
|
printDropDownProfile ( sb , "ufp" , cr );//*s );
|
||||||
|
|
||||||
|
// do not expose master passwords or IPs to non-root admins
|
||||||
|
else if ( ( m->m_flags & PF_PRIVATE ) &&
|
||||||
|
m->m_obj == OBJ_CONF &&
|
||||||
|
! isRootAdmin )
|
||||||
|
return true;
|
||||||
|
|
||||||
|
// do not expose collection passwords or IPs to non-collection admins
|
||||||
|
else if ( ( m->m_flags & PF_PRIVATE ) &&
|
||||||
|
m->m_obj == OBJ_COLL &&
|
||||||
|
! isCollAdmin )
|
||||||
|
return true;
|
||||||
|
|
||||||
else if ( t == TYPE_RETRIES )
|
else if ( t == TYPE_RETRIES )
|
||||||
printDropDown ( 4 , sb , cgi , *s , false , false );
|
printDropDown ( 4 , sb , cgi , *s , false , false );
|
||||||
else if ( t == TYPE_FILEUPLOADBUTTON ) {
|
else if ( t == TYPE_FILEUPLOADBUTTON ) {
|
||||||
@ -2742,10 +2773,10 @@ bool Parms::printParm ( SafeBuf* sb,
|
|||||||
// do not allow removal of last default url filters rule
|
// do not allow removal of last default url filters rule
|
||||||
//if ( lastRow && !strcmp(m->m_cgi,"fsp")) show = false;
|
//if ( lastRow && !strcmp(m->m_cgi,"fsp")) show = false;
|
||||||
char *suffix = "";
|
char *suffix = "";
|
||||||
if ( m->m_page == PAGE_SECURITY &&
|
if ( m->m_page == PAGE_ROOTPASSWORDS &&
|
||||||
m->m_type == TYPE_IP )
|
m->m_type == TYPE_IP )
|
||||||
suffix = "ip";
|
suffix = "ip";
|
||||||
if ( m->m_page == PAGE_SECURITY &&
|
if ( m->m_page == PAGE_ROOTPASSWORDS &&
|
||||||
m->m_type == TYPE_STRINGNONEMPTY )
|
m->m_type == TYPE_STRINGNONEMPTY )
|
||||||
suffix = "pwd";
|
suffix = "pwd";
|
||||||
if ( show )
|
if ( show )
|
||||||
@ -4993,7 +5024,7 @@ void Parms::init ( ) {
|
|||||||
"assigns a url or site to a ruleset. Each tagdb record is "
|
"assigns a url or site to a ruleset. Each tagdb record is "
|
||||||
"about 100 bytes or so.";
|
"about 100 bytes or so.";
|
||||||
m->m_off = (char *)&g_conf.m_tagdbMaxTreeMem - g;
|
m->m_off = (char *)&g_conf.m_tagdbMaxTreeMem - g;
|
||||||
m->m_def = "31028000";
|
m->m_def = "1028000";
|
||||||
m->m_type = TYPE_LONG;
|
m->m_type = TYPE_LONG;
|
||||||
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
||||||
m->m_page = PAGE_NONE;
|
m->m_page = PAGE_NONE;
|
||||||
@ -6416,7 +6447,8 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "percent similar dedup summary";
|
m->m_title = "percent similar dedup summary";
|
||||||
m->m_desc = "If document summary is this percent similar "
|
m->m_desc = "If document summary (and title) are "
|
||||||
|
"this percent similar "
|
||||||
"to a document summary above it, then remove it from the "
|
"to a document summary above it, then remove it from the "
|
||||||
"search results. 100 means only to remove if exactly the "
|
"search results. 100 means only to remove if exactly the "
|
||||||
"same. 0 means no summary deduping. You must also supply "
|
"same. 0 means no summary deduping. You must also supply "
|
||||||
@ -6790,6 +6822,7 @@ void Parms::init ( ) {
|
|||||||
m->m_flags = PF_API;
|
m->m_flags = PF_API;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -8690,6 +8723,17 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
|
||||||
|
m->m_title = "use cache";
|
||||||
|
m->m_desc = "Use 0 if Gigablast should not read or write from "
|
||||||
|
"any caches at any level.";
|
||||||
|
m->m_def = "-1";
|
||||||
|
m->m_off = (char *)&si.m_useCache - y;
|
||||||
|
m->m_type = TYPE_CHAR;
|
||||||
|
m->m_cgi = "usecache";
|
||||||
|
m->m_page = PAGE_RESULTS;
|
||||||
|
m->m_obj = OBJ_SI;
|
||||||
|
m++;
|
||||||
|
|
||||||
m->m_title = "read from cache";
|
m->m_title = "read from cache";
|
||||||
m->m_desc = "Should we read search results from the cache? Set "
|
m->m_desc = "Should we read search results from the cache? Set "
|
||||||
"to false to fix dmoz bug.";
|
"to false to fix dmoz bug.";
|
||||||
@ -8704,17 +8748,6 @@ void Parms::init ( ) {
|
|||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "use cache";
|
|
||||||
m->m_desc = "Use 0 if Gigablast should not read or write from "
|
|
||||||
"any caches at any level.";
|
|
||||||
m->m_def = "-1";
|
|
||||||
m->m_off = (char *)&si.m_useCache - y;
|
|
||||||
m->m_type = TYPE_CHAR;
|
|
||||||
m->m_cgi = "usecache";
|
|
||||||
m->m_page = PAGE_RESULTS;
|
|
||||||
m->m_obj = OBJ_SI;
|
|
||||||
m++;
|
|
||||||
|
|
||||||
m->m_title = "write to cache";
|
m->m_title = "write to cache";
|
||||||
m->m_desc = "Use 0 if Gigablast should not write to "
|
m->m_desc = "Use 0 if Gigablast should not write to "
|
||||||
"any caches at any level.";
|
"any caches at any level.";
|
||||||
@ -8768,6 +8801,7 @@ void Parms::init ( ) {
|
|||||||
m->m_sprpp = 0;
|
m->m_sprpp = 0;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "restrict search to pages that link to this url";
|
m->m_title = "restrict search to pages that link to this url";
|
||||||
@ -8783,7 +8817,8 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "search for this phrase quoted";
|
m->m_title = "search for this phrase quoted";
|
||||||
m->m_desc = "The phrase which will be quoted.";
|
m->m_desc = "The phrase which will be quoted in the query. From the "
|
||||||
|
"advanced search page, adv.html.";
|
||||||
m->m_off = (char *)&si.m_quote1 - y;
|
m->m_off = (char *)&si.m_quote1 - y;
|
||||||
m->m_type = TYPE_CHARPTR;//STRING;
|
m->m_type = TYPE_CHARPTR;//STRING;
|
||||||
//m->m_size = 512;
|
//m->m_size = 512;
|
||||||
@ -8792,10 +8827,12 @@ void Parms::init ( ) {
|
|||||||
m->m_sprpp = 0;
|
m->m_sprpp = 0;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "search for this second phrase quoted";
|
m->m_title = "search for this second phrase quoted";
|
||||||
m->m_desc = "The phrase which will be quoted.";
|
m->m_desc = "The phrase which will be quoted in the query. From the "
|
||||||
|
"advanced search page, adv.html.";
|
||||||
m->m_off = (char *)&si.m_quote2 - y;
|
m->m_off = (char *)&si.m_quote2 - y;
|
||||||
m->m_type = TYPE_CHARPTR;//STRING;
|
m->m_type = TYPE_CHARPTR;//STRING;
|
||||||
//m->m_size = 512;
|
//m->m_size = 512;
|
||||||
@ -8804,6 +8841,7 @@ void Parms::init ( ) {
|
|||||||
m->m_sprpp = 0;
|
m->m_sprpp = 0;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -8836,7 +8874,8 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "require these query terms";
|
m->m_title = "require these query terms";
|
||||||
m->m_desc = "Returned results will have all the words in X.";
|
m->m_desc = "Returned results will have all the words in X. "
|
||||||
|
"From the advanced search page, adv.html.";
|
||||||
m->m_off = (char *)&si.m_plus - y;
|
m->m_off = (char *)&si.m_plus - y;
|
||||||
m->m_def = NULL;
|
m->m_def = NULL;
|
||||||
m->m_type = TYPE_CHARPTR;//STRING;
|
m->m_type = TYPE_CHARPTR;//STRING;
|
||||||
@ -8846,10 +8885,12 @@ void Parms::init ( ) {
|
|||||||
m->m_sprpp = 0;
|
m->m_sprpp = 0;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "avoid these query terms";
|
m->m_title = "avoid these query terms";
|
||||||
m->m_desc = "Returned results will NOT have any of the words in X.";
|
m->m_desc = "Returned results will NOT have any of the words in X. "
|
||||||
|
"From the advanced search page, adv.html.";
|
||||||
m->m_off = (char *)&si.m_minus - y;
|
m->m_off = (char *)&si.m_minus - y;
|
||||||
m->m_type = TYPE_CHARPTR;//STRING;
|
m->m_type = TYPE_CHARPTR;//STRING;
|
||||||
m->m_cgi = "minus";
|
m->m_cgi = "minus";
|
||||||
@ -8858,6 +8899,7 @@ void Parms::init ( ) {
|
|||||||
m->m_sprpp = 0;
|
m->m_sprpp = 0;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "format of the returned search results";
|
m->m_title = "format of the returned search results";
|
||||||
@ -8869,6 +8911,7 @@ void Parms::init ( ) {
|
|||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m->m_cgi = "format";
|
m->m_cgi = "format";
|
||||||
|
m->m_flags = PF_NOAPI; // alread in the api, so don't repeat
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "family filter";
|
m->m_title = "family filter";
|
||||||
@ -8899,10 +8942,8 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
m->m_title = "cached page highlight query";
|
m->m_title = "cached page highlight query";
|
||||||
m->m_desc = "Highlight the terms in this query instead. For "
|
m->m_desc = "Highlight the terms in this query instead.";
|
||||||
"display of the cached page.";
|
|
||||||
m->m_def = NULL;
|
m->m_def = NULL;
|
||||||
m->m_off = (char *)&si.m_highlightQuery - y;
|
m->m_off = (char *)&si.m_highlightQuery - y;
|
||||||
m->m_type = TYPE_CHARPTR;//STRING;
|
m->m_type = TYPE_CHARPTR;//STRING;
|
||||||
@ -8914,6 +8955,7 @@ void Parms::init ( ) {
|
|||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
m->m_title = "highlight event date in summaries.";
|
m->m_title = "highlight event date in summaries.";
|
||||||
m->m_desc = "Can be 0 or 1 to respectively disable or enable "
|
m->m_desc = "Can be 0 or 1 to respectively disable or enable "
|
||||||
@ -8942,8 +8984,8 @@ void Parms::init ( ) {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
m->m_title = "Query match offsets";
|
m->m_title = "Query match offsets";
|
||||||
m->m_desc = "Return a list of the offsets of each query word"
|
m->m_desc = "Return a list of the offsets of each query word "
|
||||||
"actually matched in the document. 1 means byte offset,"
|
"actually matched in the document. 1 means byte offset, "
|
||||||
"and 2 means word offset.";
|
"and 2 means word offset.";
|
||||||
m->m_def = "0";
|
m->m_def = "0";
|
||||||
m->m_off = (char *)&si.m_queryMatchOffsets - y;
|
m->m_off = (char *)&si.m_queryMatchOffsets - y;
|
||||||
@ -8953,6 +8995,7 @@ void Parms::init ( ) {
|
|||||||
m->m_smax = 2;
|
m->m_smax = 2;
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "boolean status";
|
m->m_title = "boolean status";
|
||||||
@ -9016,7 +9059,7 @@ void Parms::init ( ) {
|
|||||||
"<br><br>\n"
|
"<br><br>\n"
|
||||||
"<b>META</b> is the meta tag name to which Gigablast will "
|
"<b>META</b> is the meta tag name to which Gigablast will "
|
||||||
"restrict the content used to generate the topics. Do not "
|
"restrict the content used to generate the topics. Do not "
|
||||||
"specify thie field to restrict the content to the body of "
|
"specify this field to restrict the content to the body of "
|
||||||
"each document, that is the default.\n"
|
"each document, that is the default.\n"
|
||||||
"<br><br>\n"
|
"<br><br>\n"
|
||||||
"<b>DEL</b> is a single character delimeter which defines "
|
"<b>DEL</b> is a single character delimeter which defines "
|
||||||
@ -9060,21 +9103,46 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
m->m_title = "niceness";
|
||||||
|
m->m_desc = "Can be 0 or 1. 0 is usually a faster, high-priority "
|
||||||
|
"query, 1 is a slower, lower-priority query.";
|
||||||
|
m->m_def = "0";
|
||||||
|
m->m_off = (char *)&si.m_niceness - y;
|
||||||
|
m->m_type = TYPE_LONG;
|
||||||
|
m->m_cgi = "niceness";
|
||||||
|
m->m_smin = 0;
|
||||||
|
m->m_smax = 1;
|
||||||
|
m->m_page = PAGE_RESULTS;
|
||||||
|
m->m_obj = OBJ_SI;
|
||||||
|
m++;
|
||||||
|
|
||||||
|
m->m_title = "debug flag";
|
||||||
|
m->m_desc = "Is 1 to log debug information, 0 otherwise.";
|
||||||
|
m->m_def = "0";
|
||||||
|
m->m_off = (char *)&si.m_debug - y;
|
||||||
|
m->m_type = TYPE_BOOL;
|
||||||
|
m->m_cgi = "debug";
|
||||||
|
//m->m_priv = 1;
|
||||||
|
m->m_page = PAGE_RESULTS;
|
||||||
|
m->m_obj = OBJ_SI;
|
||||||
|
m++;
|
||||||
|
|
||||||
m->m_title = "return number of docs per topic";
|
m->m_title = "return number of docs per topic";
|
||||||
m->m_desc = "Use 1 if you want Gigablast to return the number of "
|
m->m_desc = "Use 1 if you want Gigablast to return the number of "
|
||||||
"documents in the search results that contained each topic.";
|
"documents in the search results that contained each topic "
|
||||||
|
"(gigabit).";
|
||||||
m->m_def = "1";
|
m->m_def = "1";
|
||||||
m->m_off = (char *)&si.m_returnDocIdCount - y;
|
m->m_off = (char *)&si.m_returnDocIdCount - y;
|
||||||
m->m_type = TYPE_BOOL;
|
m->m_type = TYPE_BOOL;
|
||||||
m->m_cgi = "rdc";
|
m->m_cgi = "rdc";
|
||||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "return docids per topic";
|
m->m_title = "return docids per topic";
|
||||||
m->m_desc = "Use 1 if you want Gigablast to return the list of "
|
m->m_desc = "Use 1 if you want Gigablast to return the list of "
|
||||||
"docIds from the search results that contained each topic.";
|
"docIds from the search results that contained each topic "
|
||||||
|
"(gigabit).";
|
||||||
m->m_def = "0";
|
m->m_def = "0";
|
||||||
m->m_off = (char *)&si.m_returnDocIds - y;
|
m->m_off = (char *)&si.m_returnDocIds - y;
|
||||||
m->m_type = TYPE_BOOL;
|
m->m_type = TYPE_BOOL;
|
||||||
@ -9085,7 +9153,7 @@ void Parms::init ( ) {
|
|||||||
|
|
||||||
m->m_title = "return popularity per topic";
|
m->m_title = "return popularity per topic";
|
||||||
m->m_desc = "Use 1 if you want Gigablast to return the popularity "
|
m->m_desc = "Use 1 if you want Gigablast to return the popularity "
|
||||||
"of each topic.";
|
"of each topic (gigabit).";
|
||||||
m->m_def = "0";
|
m->m_def = "0";
|
||||||
m->m_off = (char *)&si.m_returnPops - y;
|
m->m_off = (char *)&si.m_returnPops - y;
|
||||||
m->m_type = TYPE_BOOL;
|
m->m_type = TYPE_BOOL;
|
||||||
@ -9095,19 +9163,6 @@ void Parms::init ( ) {
|
|||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "niceness";
|
|
||||||
m->m_desc = "Can be 0 or 1. 0 is usually a faster, high-priority "
|
|
||||||
"query, 1 is a slower, lower-priority query.";
|
|
||||||
m->m_def = "0";
|
|
||||||
m->m_off = (char *)&si.m_niceness - y;
|
|
||||||
m->m_type = TYPE_LONG;
|
|
||||||
m->m_cgi = "niceness";
|
|
||||||
m->m_smin = 0;
|
|
||||||
m->m_smax = 1;
|
|
||||||
m->m_page = PAGE_RESULTS;
|
|
||||||
m->m_obj = OBJ_SI;
|
|
||||||
m++;
|
|
||||||
|
|
||||||
//m->m_title = "compound list max size";
|
//m->m_title = "compound list max size";
|
||||||
//m->m_desc = "Is the max size in bytes of the compound termlist. "
|
//m->m_desc = "Is the max size in bytes of the compound termlist. "
|
||||||
// "Each document id is 6 bytes.";
|
// "Each document id is 6 bytes.";
|
||||||
@ -9120,23 +9175,12 @@ void Parms::init ( ) {
|
|||||||
//m++;
|
//m++;
|
||||||
|
|
||||||
|
|
||||||
m->m_title = "debug flag";
|
|
||||||
m->m_desc = "Is 1 to log debug information, 0 otherwise.";
|
|
||||||
m->m_def = "0";
|
|
||||||
m->m_off = (char *)&si.m_debug - y;
|
|
||||||
m->m_type = TYPE_BOOL;
|
|
||||||
m->m_cgi = "debug";
|
|
||||||
//m->m_priv = 1;
|
|
||||||
m->m_page = PAGE_RESULTS;
|
|
||||||
m->m_obj = OBJ_SI;
|
|
||||||
m++;
|
|
||||||
|
|
||||||
m->m_title = "debug gigabits flag";
|
m->m_title = "debug gigabits flag";
|
||||||
m->m_desc = "Is 1 to log gigabits debug information, 0 otherwise.";
|
m->m_desc = "Is 1 to log gigabits debug information, 0 otherwise.";
|
||||||
m->m_def = "0";
|
m->m_def = "0";
|
||||||
m->m_off = (char *)&si.m_debugGigabits - y;
|
m->m_off = (char *)&si.m_debugGigabits - y;
|
||||||
m->m_type = TYPE_BOOL;
|
m->m_type = TYPE_BOOL;
|
||||||
m->m_cgi = "debug";
|
m->m_cgi = "debuggigabits";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m++;
|
m++;
|
||||||
@ -9161,6 +9205,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "iu";
|
m->m_cgi = "iu";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "image link";
|
m->m_title = "image link";
|
||||||
@ -9173,6 +9218,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "ix";
|
m->m_cgi = "ix";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "image width";
|
m->m_title = "image width";
|
||||||
@ -9183,6 +9229,7 @@ void Parms::init ( ) {
|
|||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m->m_def = "200";
|
m->m_def = "200";
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "image height";
|
m->m_title = "image height";
|
||||||
@ -9194,6 +9241,7 @@ void Parms::init ( ) {
|
|||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
m->m_def = "200";
|
m->m_def = "200";
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
// m->m_title = "password";
|
// m->m_title = "password";
|
||||||
@ -9269,6 +9317,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "gbcountry";
|
m->m_cgi = "gbcountry";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -9370,6 +9419,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "qcs";
|
m->m_cgi = "qcs";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
// buzz
|
// buzz
|
||||||
@ -9381,6 +9431,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "inlinks";
|
m->m_cgi = "inlinks";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
// buzz
|
// buzz
|
||||||
@ -9394,6 +9445,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "outlinks";
|
m->m_cgi = "outlinks";
|
||||||
m->m_page = PAGE_RESULTS;
|
m->m_page = PAGE_RESULTS;
|
||||||
m->m_obj = OBJ_SI;
|
m->m_obj = OBJ_SI;
|
||||||
|
m->m_flags = PF_NOAPI;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
// buzz
|
// buzz
|
||||||
@ -9537,6 +9589,17 @@ void Parms::init ( ) {
|
|||||||
m->m_flags = PF_API;
|
m->m_flags = PF_API;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
m->m_title = "query";
|
||||||
|
m->m_desc = "Highlight this query in the page.";
|
||||||
|
m->m_def = "";
|
||||||
|
m->m_type = TYPE_CHARPTR;
|
||||||
|
m->m_page = PAGE_GET;
|
||||||
|
m->m_obj = OBJ_GBREQUEST;
|
||||||
|
m->m_cgi = "q";
|
||||||
|
m->m_off = (char *)&gr.m_query - (char *)&gr;
|
||||||
|
m->m_flags = PF_API;
|
||||||
|
m++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// for /get
|
// for /get
|
||||||
m->m_title = "query highlighting query";
|
m->m_title = "query highlighting query";
|
||||||
@ -10071,7 +10134,7 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "afgdwd";
|
m->m_cgi = "afgdwd";
|
||||||
m->m_off = (char *)&g_conf.m_gzipDownloads - g;
|
m->m_off = (char *)&g_conf.m_gzipDownloads - g;
|
||||||
m->m_type = TYPE_BOOL;
|
m->m_type = TYPE_BOOL;
|
||||||
m->m_def = "0";
|
m->m_def = "1";
|
||||||
m->m_page = PAGE_MASTER;
|
m->m_page = PAGE_MASTER;
|
||||||
m->m_obj = OBJ_CONF;
|
m->m_obj = OBJ_CONF;
|
||||||
m++;
|
m++;
|
||||||
@ -14309,7 +14372,7 @@ void Parms::init ( ) {
|
|||||||
|
|
||||||
m->m_title = "directory containing titledb files";
|
m->m_title = "directory containing titledb files";
|
||||||
m->m_desc = "Import documents contained in titledb files in this "
|
m->m_desc = "Import documents contained in titledb files in this "
|
||||||
"directory.";
|
"directory. This is an ABSOLUTE directory path.";
|
||||||
m->m_cgi = "importdir";
|
m->m_cgi = "importdir";
|
||||||
m->m_xml = "importDir";
|
m->m_xml = "importDir";
|
||||||
m->m_page = PAGE_IMPORT;
|
m->m_page = PAGE_IMPORT;
|
||||||
@ -14951,7 +15014,8 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "percent similar dedup summary default value";
|
m->m_title = "percent similar dedup summary default value";
|
||||||
m->m_desc = "If document summary is this percent similar "
|
m->m_desc = "If document summary (and title) are "
|
||||||
|
"this percent similar "
|
||||||
"to a document summary above it, then remove it from the "
|
"to a document summary above it, then remove it from the "
|
||||||
"search results. 100 means only to remove if exactly the "
|
"search results. 100 means only to remove if exactly the "
|
||||||
"same. 0 means no summary deduping.";
|
"same. 0 means no summary deduping.";
|
||||||
@ -15991,7 +16055,19 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
|
||||||
|
m->m_title = "use proxies for spidering";
|
||||||
|
m->m_desc = "If this is true Gigablast will use the proxies "
|
||||||
|
"listed on the <i>proxies</i> page for spidering for "
|
||||||
|
"this collection regardless whether the proxies are enabled "
|
||||||
|
"on the <i>proxies</i> page.";
|
||||||
|
m->m_cgi = "useproxies";
|
||||||
|
m->m_off = (char *)&cr.m_forceUseFloaters - x;
|
||||||
|
m->m_type = TYPE_BOOL;
|
||||||
|
m->m_def = "0";
|
||||||
|
m->m_page = PAGE_SPIDER;
|
||||||
|
m->m_obj = OBJ_COLL;
|
||||||
|
m->m_flags = PF_CLONE;
|
||||||
|
m++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
m->m_title = "add url enabled";
|
m->m_title = "add url enabled";
|
||||||
@ -17305,14 +17381,14 @@ void Parms::init ( ) {
|
|||||||
m->m_def = "1";
|
m->m_def = "1";
|
||||||
m->m_page = PAGE_SPIDER;
|
m->m_page = PAGE_SPIDER;
|
||||||
m->m_obj = OBJ_COLL;
|
m->m_obj = OBJ_COLL;
|
||||||
m->m_flags = PF_CLONE;
|
m->m_flags = PF_CLONE | PF_HIDDEN;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_cgi = "apiUrl";
|
m->m_cgi = "apiUrl";
|
||||||
m->m_desc = "Send every spidered url to this url and index "
|
m->m_desc = "Send every spidered url to this url and index "
|
||||||
"the reply in addition to the normal indexing process. "
|
"the reply in addition to the normal indexing process. "
|
||||||
"Example: by specifying http://api.diffbot.com/v2/"
|
"Example: by specifying http://api.diffbot.com/v3/"
|
||||||
"analyze?mode=auto&token=<yourDiffbotToken> here "
|
"analyze?mode=high-precision&token=<yourDiffbotToken> here "
|
||||||
"you can index the structured JSON replies from diffbot for "
|
"you can index the structured JSON replies from diffbot for "
|
||||||
"every url that is spidered. "
|
"every url that is spidered. "
|
||||||
"Gigablast will automatically "
|
"Gigablast will automatically "
|
||||||
@ -18331,12 +18407,13 @@ void Parms::init ( ) {
|
|||||||
/////////////
|
/////////////
|
||||||
|
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
// SECURITY CONTROLS
|
// ROOT PASSWORDS page
|
||||||
///////////////////////////////////////////
|
///////////////////////////////////////////
|
||||||
|
|
||||||
|
|
||||||
m->m_title = "Master Passwords";
|
m->m_title = "Root Passwords";
|
||||||
m->m_desc = "Any matching password will have administrative access "
|
m->m_desc = "Whitespace separated list of passwords. "
|
||||||
|
"Any matching password will have administrative access "
|
||||||
"to Gigablast and all collections.";
|
"to Gigablast and all collections.";
|
||||||
//"If no Admin Password or Admin IP is specified then "
|
//"If no Admin Password or Admin IP is specified then "
|
||||||
//"Gigablast will only allow local IPs to connect to it "
|
//"Gigablast will only allow local IPs to connect to it "
|
||||||
@ -18344,17 +18421,17 @@ void Parms::init ( ) {
|
|||||||
m->m_cgi = "masterpwd";
|
m->m_cgi = "masterpwd";
|
||||||
m->m_xml = "masterPassword";
|
m->m_xml = "masterPassword";
|
||||||
m->m_obj = OBJ_CONF;
|
m->m_obj = OBJ_CONF;
|
||||||
m->m_max = MAX_MASTER_PASSWORDS;
|
|
||||||
m->m_off = (char *)&g_conf.m_masterPwds - g;
|
m->m_off = (char *)&g_conf.m_masterPwds - g;
|
||||||
m->m_type = TYPE_STRINGNONEMPTY;
|
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
|
||||||
m->m_size = PASSWORD_MAX_LEN+1;
|
m->m_page = PAGE_ROOTPASSWORDS;
|
||||||
m->m_page = PAGE_SECURITY;
|
//m->m_max = MAX_MASTER_PASSWORDS;
|
||||||
m->m_addin = 1; // "insert" follows?
|
//m->m_size = PASSWORD_MAX_LEN+1;
|
||||||
m->m_flags = PF_PRIVATE;
|
//m->m_addin = 1; // "insert" follows?
|
||||||
|
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
|
|
||||||
m->m_title = "Master IPs";
|
m->m_title = "Root IPs";
|
||||||
//m->m_desc = "Allow UDP requests from this list of IPs. Any datagram "
|
//m->m_desc = "Allow UDP requests from this list of IPs. Any datagram "
|
||||||
// "received not coming from one of these IPs, or an IP in "
|
// "received not coming from one of these IPs, or an IP in "
|
||||||
// "hosts.conf, is dropped. If another cluster is accessing this "
|
// "hosts.conf, is dropped. If another cluster is accessing this "
|
||||||
@ -18364,41 +18441,42 @@ void Parms::init ( ) {
|
|||||||
// "was disabled in the Master Controls. IPs that have 0 has "
|
// "was disabled in the Master Controls. IPs that have 0 has "
|
||||||
// "their Least Significant Byte are treated as wildcards for "
|
// "their Least Significant Byte are treated as wildcards for "
|
||||||
// "IP blocks. That is, 1.2.3.0 means 1.2.3.*.";
|
// "IP blocks. That is, 1.2.3.0 means 1.2.3.*.";
|
||||||
m->m_desc = "Any IPs in this list will have administrative access "
|
m->m_desc = "Whitespace separated list of Ips. "
|
||||||
|
"Any IPs in this list will have administrative access "
|
||||||
"to Gigablast and all collections.";
|
"to Gigablast and all collections.";
|
||||||
m->m_cgi = "masterip";
|
m->m_cgi = "masterip";
|
||||||
m->m_xml = "masterIp";
|
m->m_xml = "masterIp";
|
||||||
m->m_page = PAGE_SECURITY;
|
m->m_page = PAGE_ROOTPASSWORDS;
|
||||||
m->m_max = MAX_CONNECT_IPS;
|
m->m_off = (char *)&g_conf.m_connectIps - g;
|
||||||
m->m_off = (char *)g_conf.m_connectIps - g;
|
m->m_type = TYPE_SAFEBUF;//IP;
|
||||||
m->m_type = TYPE_IP;
|
|
||||||
m->m_priv = 2;
|
|
||||||
m->m_def = "";
|
m->m_def = "";
|
||||||
m->m_addin = 1; // "insert" follows?
|
//m->m_max = MAX_CONNECT_IPS;
|
||||||
|
//m->m_priv = 2;
|
||||||
|
//m->m_addin = 1; // "insert" follows?
|
||||||
//m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
//m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||||
m->m_obj = OBJ_CONF;
|
m->m_obj = OBJ_CONF;
|
||||||
m->m_flags = PF_PRIVATE;
|
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
|
||||||
m++;
|
m++;
|
||||||
|
|
||||||
m->m_title = "remove connect ip";
|
// m->m_title = "remove connect ip";
|
||||||
m->m_desc = "remove a connect ip";
|
// m->m_desc = "remove a connect ip";
|
||||||
m->m_cgi = "removeip";
|
// m->m_cgi = "removeip";
|
||||||
m->m_type = TYPE_CMD;
|
// m->m_type = TYPE_CMD;
|
||||||
m->m_page = PAGE_NONE;
|
// m->m_page = PAGE_NONE;
|
||||||
m->m_func = CommandRemoveConnectIpRow;
|
// m->m_func = CommandRemoveConnectIpRow;
|
||||||
m->m_cast = 1;
|
// m->m_cast = 1;
|
||||||
m->m_obj = OBJ_CONF;
|
// m->m_obj = OBJ_CONF;
|
||||||
m++;
|
// m++;
|
||||||
|
|
||||||
m->m_title = "remove a password";
|
// m->m_title = "remove a password";
|
||||||
m->m_desc = "remove a password";
|
// m->m_desc = "remove a password";
|
||||||
m->m_cgi = "removepwd";
|
// m->m_cgi = "removepwd";
|
||||||
m->m_type = TYPE_CMD;
|
// m->m_type = TYPE_CMD;
|
||||||
m->m_page = PAGE_NONE;
|
// m->m_page = PAGE_NONE;
|
||||||
m->m_func = CommandRemovePasswordRow;
|
// m->m_func = CommandRemovePasswordRow;
|
||||||
m->m_cast = 1;
|
// m->m_cast = 1;
|
||||||
m->m_obj = OBJ_CONF;
|
// m->m_obj = OBJ_CONF;
|
||||||
m++;
|
// m++;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -18414,7 +18492,7 @@ void Parms::init ( ) {
|
|||||||
m->m_perms = PAGE_MASTER;
|
m->m_perms = PAGE_MASTER;
|
||||||
m->m_size = USERS_TEXT_SIZE;
|
m->m_size = USERS_TEXT_SIZE;
|
||||||
m->m_plen = (char *)&g_conf.m_superTurksLen - g;
|
m->m_plen = (char *)&g_conf.m_superTurksLen - g;
|
||||||
m->m_page = PAGE_SECURITY;
|
m->m_page = PAGE_ROOTPASSWORDS;
|
||||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||||
m++;
|
m++;
|
||||||
*/
|
*/
|
||||||
@ -18447,7 +18525,7 @@ void Parms::init ( ) {
|
|||||||
m->m_perms = PAGE_MASTER;
|
m->m_perms = PAGE_MASTER;
|
||||||
m->m_size = USERS_TEXT_SIZE;
|
m->m_size = USERS_TEXT_SIZE;
|
||||||
m->m_plen = (char *)&g_conf.m_usersLen - g;
|
m->m_plen = (char *)&g_conf.m_usersLen - g;
|
||||||
m->m_page = PAGE_SECURITY;
|
m->m_page = PAGE_ROOTPASSWORDS;
|
||||||
m++;
|
m++;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -18469,6 +18547,36 @@ void Parms::init ( ) {
|
|||||||
m++;
|
m++;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
m->m_title = "Collection Passwords";
|
||||||
|
m->m_desc = "Whitespace separated list of passwords. "
|
||||||
|
"Any matching password will have administrative access "
|
||||||
|
"to the controls for just this collection.";
|
||||||
|
m->m_cgi = "collpwd";
|
||||||
|
m->m_xml = "collectionPasswords";
|
||||||
|
m->m_obj = OBJ_COLL;
|
||||||
|
m->m_off = (char *)&cr.m_collectionPasswords - x;
|
||||||
|
m->m_def = "";
|
||||||
|
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
|
||||||
|
m->m_page = PAGE_BASIC_SECURITY;
|
||||||
|
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
|
||||||
|
m++;
|
||||||
|
|
||||||
|
m->m_title = "Collection Ips";
|
||||||
|
m->m_desc = "Whitespace separated list of IPs. "
|
||||||
|
"Any matching IP will have administrative access "
|
||||||
|
"to the controls for just this collection.";
|
||||||
|
m->m_cgi = "collips";
|
||||||
|
m->m_xml = "collectionIps";
|
||||||
|
m->m_obj = OBJ_COLL;
|
||||||
|
m->m_off = (char *)&cr.m_collectionIps - x;
|
||||||
|
m->m_def = "";
|
||||||
|
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
|
||||||
|
m->m_page = PAGE_BASIC_SECURITY;
|
||||||
|
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
|
||||||
|
m++;
|
||||||
|
|
||||||
|
|
||||||
//////
|
//////
|
||||||
// END SECURITY CONTROLS
|
// END SECURITY CONTROLS
|
||||||
//////
|
//////
|
||||||
@ -19820,37 +19928,17 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
|
|||||||
// false = useDefaultRec?
|
// false = useDefaultRec?
|
||||||
CollectionRec *cr = g_collectiondb.getRec ( hr , false );
|
CollectionRec *cr = g_collectiondb.getRec ( hr , false );
|
||||||
|
|
||||||
//
|
|
||||||
// CLOUD SEARCH ENGINE SUPPORT
|
|
||||||
//
|
|
||||||
// if not the root admin only all user to change settings, etc.
|
|
||||||
// if the collection rec is a guest collection. i.e. in the cloud.
|
|
||||||
//
|
|
||||||
bool isRootAdmin = g_conf.isRootAdmin(sock,hr);
|
|
||||||
bool isRootColl = false;
|
|
||||||
if ( cr && strcmp(cr->m_coll,"main")==0 ) isRootColl = true;
|
|
||||||
if ( cr && strcmp(cr->m_coll,"dmoz")==0 ) isRootColl = true;
|
|
||||||
if ( cr && strcmp(cr->m_coll,"demo")==0 ) isRootColl = true;
|
|
||||||
// the main,dmoz and demo collections are root admin only
|
|
||||||
if ( ! isRootAdmin && isRootColl ) {
|
|
||||||
g_errno = ENOPERM;
|
|
||||||
return log("parms: root admin can only change main/dmoz/demo"
|
|
||||||
" collections.");
|
|
||||||
}
|
|
||||||
// just knowing the collection name is enough for a cloud user to
|
|
||||||
// modify the collection's parms. however, to modify the master
|
|
||||||
// controls or stuff in g_conf, you have to be root admin.
|
|
||||||
if ( ! g_conf.m_allowCloudUsers && ! isRootAdmin ) {
|
|
||||||
g_errno = ENOPERM;
|
|
||||||
return log("parms: permission denied for user");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//if ( c ) {
|
//if ( c ) {
|
||||||
// cr = g_collectiondb.getRec ( hr );
|
// cr = g_collectiondb.getRec ( hr );
|
||||||
// if ( ! cr ) log("parms: coll not found");
|
// if ( ! cr ) log("parms: coll not found");
|
||||||
//}
|
//}
|
||||||
|
|
||||||
|
bool isRootAdmin = g_conf.isRootAdmin ( sock , hr );
|
||||||
|
|
||||||
|
// does this user have permission to update the parms?
|
||||||
|
bool isCollAdmin = g_conf.isCollAdmin ( sock , hr ) ;
|
||||||
|
|
||||||
|
|
||||||
// might be g_conf specific, not coll specific
|
// might be g_conf specific, not coll specific
|
||||||
//bool hasPerm = false;
|
//bool hasPerm = false;
|
||||||
// just knowing the collection name of a custom crawl means you
|
// just knowing the collection name of a custom crawl means you
|
||||||
@ -19964,6 +20052,9 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
|
|||||||
// skip if not a command parm, like "addcoll"
|
// skip if not a command parm, like "addcoll"
|
||||||
if ( m->m_type != TYPE_CMD ) continue;
|
if ( m->m_type != TYPE_CMD ) continue;
|
||||||
|
|
||||||
|
if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
|
||||||
|
continue;
|
||||||
|
|
||||||
//
|
//
|
||||||
// HACK
|
// HACK
|
||||||
//
|
//
|
||||||
@ -20042,9 +20133,49 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
|
|||||||
//
|
//
|
||||||
// CLOUD SEARCH ENGINE SUPPORT
|
// CLOUD SEARCH ENGINE SUPPORT
|
||||||
//
|
//
|
||||||
|
|
||||||
|
//
|
||||||
|
// if this is the "delcoll" parm then "c" may have been
|
||||||
|
// excluded from http request, therefore isCollAdmin and
|
||||||
|
// isRootAdmin may be false, so see if they have permission
|
||||||
|
// for the "val" collection for this one...
|
||||||
|
bool hasPerm = false;
|
||||||
|
if ( m->m_page == PAGE_DELCOLL &&
|
||||||
|
strcmp(m->m_cgi,"delcoll") == 0 ) {
|
||||||
|
// permission override for /admin/delcoll cmd & parm
|
||||||
|
hasPerm = g_conf.isCollAdminForColl (sock,hr,val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// if this IP c-block as already added a collection then do not
|
||||||
|
// allow it to add another.
|
||||||
|
if ( m->m_page == PAGE_ADDCOLL &&
|
||||||
|
g_conf.m_allowCloudUsers &&
|
||||||
|
! isRootAdmin &&
|
||||||
|
strcmp(m->m_cgi,"addcoll")==0 ) {
|
||||||
|
// see if user's c block has already added a collection
|
||||||
|
long numAdded = 0;
|
||||||
|
if ( numAdded >= 1 ) {
|
||||||
|
g_errno = ENOPERM;
|
||||||
|
log("parms: already added a collection from "
|
||||||
|
"this cloud user's c-block.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
hasPerm = true;
|
||||||
|
}
|
||||||
|
|
||||||
// master controls require root permission
|
// master controls require root permission
|
||||||
if ( m->m_obj == OBJ_CONF && ! isRootAdmin )
|
if ( m->m_obj == OBJ_CONF && ! isRootAdmin ) {
|
||||||
|
log("parms: could not run root parm \"%s\" no perm.",
|
||||||
|
m->m_title);
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// need to have permission for collection for collrec parms
|
||||||
|
if ( m->m_obj == OBJ_COLL && ! isCollAdmin && ! hasPerm ) {
|
||||||
|
log("parms: could not run coll parm \"%s\" no perm.",
|
||||||
|
m->m_title);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// add the cmd parm
|
// add the cmd parm
|
||||||
if ( ! addNewParmToList2 ( parmList ,
|
if ( ! addNewParmToList2 ( parmList ,
|
||||||
@ -20127,35 +20258,6 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
|
|||||||
long occNum;
|
long occNum;
|
||||||
Parm *m = getParmFast1 ( field , &occNum );
|
Parm *m = getParmFast1 ( field , &occNum );
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// CLOUD SEARCH ENGINE SUPPORT
|
|
||||||
//
|
|
||||||
// master controls require root permission. otherwise, just
|
|
||||||
// knowing the collection name is enough for a cloud user
|
|
||||||
// to change settings.
|
|
||||||
//
|
|
||||||
if ( m && m->m_obj == OBJ_CONF && ! isRootAdmin )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
//
|
|
||||||
// CLOUD SEARCH ENGINE SUPPORT
|
|
||||||
//
|
|
||||||
// if this IP c-block as already added a collection then do not
|
|
||||||
// allow it to add another.
|
|
||||||
//
|
|
||||||
if ( m && strcmp(m->m_cgi,"addcoll")==0 && ! isRootAdmin ) {
|
|
||||||
// see if user's c block has already added a collection
|
|
||||||
long numAdded = 0;
|
|
||||||
if ( numAdded >= 1 ) {
|
|
||||||
g_errno = ENOPERM;
|
|
||||||
log("parms: already added a collection from "
|
|
||||||
"this cloud user's c-block.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// map "pause" to spidering enabled
|
// map "pause" to spidering enabled
|
||||||
//
|
//
|
||||||
@ -20168,10 +20270,28 @@ bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if ( ! m ) continue;
|
if ( ! m ) continue;
|
||||||
if ( m->m_type == TYPE_CMD ) continue;
|
|
||||||
|
|
||||||
if ( m->m_obj == OBJ_NONE ) continue;
|
// skip if IS a command parm, like "addcoll", we did that above
|
||||||
if ( m->m_obj == OBJ_SI ) continue;
|
if ( m->m_type == TYPE_CMD )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// CLOUD SEARCH ENGINE SUPPORT
|
||||||
|
//
|
||||||
|
// master controls require root permission. otherwise, just
|
||||||
|
// knowing the collection name is enough for a cloud user
|
||||||
|
// to change settings.
|
||||||
|
//
|
||||||
|
if ( m->m_obj == OBJ_CONF && ! isRootAdmin )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// need to have permission for collection for collrec parms
|
||||||
|
if ( m->m_obj == OBJ_COLL && ! isCollAdmin )
|
||||||
|
continue;
|
||||||
|
|
||||||
// convert spiderRoundStartTime=0 (roundStart=0 roundStart=1)
|
// convert spiderRoundStartTime=0 (roundStart=0 roundStart=1)
|
||||||
// to spiderRoundStartTime=<currenttime>+30secs
|
// to spiderRoundStartTime=<currenttime>+30secs
|
||||||
@ -21283,7 +21403,10 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) {
|
|||||||
if ( collnum >= 0 ) {
|
if ( collnum >= 0 ) {
|
||||||
cr = g_collectiondb.getRec ( collnum );
|
cr = g_collectiondb.getRec ( collnum );
|
||||||
if ( ! cr ) {
|
if ( ! cr ) {
|
||||||
log("parmdb: invalid collnum for parm");
|
char *ps = "unknown parm";
|
||||||
|
if ( parm ) ps = parm->m_title;
|
||||||
|
log("parmdb: invalid collnum %li for parm \"%s\"",
|
||||||
|
(long)collnum,ps);
|
||||||
g_errno = ENOCOLLREC;
|
g_errno = ENOCOLLREC;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -21389,7 +21512,7 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) {
|
|||||||
cr->m_regExs[occNum].getLength() == 0 )
|
cr->m_regExs[occNum].getLength() == 0 )
|
||||||
updateCount = false;
|
updateCount = false;
|
||||||
// and for other pages, like master ips, skip if empty!
|
// and for other pages, like master ips, skip if empty!
|
||||||
// PAGE_PASSWORDS, PAGE_SECURITY, ...
|
// PAGE_PASSWORDS, PAGE_ROOTPASSWORDS, ...
|
||||||
if ( parm->m_page != PAGE_FILTERS && ! changed )
|
if ( parm->m_page != PAGE_FILTERS && ! changed )
|
||||||
updateCount = false;
|
updateCount = false;
|
||||||
|
|
||||||
|
11
Parms.h
11
Parms.h
@ -159,6 +159,7 @@ class GigablastRequest {
|
|||||||
long long m_docId;
|
long long m_docId;
|
||||||
long m_strip;
|
long m_strip;
|
||||||
char m_includeHeader;
|
char m_includeHeader;
|
||||||
|
char m_highlightQuery;
|
||||||
|
|
||||||
///////////
|
///////////
|
||||||
//
|
//
|
||||||
@ -345,7 +346,9 @@ class Parms {
|
|||||||
long pd ,
|
long pd ,
|
||||||
bool isCrawlbot ,
|
bool isCrawlbot ,
|
||||||
char format, //bool isJSON,
|
char format, //bool isJSON,
|
||||||
TcpSocket *sock
|
TcpSocket *sock,
|
||||||
|
bool isRootAdmin,
|
||||||
|
bool isCollAdmin
|
||||||
);
|
);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -379,8 +382,10 @@ class Parms {
|
|||||||
long nc ,
|
long nc ,
|
||||||
long pd ,
|
long pd ,
|
||||||
bool lastRow ,
|
bool lastRow ,
|
||||||
bool isCrawlbot = false,
|
bool isCrawlbot ,//= false,
|
||||||
char format = FORMAT_HTML);//bool isJSON = false ) ;
|
char format , //= FORMAT_HTML,
|
||||||
|
bool isRootAdmin ,
|
||||||
|
bool isCollAdmin );
|
||||||
|
|
||||||
char *getTHIS ( HttpRequest *r , long page );
|
char *getTHIS ( HttpRequest *r , long page );
|
||||||
|
|
||||||
|
@ -4156,8 +4156,8 @@ bool PosdbTable::setQueryTermInfo ( ) {
|
|||||||
qti->m_wikiPhraseId = qw->m_wikiPhraseId;
|
qti->m_wikiPhraseId = qw->m_wikiPhraseId;
|
||||||
qti->m_quotedStartId = qw->m_quoteStart;
|
qti->m_quotedStartId = qw->m_quoteStart;
|
||||||
// is it gbsortby:?
|
// is it gbsortby:?
|
||||||
if ( qt->m_fieldCode == FIELD_GBSORTBY ||
|
if ( qt->m_fieldCode == FIELD_GBSORTBYFLOAT ||
|
||||||
qt->m_fieldCode == FIELD_GBREVSORTBY )
|
qt->m_fieldCode == FIELD_GBREVSORTBYFLOAT )
|
||||||
m_sortByTermNum = i;
|
m_sortByTermNum = i;
|
||||||
|
|
||||||
if ( qt->m_fieldCode == FIELD_GBSORTBYINT ||
|
if ( qt->m_fieldCode == FIELD_GBSORTBYINT ||
|
||||||
@ -4314,9 +4314,9 @@ bool PosdbTable::setQueryTermInfo ( ) {
|
|||||||
|
|
||||||
// numeric posdb termlist flags. instead of word position
|
// numeric posdb termlist flags. instead of word position
|
||||||
// they have a float stored there for sorting etc.
|
// they have a float stored there for sorting etc.
|
||||||
if (qt->m_fieldCode == FIELD_GBSORTBY )
|
if (qt->m_fieldCode == FIELD_GBSORTBYFLOAT )
|
||||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||||
if (qt->m_fieldCode == FIELD_GBREVSORTBY )
|
if (qt->m_fieldCode == FIELD_GBREVSORTBYFLOAT )
|
||||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||||
if (qt->m_fieldCode == FIELD_GBNUMBERMIN )
|
if (qt->m_fieldCode == FIELD_GBNUMBERMIN )
|
||||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||||
|
2
Posdb.h
2
Posdb.h
@ -33,7 +33,7 @@
|
|||||||
// so we do not need to repeat the same link text over and over again.
|
// so we do not need to repeat the same link text over and over again.
|
||||||
// Use M bits to hold # of inlinks the page has for other terms.
|
// Use M bits to hold # of inlinks the page has for other terms.
|
||||||
|
|
||||||
// NOTE: for inlinktext terms the pattern rank is the siterank of the
|
// NOTE: for inlinktext terms the spam rank is the siterank of the
|
||||||
// inlinker!
|
// inlinker!
|
||||||
|
|
||||||
// NOTE: densityrank for title is based on # of title words only. same goes
|
// NOTE: densityrank for title is based on # of title words only. same goes
|
||||||
|
29
Query.cpp
29
Query.cpp
@ -2305,6 +2305,10 @@ bool Query::setQWords ( char boolFlag ,
|
|||||||
if ( fieldCode == FIELD_GBNUMBEREQUALFLOAT )
|
if ( fieldCode == FIELD_GBNUMBEREQUALFLOAT )
|
||||||
ph = hash64 ("gbsortby", 8);
|
ph = hash64 ("gbsortby", 8);
|
||||||
|
|
||||||
|
// fix for gbsortbyfloat:product.price
|
||||||
|
if ( fieldCode == FIELD_GBSORTBYFLOAT )
|
||||||
|
ph = hash64 ("gbsortby", 8);
|
||||||
|
|
||||||
if ( fieldCode == FIELD_GBNUMBERMININT )
|
if ( fieldCode == FIELD_GBNUMBERMININT )
|
||||||
ph = hash64 ("gbsortbyint", 11);
|
ph = hash64 ("gbsortbyint", 11);
|
||||||
if ( fieldCode == FIELD_GBNUMBERMAXINT )
|
if ( fieldCode == FIELD_GBNUMBERMAXINT )
|
||||||
@ -2346,8 +2350,8 @@ bool Query::setQWords ( char boolFlag ,
|
|||||||
fieldCode == FIELD_IP ||
|
fieldCode == FIELD_IP ||
|
||||||
fieldCode == FIELD_ISCLEAN ||
|
fieldCode == FIELD_ISCLEAN ||
|
||||||
fieldCode == FIELD_QUOTA ||
|
fieldCode == FIELD_QUOTA ||
|
||||||
fieldCode == FIELD_GBSORTBY ||
|
fieldCode == FIELD_GBSORTBYFLOAT ||
|
||||||
fieldCode == FIELD_GBREVSORTBY ||
|
fieldCode == FIELD_GBREVSORTBYFLOAT ||
|
||||||
// gbmin:price:1.23
|
// gbmin:price:1.23
|
||||||
fieldCode == FIELD_GBNUMBERMIN ||
|
fieldCode == FIELD_GBNUMBERMIN ||
|
||||||
fieldCode == FIELD_GBNUMBERMAX ||
|
fieldCode == FIELD_GBNUMBERMAX ||
|
||||||
@ -2489,8 +2493,8 @@ bool Query::setQWords ( char boolFlag ,
|
|||||||
// i've decided not to make
|
// i've decided not to make
|
||||||
// gbsortby:products.offerPrice
|
// gbsortby:products.offerPrice
|
||||||
// gbmin:price:1.23 case insensitive
|
// gbmin:price:1.23 case insensitive
|
||||||
if ( fieldCode == FIELD_GBSORTBY ||
|
if ( fieldCode == FIELD_GBSORTBYFLOAT ||
|
||||||
fieldCode == FIELD_GBREVSORTBY ||
|
fieldCode == FIELD_GBREVSORTBYFLOAT ||
|
||||||
fieldCode == FIELD_GBSORTBYINT ||
|
fieldCode == FIELD_GBSORTBYINT ||
|
||||||
fieldCode == FIELD_GBREVSORTBYINT ) {
|
fieldCode == FIELD_GBREVSORTBYINT ) {
|
||||||
wid = hash64Lower_utf8 ( w , wlen , 0LL );
|
wid = hash64Lower_utf8 ( w , wlen , 0LL );
|
||||||
@ -3652,8 +3656,11 @@ struct QueryField g_fields[] = {
|
|||||||
"gblang:de",
|
"gblang:de",
|
||||||
"Matches all documents in german. "
|
"Matches all documents in german. "
|
||||||
"The supported language abbreviations "
|
"The supported language abbreviations "
|
||||||
"are at the bottom of the <i>url filters</i> page. Some more "
|
"are at the bottom of the <a href=/admin/filters>url filters</a> "
|
||||||
"common ones are <i>en, es, fr, zh_cn</i>.",
|
"page. Some more "
|
||||||
|
"common ones are <i>gblang:en, gblang:es, gblang:fr, "
|
||||||
|
// need quotes for this one!!
|
||||||
|
"gblang:\"zh_cn\"</i> (note the quotes for zh_cn!).",
|
||||||
NULL,
|
NULL,
|
||||||
0},
|
0},
|
||||||
|
|
||||||
@ -3751,7 +3758,7 @@ struct QueryField g_fields[] = {
|
|||||||
|
|
||||||
|
|
||||||
{"gbsortbyfloat",
|
{"gbsortbyfloat",
|
||||||
FIELD_GBSORTBY,
|
FIELD_GBSORTBYFLOAT,
|
||||||
false,
|
false,
|
||||||
"cameras gbsortbyfloat:price",
|
"cameras gbsortbyfloat:price",
|
||||||
"Sort all documents that "
|
"Sort all documents that "
|
||||||
@ -3762,7 +3769,7 @@ struct QueryField g_fields[] = {
|
|||||||
|
|
||||||
|
|
||||||
{"gbsortbyfloat",
|
{"gbsortbyfloat",
|
||||||
FIELD_GBSORTBY,
|
FIELD_GBSORTBYFLOAT,
|
||||||
false,
|
false,
|
||||||
"cameras gbsortbyfloat:product.price",
|
"cameras gbsortbyfloat:product.price",
|
||||||
"Sort all documents that "
|
"Sort all documents that "
|
||||||
@ -3777,7 +3784,7 @@ struct QueryField g_fields[] = {
|
|||||||
|
|
||||||
|
|
||||||
{"gbrevsortbyfloat",
|
{"gbrevsortbyfloat",
|
||||||
FIELD_GBREVSORTBY,
|
FIELD_GBREVSORTBYFLOAT,
|
||||||
false,
|
false,
|
||||||
"cameras gbrevsortbyfloat:product.price",
|
"cameras gbrevsortbyfloat:product.price",
|
||||||
"Like above example but sorted with highest prices on top.",
|
"Like above example but sorted with highest prices on top.",
|
||||||
@ -3786,7 +3793,7 @@ struct QueryField g_fields[] = {
|
|||||||
|
|
||||||
|
|
||||||
{"gbsortby",
|
{"gbsortby",
|
||||||
FIELD_GBSORTBY,
|
FIELD_GBSORTBYFLOAT,
|
||||||
false,
|
false,
|
||||||
"dog gbsortbyint:gbspiderdate",
|
"dog gbsortbyint:gbspiderdate",
|
||||||
"Sort the documents that contain 'dog' by "
|
"Sort the documents that contain 'dog' by "
|
||||||
@ -3796,7 +3803,7 @@ struct QueryField g_fields[] = {
|
|||||||
QTF_HIDE},
|
QTF_HIDE},
|
||||||
|
|
||||||
{"gbrevsortby",
|
{"gbrevsortby",
|
||||||
FIELD_GBREVSORTBY,
|
FIELD_GBREVSORTBYFLOAT,
|
||||||
false,
|
false,
|
||||||
"dog gbrevsortbyint:gbspiderdate",
|
"dog gbrevsortbyint:gbspiderdate",
|
||||||
"Sort the documents that contain 'dog' by "
|
"Sort the documents that contain 'dog' by "
|
||||||
|
4
Query.h
4
Query.h
@ -111,8 +111,8 @@ typedef unsigned long long qvec_t;
|
|||||||
#define FIELD_GBSECTIONHASH 51
|
#define FIELD_GBSECTIONHASH 51
|
||||||
#define FIELD_GBDOCID 52
|
#define FIELD_GBDOCID 52
|
||||||
#define FIELD_GBCONTENTHASH 53 // for deduping at spider time
|
#define FIELD_GBCONTENTHASH 53 // for deduping at spider time
|
||||||
#define FIELD_GBSORTBY 54 // i.e. sortby:price -> numeric termlist
|
#define FIELD_GBSORTBYFLOAT 54 // i.e. sortby:price -> numeric termlist
|
||||||
#define FIELD_GBREVSORTBY 55 // i.e. sortby:price -> low to high
|
#define FIELD_GBREVSORTBYFLOAT 55 // i.e. sortby:price -> low to high
|
||||||
#define FIELD_GBNUMBERMIN 56
|
#define FIELD_GBNUMBERMIN 56
|
||||||
#define FIELD_GBNUMBERMAX 57
|
#define FIELD_GBNUMBERMAX 57
|
||||||
#define FIELD_GBPARENTURL 58
|
#define FIELD_GBPARENTURL 58
|
||||||
|
168
SearchInput.cpp
168
SearchInput.cpp
@ -360,6 +360,8 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
|
|||||||
// set m_isRootAdmin to zero if no correct ip or password
|
// set m_isRootAdmin to zero if no correct ip or password
|
||||||
if ( ! g_conf.isRootAdmin ( sock , &m_hr ) ) m_isRootAdmin = 0;
|
if ( ! g_conf.isRootAdmin ( sock , &m_hr ) ) m_isRootAdmin = 0;
|
||||||
|
|
||||||
|
// collection admin?
|
||||||
|
m_isCollAdmin = g_conf.isCollAdmin ( sock , &m_hr );
|
||||||
|
|
||||||
//////////////////////////////////////
|
//////////////////////////////////////
|
||||||
//
|
//
|
||||||
@ -641,6 +643,42 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
m_sbuf1.safePrintf( "%s", qp );
|
m_sbuf1.safePrintf( "%s", qp );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// boolean OR terms
|
||||||
|
bool boolq = false;
|
||||||
|
char *any = hr->getString("any",NULL);
|
||||||
|
bool first = true;
|
||||||
|
if ( any ) {
|
||||||
|
char *s = any;
|
||||||
|
char *send = any + gbstrlen(any);
|
||||||
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
|
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
|
while (s < send) {
|
||||||
|
while (isspace(*s) && s < send) s++;
|
||||||
|
char *s2 = s+1;
|
||||||
|
if (*s == '\"') {
|
||||||
|
// if there's no closing quote just treat
|
||||||
|
// the end of the line as such
|
||||||
|
while (*s2 != '\"' && s2 < send) s2++;
|
||||||
|
if (s2 < send) s2++;
|
||||||
|
} else {
|
||||||
|
while (!isspace(*s2) && s2 < send) s2++;
|
||||||
|
}
|
||||||
|
if ( first ) m_sbuf1.safeStrcpy("(");
|
||||||
|
if ( first ) m_sbuf2.safeStrcpy("(");
|
||||||
|
if ( ! first ) m_sbuf1.safeStrcpy(" OR ");
|
||||||
|
if ( ! first ) m_sbuf2.safeStrcpy(" OR ");
|
||||||
|
first = false;
|
||||||
|
m_sbuf1.safeMemcpy ( s , s2 - s );
|
||||||
|
m_sbuf2.safeMemcpy ( s , s2 - s );
|
||||||
|
s = s2 + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( ! first ) m_sbuf1.safeStrcpy(") AND ");
|
||||||
|
if ( ! first ) m_sbuf2.safeStrcpy(") AND ");
|
||||||
|
if ( ! first ) boolq = true;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// and this
|
// and this
|
||||||
if ( m_secsBack > 0 ) {
|
if ( m_secsBack > 0 ) {
|
||||||
long timestamp = getTimeGlobalNoCore();
|
long timestamp = getTimeGlobalNoCore();
|
||||||
@ -694,36 +732,65 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
if ( m_familyFilter ) {
|
if ( m_familyFilter ) {
|
||||||
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
m_sbuf1.safePrintf("gbisadult:0 | ");
|
//if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
|
m_sbuf1.safePrintf( "+gbisadult:0");
|
||||||
|
//m_sbuf2.safePrintf( "+gbisadult:0");
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy(" |");
|
||||||
|
//m_sbuf2.safeStrcpy(" |");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND ");
|
||||||
|
//m_sbuf2.safeStrcpy(" AND ");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// PRE-pend gblang: term
|
||||||
|
long gblang = hr->getLong("gblang",-1);
|
||||||
|
if( gblang >= 0 ) {
|
||||||
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
|
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
|
m_sbuf1.safePrintf( "+gblang:%li", gblang );
|
||||||
|
m_sbuf2.safePrintf( "+gblang:%li", gblang );
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy(" |");
|
||||||
|
m_sbuf2.safeStrcpy(" |");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND ");
|
||||||
|
m_sbuf2.safeStrcpy(" AND ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// append gblang: term
|
|
||||||
// if( m_gblang > 0 ) {
|
|
||||||
// //if( p > pstart ) *p++ = ' ';
|
|
||||||
// if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
|
||||||
// //p += sprintf( p, "+gblang:%li |", m_gblang );
|
|
||||||
// m_sbuf1.safePrintf( "+gblang:%li |", m_gblang );
|
|
||||||
// }
|
|
||||||
// bookmark here so we can copy into st->m_displayQuery below
|
// bookmark here so we can copy into st->m_displayQuery below
|
||||||
//long displayQueryOffset = m_sbuf1.length();
|
//long displayQueryOffset = m_sbuf1.length();
|
||||||
// append url: term
|
// append url: term
|
||||||
if ( m_url && m_url[0] ) {
|
// if ( m_url && m_url[0] ) {
|
||||||
//if ( p > pstart ) *p++ = ' ';
|
// //if ( p > pstart ) *p++ = ' ';
|
||||||
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
// if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
//memcpy ( p , "+url:" , 5 ); p += 5;
|
// //memcpy ( p , "+url:" , 5 ); p += 5;
|
||||||
m_sbuf1.safeStrcpy ( "+url:");
|
// m_sbuf1.safeStrcpy ( "+url:");
|
||||||
//memcpy ( p , m_url , m_urlLen ); p += m_urlLen;
|
// //memcpy ( p , m_url , m_urlLen ); p += m_urlLen;
|
||||||
m_sbuf1.safeStrcpy ( m_url );
|
// m_sbuf1.safeStrcpy ( m_url );
|
||||||
}
|
// }
|
||||||
// append url: term
|
// append url: term
|
||||||
if ( m_link && m_link[0] ) {
|
if ( m_link && m_link[0] ) {
|
||||||
//if ( p > pstart ) *p++ = ' ';
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
//memcpy ( p , "+link:" , 6 ); p += 6;
|
|
||||||
m_sbuf1.safeStrcpy ( "+link:");
|
m_sbuf1.safeStrcpy ( "+link:");
|
||||||
//memcpy ( p , m_link , m_linkLen ); p += m_linkLen;
|
m_sbuf2.safeStrcpy ( "+link:");
|
||||||
m_sbuf1.safeStrcpy ( m_link );
|
m_sbuf1.safeStrcpy ( m_link );
|
||||||
|
m_sbuf2.safeStrcpy ( m_link );
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy(" |");
|
||||||
|
m_sbuf2.safeStrcpy(" |");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND ");
|
||||||
|
m_sbuf2.safeStrcpy(" AND ");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// append the natural query
|
// append the natural query
|
||||||
if ( m_query && m_query[0] ) {
|
if ( m_query && m_query[0] ) {
|
||||||
@ -757,7 +824,14 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
//*p++ = '+';
|
//*p++ = '+';
|
||||||
//*p++ = '\"';
|
//*p++ = '\"';
|
||||||
m_sbuf1.safeStrcpy("+\"");
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy(" +\"");
|
||||||
|
m_sbuf2.safeStrcpy(" +\"");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND \"");
|
||||||
|
m_sbuf2.safeStrcpy(" AND \"");
|
||||||
|
}
|
||||||
//p += ucToUtf8(p, pend-p, m_quote1, m_quoteLen1, csStr, 0,0);
|
//p += ucToUtf8(p, pend-p, m_quote1, m_quoteLen1, csStr, 0,0);
|
||||||
m_sbuf1.safeStrcpy ( m_quote1 );
|
m_sbuf1.safeStrcpy ( m_quote1 );
|
||||||
//memcpy ( p , m_quote1 , m_quoteLen1 ); p += m_quoteLen1 ;
|
//memcpy ( p , m_quote1 , m_quoteLen1 ); p += m_quoteLen1 ;
|
||||||
@ -768,7 +842,6 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
//*p2++ = '+';
|
//*p2++ = '+';
|
||||||
//*p2++ = '\"';
|
//*p2++ = '\"';
|
||||||
m_sbuf2.safeStrcpy("+\"");
|
|
||||||
//p2+=ucToUtf8(p2, pend2-p2, m_quote1, m_quoteLen1, csStr,0,0);
|
//p2+=ucToUtf8(p2, pend2-p2, m_quote1, m_quoteLen1, csStr,0,0);
|
||||||
m_sbuf2.safeStrcpy ( m_quote1 );
|
m_sbuf2.safeStrcpy ( m_quote1 );
|
||||||
//memcpy ( p2 , m_quote1 , m_quoteLen1 ); p2 += m_quoteLen1 ;
|
//memcpy ( p2 , m_quote1 , m_quoteLen1 ); p2 += m_quoteLen1 ;
|
||||||
@ -785,7 +858,17 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
if ( m_sbuf1.length() ) m_sbuf1.pushChar(' ');
|
||||||
//*p++ = '+';
|
//*p++ = '+';
|
||||||
//*p++ = '\"';
|
//*p++ = '\"';
|
||||||
m_sbuf1.safeStrcpy("+\"");
|
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy(" +\"");
|
||||||
|
m_sbuf2.safeStrcpy(" +\"");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND \"");
|
||||||
|
m_sbuf2.safeStrcpy(" AND \"");
|
||||||
|
}
|
||||||
|
|
||||||
|
//m_sbuf1.safeStrcpy("+\"");
|
||||||
//p += ucToUtf8(p, pend-p, m_quote2, m_quoteLen2, csStr, 0,0);
|
//p += ucToUtf8(p, pend-p, m_quote2, m_quoteLen2, csStr, 0,0);
|
||||||
m_sbuf1.safeStrcpy ( m_quote2 );
|
m_sbuf1.safeStrcpy ( m_quote2 );
|
||||||
//memcpy ( p , m_quote2 , m_quoteLen2 ); p += m_quoteLen2 ;
|
//memcpy ( p , m_quote2 , m_quoteLen2 ); p += m_quoteLen2 ;
|
||||||
@ -796,7 +879,7 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
if ( m_sbuf2.length() ) m_sbuf2.pushChar(' ');
|
||||||
//*p2++ = '+';
|
//*p2++ = '+';
|
||||||
//*p2++ = '\"';
|
//*p2++ = '\"';
|
||||||
m_sbuf2.safeStrcpy("+\"");
|
//m_sbuf2.safeStrcpy("+\"");
|
||||||
//p2+=ucToUtf8(p2, pend2-p2, m_quote2, m_quoteLen2, csStr,0,0);
|
//p2+=ucToUtf8(p2, pend2-p2, m_quote2, m_quoteLen2, csStr,0,0);
|
||||||
m_sbuf2.safeStrcpy ( m_quote2 );
|
m_sbuf2.safeStrcpy ( m_quote2 );
|
||||||
//memcpy ( p2 , m_quote2 , m_quoteLen2 ); p2 += m_quoteLen2 ;
|
//memcpy ( p2 , m_quote2 , m_quoteLen2 ); p2 += m_quoteLen2 ;
|
||||||
@ -828,11 +911,20 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
} else {
|
} else {
|
||||||
while (!isspace(*s2) && s2 < send) s2++;
|
while (!isspace(*s2) && s2 < send) s2++;
|
||||||
}
|
}
|
||||||
if (s2 < send) break;
|
//if (s2 < send) break;
|
||||||
//if (p < pend) *p++ = '+';
|
//if (p < pend) *p++ = '+';
|
||||||
//if (p2 < pend2) *p2++ = '+';
|
//if (p2 < pend2) *p2++ = '+';
|
||||||
m_sbuf1.pushChar('+');
|
//m_sbuf1.pushChar('+');
|
||||||
m_sbuf2.pushChar('+');
|
//m_sbuf2.pushChar('+');
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy("+");
|
||||||
|
m_sbuf2.safeStrcpy("+");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND ");
|
||||||
|
m_sbuf2.safeStrcpy(" AND ");
|
||||||
|
}
|
||||||
|
|
||||||
//p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0);
|
//p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0);
|
||||||
//p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0);
|
//p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0);
|
||||||
m_sbuf1.safeMemcpy ( s , s2 - s );
|
m_sbuf1.safeMemcpy ( s , s2 - s );
|
||||||
@ -882,8 +974,18 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
if (s2 < send) break;
|
if (s2 < send) break;
|
||||||
//if (p < pend) *p++ = '-';
|
//if (p < pend) *p++ = '-';
|
||||||
//if (p2 < pend2) *p2++ = '-';
|
//if (p2 < pend2) *p2++ = '-';
|
||||||
m_sbuf1.pushChar('-');
|
// m_sbuf1.pushChar('-');
|
||||||
m_sbuf2.pushChar('-');
|
// m_sbuf2.pushChar('-');
|
||||||
|
|
||||||
|
if ( ! boolq ) {
|
||||||
|
m_sbuf1.safeStrcpy("-");
|
||||||
|
m_sbuf2.safeStrcpy("-");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_sbuf1.safeStrcpy(" AND NOT ");
|
||||||
|
m_sbuf2.safeStrcpy(" AND NOT ");
|
||||||
|
}
|
||||||
|
|
||||||
//p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0);
|
//p += ucToUtf8(p, pend-p, s, s2-s, csStr, 0,0);
|
||||||
//p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0);
|
//p2 += ucToUtf8(p2, pend2-p2, s, s2-s, csStr, 0,0);
|
||||||
m_sbuf1.safeMemcpy ( s , s2 - s );
|
m_sbuf1.safeMemcpy ( s , s2 - s );
|
||||||
@ -923,9 +1025,9 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// null terms
|
// null terms
|
||||||
if ( ! m_sbuf1.pushChar('\0') ) return false;
|
if ( ! m_sbuf1.nullTerm() ) return false;
|
||||||
if ( ! m_sbuf2.pushChar('\0') ) return false;
|
if ( ! m_sbuf2.nullTerm() ) return false;
|
||||||
if ( ! m_sbuf3.pushChar('\0') ) return false;
|
if ( ! m_sbuf3.nullTerm() ) return false;
|
||||||
|
|
||||||
// the natural query
|
// the natural query
|
||||||
m_displayQuery = m_sbuf2.getBufStart();// + displayQueryOffset;
|
m_displayQuery = m_sbuf2.getBufStart();// + displayQueryOffset;
|
||||||
|
@ -118,7 +118,7 @@ class SearchInput {
|
|||||||
Query *m_q2;
|
Query *m_q2;
|
||||||
|
|
||||||
char m_isRootAdmin;
|
char m_isRootAdmin;
|
||||||
|
char m_isCollAdmin;
|
||||||
|
|
||||||
// these are set from things above
|
// these are set from things above
|
||||||
TopicGroup m_topicGroups [ MAX_TOPIC_GROUPS ];// msg40
|
TopicGroup m_topicGroups [ MAX_TOPIC_GROUPS ];// msg40
|
||||||
|
12
Spider.cpp
12
Spider.cpp
@ -12862,6 +12862,18 @@ bool getSpiderStatusMsg ( CollectionRec *cx , SafeBuf *msg , long *status ) {
|
|||||||
"adding new urls, or wait for "
|
"adding new urls, or wait for "
|
||||||
"existing urls to be respidered.");
|
"existing urls to be respidered.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// let's pass the qareindex() test in qa.cpp... it wasn't updating
|
||||||
|
// the status to done. it kept saying in progress.
|
||||||
|
if ( ! cx->m_isCustomCrawl &&
|
||||||
|
! cx->m_globalCrawlInfo.m_hasUrlsReadyToSpider ) {
|
||||||
|
//*status = SP_COMPLETED;
|
||||||
|
return msg->safePrintf ( "Nothing currently "
|
||||||
|
"available to spider. "
|
||||||
|
"Change your url filters, try "
|
||||||
|
"adding new urls, or wait for "
|
||||||
|
"existing urls to be respidered.");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( cx->m_spiderStatus == SP_ROUNDDONE ) {
|
if ( cx->m_spiderStatus == SP_ROUNDDONE ) {
|
||||||
|
@ -337,9 +337,9 @@ bool printSpiderProxyTable ( SafeBuf *sb ) {
|
|||||||
// we fetch a test url every minute or so through
|
// we fetch a test url every minute or so through
|
||||||
// each proxy to ensure it is up. typically this should
|
// each proxy to ensure it is up. typically this should
|
||||||
// be your website so you do not make someone angry.
|
// be your website so you do not make someone angry.
|
||||||
"<td><b>test url last download</b></td>"
|
"<td><b>test url last download attempt</b></td>"
|
||||||
// print "FAILED" in red if it failed to download
|
// print "FAILED" in red if it failed to download
|
||||||
"<td><b>test url download time</b></td>"
|
"<td><b>test url download took</b></td>"
|
||||||
|
|
||||||
"<td><b>last bytes downloaded</b></td>"
|
"<td><b>last bytes downloaded</b></td>"
|
||||||
|
|
||||||
@ -505,6 +505,9 @@ bool downloadTestUrlFromProxies ( ) {
|
|||||||
// only host #0 should do the testing i guess
|
// only host #0 should do the testing i guess
|
||||||
//if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
|
//if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
|
||||||
|
|
||||||
|
// no need if no url
|
||||||
|
if ( g_conf.m_proxyTestUrl.length() <= 1 ) return true;
|
||||||
|
|
||||||
// if host #0 dies then host #1 must take its place managing the
|
// if host #0 dies then host #1 must take its place managing the
|
||||||
// spider proxies
|
// spider proxies
|
||||||
Host *h0 = g_hostdb.getFirstAliveHost();
|
Host *h0 = g_hostdb.getFirstAliveHost();
|
||||||
@ -706,8 +709,11 @@ void handleRequest54 ( UdpSlot *udpSlot , long niceness ) {
|
|||||||
goto redo;
|
goto redo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// reset minCount so we can take the min over those we check here
|
||||||
|
minCount = -1;
|
||||||
long long oldest = 0x7fffffffffffffffLL;
|
long long oldest = 0x7fffffffffffffffLL;
|
||||||
SpiderProxy *winnersp = NULL;
|
SpiderProxy *winnersp = NULL;
|
||||||
|
long count = 0;
|
||||||
// now find the best proxy wih the minCount
|
// now find the best proxy wih the minCount
|
||||||
for ( long i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
|
for ( long i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
|
||||||
// skip empty slots
|
// skip empty slots
|
||||||
@ -716,12 +722,7 @@ void handleRequest54 ( UdpSlot *udpSlot , long niceness ) {
|
|||||||
SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);
|
SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);
|
||||||
// if it failed the last test, skip it... not here...
|
// if it failed the last test, skip it... not here...
|
||||||
if ( skipDead && sp->m_lastDownloadError ) continue;
|
if ( skipDead && sp->m_lastDownloadError ) continue;
|
||||||
// if all hosts were "dead" because they all had
|
|
||||||
// m_lastDownloadError set then minCount will be 999999
|
|
||||||
// and nobody should continue from this statement:
|
|
||||||
if ( sp->m_countForThisIp > minCount ) continue;
|
|
||||||
// then go by last download time for this ip
|
|
||||||
if ( sp->m_lastTimeUsedForThisIp >= oldest ) continue;
|
|
||||||
// if this proxy was banned by the url's ip... skip it. it is
|
// if this proxy was banned by the url's ip... skip it. it is
|
||||||
// not a candidate...
|
// not a candidate...
|
||||||
if ( skipDead ) {
|
if ( skipDead ) {
|
||||||
@ -730,8 +731,34 @@ void handleRequest54 ( UdpSlot *udpSlot , long niceness ) {
|
|||||||
long long h64 = hash64h ( uip , pip );
|
long long h64 = hash64h ( uip , pip );
|
||||||
if ( s_proxyBannedTable.isInTable ( &h64 ) ) continue;
|
if ( s_proxyBannedTable.isInTable ( &h64 ) ) continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if some proxies are "alive" then only pick from
|
||||||
|
// the first half of the proxies that are alive (i.e. still
|
||||||
|
// work). that way, when one of those goes dead we will inc
|
||||||
|
// the backoff (crawldelay) and a new proxy that we haven't
|
||||||
|
// used for this url's IP will take it's place. and such
|
||||||
|
// new proxies will only have the new backoff count used
|
||||||
|
// through them. that way, we don't get ALL of our proxies
|
||||||
|
// banned at about the same time since we do somewhat uniform
|
||||||
|
// load balancing over them.
|
||||||
|
if ( skipDead && count > aliveProxyCandidates / 2 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// count the alive/non-banned candidates
|
||||||
|
count++;
|
||||||
|
|
||||||
|
// if all hosts were "dead" because they all had
|
||||||
|
// m_lastDownloadError set then minCount will be 999999
|
||||||
|
// and nobody should continue from this statement:
|
||||||
|
if ( sp->m_countForThisIp > minCount && minCount>=0 ) continue;
|
||||||
|
// then go by last download time for this ip
|
||||||
|
if ( sp->m_countForThisIp == minCount && minCount>=0 &&
|
||||||
|
sp->m_lastTimeUsedForThisIp >= oldest )
|
||||||
|
continue;
|
||||||
|
|
||||||
// pick the spider proxy used longest ago
|
// pick the spider proxy used longest ago
|
||||||
oldest = sp->m_lastTimeUsedForThisIp;
|
oldest = sp->m_lastTimeUsedForThisIp;
|
||||||
|
minCount = sp->m_countForThisIp;
|
||||||
// got a new winner
|
// got a new winner
|
||||||
winnersp = sp;
|
winnersp = sp;
|
||||||
}
|
}
|
||||||
|
@ -27,6 +27,7 @@ void Summary::reset() {
|
|||||||
m_bitScoresBufSize = 0;
|
m_bitScoresBufSize = 0;
|
||||||
}
|
}
|
||||||
m_summaryLen = 0;
|
m_summaryLen = 0;
|
||||||
|
m_displayLen = 0;
|
||||||
//m_bufMaxLen = 0;
|
//m_bufMaxLen = 0;
|
||||||
//m_bufLen = 0;
|
//m_bufLen = 0;
|
||||||
//m_buf = NULL;
|
//m_buf = NULL;
|
||||||
|
@ -80,8 +80,8 @@ bool User::verifyPageNum ( uint16_t pageNum ){
|
|||||||
}
|
}
|
||||||
// check if pageNum is of dummy page
|
// check if pageNum is of dummy page
|
||||||
bool isDummy = true;
|
bool isDummy = true;
|
||||||
if ( pageNum > PAGE_PUBLIC )
|
//if ( pageNum > PAGE_PUBLIC )
|
||||||
isDummy = false;
|
isDummy = false;
|
||||||
//
|
//
|
||||||
if ( m_allPages && !isDummy )
|
if ( m_allPages && !isDummy )
|
||||||
return true;
|
return true;
|
||||||
@ -93,8 +93,9 @@ bool User::verifyPageNum ( uint16_t pageNum ){
|
|||||||
long User::firstPage ( ){
|
long User::firstPage ( ){
|
||||||
// return first allowed page
|
// return first allowed page
|
||||||
for ( uint16_t i = 0; i < m_numPages; i++ )
|
for ( uint16_t i = 0; i < m_numPages; i++ )
|
||||||
if ( ! (m_pages[i] & 0x8000) &&
|
if ( ! (m_pages[i] & 0x8000) ) //&&
|
||||||
(m_pages[i]&0x7fff) > PAGE_PUBLIC ) return m_pages[i];
|
// (m_pages[i]&0x7fff) > PAGE_PUBLIC )
|
||||||
|
return m_pages[i];
|
||||||
|
|
||||||
// if all pages is set then just return the root page
|
// if all pages is set then just return the root page
|
||||||
if ( m_allPages ) return PAGE_ROOT;
|
if ( m_allPages ) return PAGE_ROOT;
|
||||||
|
81
Xml.cpp
81
Xml.cpp
@ -196,6 +196,35 @@ void Xml::reset ( ) {
|
|||||||
m_allocSize = 0;
|
m_allocSize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Xml::getCompoundName ( long node , SafeBuf *sb ) {
|
||||||
|
XmlNode *buf[256];
|
||||||
|
XmlNode *xn = &m_nodes[node];
|
||||||
|
long np = 0;
|
||||||
|
for ( ; xn ; xn = xn->m_parent ) {
|
||||||
|
if ( ! xn->m_nodeId ) continue;
|
||||||
|
if ( np >= 256 ) {g_errno = EBUFTOOSMALL;return false;}
|
||||||
|
buf[np++] = xn;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore that initial <?xml ..> tag they all have
|
||||||
|
if ( np > 0 &&
|
||||||
|
buf[np-1]->m_tagNameLen == 3 &&
|
||||||
|
strncasecmp(buf[np-1]->m_tagName,"xml",3) == 0 )
|
||||||
|
np--;
|
||||||
|
|
||||||
|
for ( long i = np - 1 ; i >= 0 ; i-- ) {
|
||||||
|
XmlNode *xn = buf[i];
|
||||||
|
sb->safeMemcpy ( xn->m_tagName , xn->m_tagNameLen );
|
||||||
|
sb->pushChar('.');
|
||||||
|
}
|
||||||
|
// remove last '.'
|
||||||
|
if ( sb->length() ) sb->m_length--;
|
||||||
|
sb->nullTerm();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#include "HttpMime.h" // CT_JSON
|
#include "HttpMime.h" // CT_JSON
|
||||||
|
|
||||||
// "s" must be in utf8
|
// "s" must be in utf8
|
||||||
@ -258,6 +287,10 @@ bool Xml::set ( char *s ,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// override
|
||||||
|
if ( contentType == CT_XML )
|
||||||
|
pureXml = true;
|
||||||
|
|
||||||
|
|
||||||
QUICKPOLL((niceness));
|
QUICKPOLL((niceness));
|
||||||
long i;
|
long i;
|
||||||
@ -310,6 +343,11 @@ bool Xml::set ( char *s ,
|
|||||||
logf(LOG_TIMING,
|
logf(LOG_TIMING,
|
||||||
"build: xml: set: 4c. %llu",gettimeofdayInMilliseconds());
|
"build: xml: set: 4c. %llu",gettimeofdayInMilliseconds());
|
||||||
|
|
||||||
|
XmlNode *parent = NULL;
|
||||||
|
XmlNode *parentStackStart[256];
|
||||||
|
XmlNode **parentStackPtr = &parentStackStart[0];
|
||||||
|
XmlNode **parentStackEnd = &parentStackStart[256];
|
||||||
|
|
||||||
// . TODO: do this on demand
|
// . TODO: do this on demand
|
||||||
// . now fill our nodes array
|
// . now fill our nodes array
|
||||||
// . loop over the xml
|
// . loop over the xml
|
||||||
@ -320,14 +358,51 @@ bool Xml::set ( char *s ,
|
|||||||
QUICKPOLL(niceness);
|
QUICKPOLL(niceness);
|
||||||
// remember oldi
|
// remember oldi
|
||||||
oldi = i;
|
oldi = i;
|
||||||
|
|
||||||
|
// convenience ptr
|
||||||
|
XmlNode *xi = &m_nodes[m_numNodes];
|
||||||
|
|
||||||
// set that node
|
// set that node
|
||||||
i += m_nodes[m_numNodes].set (&m_xml[i],pureXml,version);
|
i += xi->set (&m_xml[i],pureXml,version);
|
||||||
|
|
||||||
|
|
||||||
|
// set his parent xml node if is xml
|
||||||
|
xi->m_parent = parent;
|
||||||
|
|
||||||
|
// if not text node then he's the new parent
|
||||||
|
if ( pureXml &&
|
||||||
|
xi->m_nodeId &&
|
||||||
|
xi->m_nodeId != TAG_COMMENT ) {
|
||||||
|
|
||||||
|
// if we are a back tag pop the stack
|
||||||
|
if ( ! xi->isFrontTag() ) {
|
||||||
|
// pop old parent
|
||||||
|
if ( parentStackPtr > parentStackStart )
|
||||||
|
parent = *(--parentStackPtr);
|
||||||
|
}
|
||||||
|
// we are a front tag...
|
||||||
|
else {
|
||||||
|
// did we overflow?
|
||||||
|
if ( parentStackPtr >= parentStackEnd ) {
|
||||||
|
log("xml: xml parent overflow");
|
||||||
|
g_errno = EBUFTOOSMALL;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// push the old parent ptr
|
||||||
|
if ( parent ) *parentStackPtr++ = parent;
|
||||||
|
// set the new parent to us
|
||||||
|
parent = xi;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// in script?
|
// in script?
|
||||||
if ( m_nodes[m_numNodes].m_nodeId != TAG_SCRIPT ) {
|
if ( xi->m_nodeId != TAG_SCRIPT ) {
|
||||||
m_numNodes++;
|
m_numNodes++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ( ! m_nodes[m_numNodes].isFrontTag() ) {
|
if ( ! xi->isFrontTag() ) {
|
||||||
m_numNodes++;
|
m_numNodes++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
8
Xml.h
8
Xml.h
@ -86,8 +86,12 @@ class Xml {
|
|||||||
// . ie. "xml.country.state.city"
|
// . ie. "xml.country.state.city"
|
||||||
// . fullTag option returns the entire node text
|
// . fullTag option returns the entire node text
|
||||||
// . ie. "<xml>.<country>.<state abbrev="true">.<city arg="foo">
|
// . ie. "<xml>.<country>.<state abbrev="true">.<city arg="foo">
|
||||||
long getCompoundName ( long n , char *buf , long bufMaxLen,
|
//long getCompoundName ( long n , char *buf , long bufMaxLen,
|
||||||
bool fullTag = false ) ;
|
// bool fullTag = false ) ;
|
||||||
|
|
||||||
|
// get like compound name like "node1.node2.node3\0"
|
||||||
|
bool getCompoundName ( long node , class SafeBuf *sb ) ;
|
||||||
|
|
||||||
|
|
||||||
// . used for parsing xml conf files
|
// . used for parsing xml conf files
|
||||||
// . used for getting the title in an html doc, etc.
|
// . used for getting the title in an html doc, etc.
|
||||||
|
356
XmlDoc.cpp
356
XmlDoc.cpp
@ -186,6 +186,8 @@ XmlDoc::~XmlDoc() {
|
|||||||
static long long s_lastTimeStart = 0LL;
|
static long long s_lastTimeStart = 0LL;
|
||||||
|
|
||||||
void XmlDoc::reset ( ) {
|
void XmlDoc::reset ( ) {
|
||||||
|
|
||||||
|
m_isImporting = false;
|
||||||
|
|
||||||
m_printedMenu = false;
|
m_printedMenu = false;
|
||||||
|
|
||||||
@ -1335,7 +1337,13 @@ bool XmlDoc::set4 ( SpiderRequest *sreq ,
|
|||||||
// similar to set3() above
|
// similar to set3() above
|
||||||
m_setFromDocId = true;
|
m_setFromDocId = true;
|
||||||
// use content and ip from old title rec to save time
|
// use content and ip from old title rec to save time
|
||||||
m_recycleContent = true;
|
// . crap this is making the query reindex not actually
|
||||||
|
// re-download the content.
|
||||||
|
// . we already check the m_deleteFromIndex flag below
|
||||||
|
// in getUtf8Content() and use the old content in that case
|
||||||
|
// so i'm not sure why we are recycling here, so take
|
||||||
|
// this out. MDW 9/25/2014.
|
||||||
|
//m_recycleContent = true;
|
||||||
// sanity
|
// sanity
|
||||||
if ( m_docId == 0LL ) { char *xx=NULL;*xx=0; }
|
if ( m_docId == 0LL ) { char *xx=NULL;*xx=0; }
|
||||||
}
|
}
|
||||||
@ -3192,6 +3200,36 @@ long *XmlDoc::getIndexCode2 ( ) {
|
|||||||
if ( m_sreqValid && m_sreq.m_ignoreDocUnchangedError )
|
if ( m_sreqValid && m_sreq.m_ignoreDocUnchangedError )
|
||||||
check = false;
|
check = false;
|
||||||
if ( check ) {
|
if ( check ) {
|
||||||
|
// check inlinks now too!
|
||||||
|
LinkInfo *info1 = getLinkInfo1 ();
|
||||||
|
if ( ! info1 || info1 == (LinkInfo *)-1 ) return (long *)info1;
|
||||||
|
LinkInfo *info2 = od->getLinkInfo1 ();
|
||||||
|
if ( ! info2 || info2 == (LinkInfo *)-1 ) return (long *)info2;
|
||||||
|
Inlink *k1 = NULL;
|
||||||
|
Inlink *k2 = NULL;
|
||||||
|
char *s1, *s2;
|
||||||
|
long len1,len2;
|
||||||
|
if ( info1->getNumGoodInlinks() !=
|
||||||
|
info2->getNumGoodInlinks() )
|
||||||
|
goto changed;
|
||||||
|
for ( ; k1=info1->getNextInlink(k1) ,
|
||||||
|
k2=info2->getNextInlink(k2); ) {
|
||||||
|
if ( ! k1 )
|
||||||
|
break;
|
||||||
|
if ( ! k2 )
|
||||||
|
break;
|
||||||
|
if ( k1->m_siteNumInlinks != k2->m_siteNumInlinks )
|
||||||
|
goto changed;
|
||||||
|
s1 = k1->ptr_linkText;
|
||||||
|
len1 = k1->size_linkText - 1; // exclude \0
|
||||||
|
s2 = k2->ptr_linkText;
|
||||||
|
len2 = k2->size_linkText - 1; // exclude \0
|
||||||
|
if ( len1 != len2 )
|
||||||
|
goto changed;
|
||||||
|
if ( memcmp(s1,s2,len1) != 0 )
|
||||||
|
goto changed;
|
||||||
|
}
|
||||||
|
// no change in link text, look for change in page content now
|
||||||
long *ch32 = getContentHash32();
|
long *ch32 = getContentHash32();
|
||||||
if ( ! ch32 || ch32 == (void *)-1 ) return (long *)ch32;
|
if ( ! ch32 || ch32 == (void *)-1 ) return (long *)ch32;
|
||||||
if ( *ch32 == od->m_contentHash32 ) {
|
if ( *ch32 == od->m_contentHash32 ) {
|
||||||
@ -3201,6 +3239,7 @@ long *XmlDoc::getIndexCode2 ( ) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
changed:
|
||||||
// words
|
// words
|
||||||
Words *words = getWords();
|
Words *words = getWords();
|
||||||
if ( ! words || words == (Words *)-1 ) return (long *)words;
|
if ( ! words || words == (Words *)-1 ) return (long *)words;
|
||||||
@ -15222,6 +15261,16 @@ char **XmlDoc::getHttpReply2 ( ) {
|
|||||||
if ( od )
|
if ( od )
|
||||||
r->m_contentHash32 = od->m_contentHash32;
|
r->m_contentHash32 = od->m_contentHash32;
|
||||||
|
|
||||||
|
// force floater usage on even if "use spider proxies" parms is off
|
||||||
|
// if we're a diffbot crawl and use robots is off.
|
||||||
|
//if ( cr && ! cr->m_useRobotsTxt && cr->m_isCustomCrawl )
|
||||||
|
// r->m_forceUseFloaters = true;
|
||||||
|
|
||||||
|
// for beta testing, make it a collection specific parm for diffbot
|
||||||
|
// so we can turn on manually
|
||||||
|
if ( cr->m_forceUseFloaters )
|
||||||
|
r->m_forceUseFloaters = true;
|
||||||
|
|
||||||
// eventgurubot is the max
|
// eventgurubot is the max
|
||||||
//char *userAgent = g_conf.m_spiderUserAgent;
|
//char *userAgent = g_conf.m_spiderUserAgent;
|
||||||
// hardcode it
|
// hardcode it
|
||||||
@ -15766,7 +15815,10 @@ char **XmlDoc::getContent ( ) {
|
|||||||
// if we were set from a title rec use that we do not have the original
|
// if we were set from a title rec use that we do not have the original
|
||||||
// content, and caller should be calling getUtf8Content() anyway!!
|
// content, and caller should be calling getUtf8Content() anyway!!
|
||||||
if ( m_setFromTitleRec ) { char *xx=NULL; *xx=0; }
|
if ( m_setFromTitleRec ) { char *xx=NULL; *xx=0; }
|
||||||
if ( m_setFromDocId ) { char *xx=NULL; *xx=0; }
|
|
||||||
|
// query reindex has m_setFromDocId to true and we WANT to re-download
|
||||||
|
// the content... so why did i have this here? MDW 9/25/2014
|
||||||
|
//if ( m_setFromDocId ) { char *xx=NULL; *xx=0; }
|
||||||
|
|
||||||
// recycle?
|
// recycle?
|
||||||
//if ( m_recycleContent ) { char *xx=NULL; *xx=0; }
|
//if ( m_recycleContent ) { char *xx=NULL; *xx=0; }
|
||||||
@ -17603,7 +17655,9 @@ char **XmlDoc::getUtf8Content ( ) {
|
|||||||
// all tags like <title> or <link> to <gbtitle> or <gblink> so we
|
// all tags like <title> or <link> to <gbtitle> or <gblink> so we
|
||||||
// know they are xml tags. because stuff like <br> will
|
// know they are xml tags. because stuff like <br> will
|
||||||
// become <br> and will be within its xml tag like <gbdescription>
|
// become <br> and will be within its xml tag like <gbdescription>
|
||||||
// or <gbtitle>
|
// or <gbtitle>.
|
||||||
|
// MDW: 9/28/2014. no longer do this since i added hashXmlFields().
|
||||||
|
/*
|
||||||
if ( m_contentType == CT_XML ) {
|
if ( m_contentType == CT_XML ) {
|
||||||
// count the xml tags
|
// count the xml tags
|
||||||
char *p = m_expandedUtf8Content;
|
char *p = m_expandedUtf8Content;
|
||||||
@ -17659,6 +17713,7 @@ char **XmlDoc::getUtf8Content ( ) {
|
|||||||
// free esbuf if we were referencing that to save mem
|
// free esbuf if we were referencing that to save mem
|
||||||
m_esbuf.purge();
|
m_esbuf.purge();
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// richmondspca.org has " in some tags and we do not like
|
// richmondspca.org has " in some tags and we do not like
|
||||||
// expanding that to " because it messes up XmlNode::getTagLen()
|
// expanding that to " because it messes up XmlNode::getTagLen()
|
||||||
@ -17675,11 +17730,15 @@ char **XmlDoc::getUtf8Content ( ) {
|
|||||||
// utf8 chars so that Xml::set(), etc. still work properly and don't
|
// utf8 chars so that Xml::set(), etc. still work properly and don't
|
||||||
// add any more html tags than it should
|
// add any more html tags than it should
|
||||||
// . this will decode in place
|
// . this will decode in place
|
||||||
long n = htmlDecode(m_expandedUtf8Content,//ptr_utf8Content,
|
// . MDW: 9/28/2014. no longer do for xml docs since i added
|
||||||
m_expandedUtf8Content,//ptr_utf8Content,
|
// hashXmlFields()
|
||||||
m_expandedUtf8ContentSize-1,//size_utf8Content-1,
|
long n = m_expandedUtf8ContentSize - 1;
|
||||||
doSpecial,
|
if ( m_contentType != CT_XML )
|
||||||
m_niceness);
|
n = htmlDecode(m_expandedUtf8Content,//ptr_utf8Content,
|
||||||
|
m_expandedUtf8Content,//ptr_utf8Content,
|
||||||
|
m_expandedUtf8ContentSize-1,//size_utf8Con
|
||||||
|
doSpecial,
|
||||||
|
m_niceness);
|
||||||
|
|
||||||
// can't exceed this! n does not include the final \0 even though
|
// can't exceed this! n does not include the final \0 even though
|
||||||
// we do write it out.
|
// we do write it out.
|
||||||
@ -17689,12 +17748,14 @@ char **XmlDoc::getUtf8Content ( ) {
|
|||||||
|
|
||||||
// now rss has crap in it like "&nbsp;" so we have to do another
|
// now rss has crap in it like "&nbsp;" so we have to do another
|
||||||
// decoding pass
|
// decoding pass
|
||||||
if ( m_contentType == CT_XML ) // isRSSExt )
|
// . MDW: 9/28/2014. no longer do for xml docs since i added
|
||||||
n = htmlDecode(m_expandedUtf8Content,//ptr_utf8Content,
|
// hashXmlFields()
|
||||||
m_expandedUtf8Content,//ptr_utf8Content,
|
// if ( m_contentType == CT_XML ) // isRSSExt )
|
||||||
n,
|
// n = htmlDecode(m_expandedUtf8Content,//ptr_utf8Content,
|
||||||
false,//doSpecial,
|
// m_expandedUtf8Content,//ptr_utf8Content,
|
||||||
m_niceness);
|
// n,
|
||||||
|
// false,//doSpecial,
|
||||||
|
// m_niceness);
|
||||||
// sanity
|
// sanity
|
||||||
if ( n > m_expandedUtf8ContentSize-1 ) {char *xx=NULL;*xx=0; }
|
if ( n > m_expandedUtf8ContentSize-1 ) {char *xx=NULL;*xx=0; }
|
||||||
// sanity
|
// sanity
|
||||||
@ -18943,6 +19004,17 @@ char *XmlDoc::getSpiderLinks ( ) {
|
|||||||
// m_spiderLinks2 = false;
|
// m_spiderLinks2 = false;
|
||||||
// m_spiderLinksValid = true ; }
|
// m_spiderLinksValid = true ; }
|
||||||
|
|
||||||
|
// this slows importing down because we end up doing ip lookups
|
||||||
|
// for every outlink if "firstip" not in tagdb.
|
||||||
|
// shoot. set2() already sets m_spiderLinksValid to true so we
|
||||||
|
// have to override if importing.
|
||||||
|
if ( m_isImporting && m_isImportingValid ) {
|
||||||
|
m_spiderLinks = false;
|
||||||
|
m_spiderLinks2 = false;
|
||||||
|
m_spiderLinksValid = true;
|
||||||
|
return &m_spiderLinks2;
|
||||||
|
}
|
||||||
|
|
||||||
// return the valid value
|
// return the valid value
|
||||||
if ( m_spiderLinksValid ) return &m_spiderLinks2;
|
if ( m_spiderLinksValid ) return &m_spiderLinks2;
|
||||||
|
|
||||||
@ -21761,8 +21833,6 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
// likewise if there error was ENONCANONICAL treat it like that
|
// likewise if there error was ENONCANONICAL treat it like that
|
||||||
if ( m_indexCode == EDOCNONCANONICAL )
|
if ( m_indexCode == EDOCNONCANONICAL )
|
||||||
spideringLinks = true;
|
spideringLinks = true;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// . prepare the outlink info if we are adding links to spiderdb!
|
// . prepare the outlink info if we are adding links to spiderdb!
|
||||||
@ -22273,13 +22343,17 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
// . LINKDB
|
// . LINKDB
|
||||||
// . linkdb records. assume one per outlink
|
// . linkdb records. assume one per outlink
|
||||||
// . we may index 2 16-byte keys for each outlink
|
// . we may index 2 16-byte keys for each outlink
|
||||||
Links *nl = NULL; if ( spideringLinks ) nl = &m_links;
|
Links *nl2 = NULL;
|
||||||
|
//if ( spideringLinks ) nl2 = &m_links;
|
||||||
|
// if injecting, spideringLinks is false, but then we don't
|
||||||
|
// add the links to linkdb, which causes the qainlinks() test to fail
|
||||||
|
nl2 = &m_links;
|
||||||
// do not bother if deleting. but we do add simplified redirects
|
// do not bother if deleting. but we do add simplified redirects
|
||||||
// to spiderdb as SpiderRequests now.
|
// to spiderdb as SpiderRequests now.
|
||||||
long code = m_indexCode;
|
long code = m_indexCode;
|
||||||
if ( code == EDOCSIMPLIFIEDREDIR ) code = 0;
|
if ( code == EDOCSIMPLIFIEDREDIR ) code = 0;
|
||||||
if ( code == EDOCNONCANONICAL ) code = 0;
|
if ( code == EDOCNONCANONICAL ) code = 0;
|
||||||
if ( code ) nl = NULL;
|
if ( code ) nl2 = NULL;
|
||||||
//Links *ol = NULL; if ( od ) ol = od->getLinks();
|
//Links *ol = NULL; if ( od ) ol = od->getLinks();
|
||||||
// . set key/data size
|
// . set key/data size
|
||||||
// . use a 16 byte key, not the usual 12
|
// . use a 16 byte key, not the usual 12
|
||||||
@ -22288,7 +22362,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
HashTableX kt1;
|
HashTableX kt1;
|
||||||
//HashTableX kt2;
|
//HashTableX kt2;
|
||||||
long nis = 0;
|
long nis = 0;
|
||||||
if ( nl && m_useLinkdb ) nis = nl->getNumLinks() * 4;
|
if ( nl2 && m_useLinkdb ) nis = nl2->getNumLinks() * 4;
|
||||||
// pre-grow table based on # outlinks
|
// pre-grow table based on # outlinks
|
||||||
kt1.set ( sizeof(key224_t),0,nis,NULL,0,false,m_niceness,"link-indx" );
|
kt1.set ( sizeof(key224_t),0,nis,NULL,0,false,m_niceness,"link-indx" );
|
||||||
// use magic to make fast
|
// use magic to make fast
|
||||||
@ -22307,7 +22381,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
// but this will have to be for adding to Linkdb. basically take a
|
// but this will have to be for adding to Linkdb. basically take a
|
||||||
// lot of it from Linkdb::fillLinkdbList()
|
// lot of it from Linkdb::fillLinkdbList()
|
||||||
// . these return false with g_errno set on error
|
// . these return false with g_errno set on error
|
||||||
if ( m_useLinkdb && nl && ! hashLinksForLinkdb(&kt1) ) return NULL;
|
if ( m_useLinkdb && nl2 && ! hashLinksForLinkdb(&kt1) ) return NULL;
|
||||||
//if ( add2 && ol && ! !od->m_skipIndexing &&
|
//if ( add2 && ol && ! !od->m_skipIndexing &&
|
||||||
// ol->hash(&kt2,od,m_niceness) )
|
// ol->hash(&kt2,od,m_niceness) )
|
||||||
// return NULL;
|
// return NULL;
|
||||||
@ -22432,6 +22506,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
// if we were set from a titleRec, see if we got
|
// if we were set from a titleRec, see if we got
|
||||||
// a different hash of terms to index this time around...
|
// a different hash of terms to index this time around...
|
||||||
m_setFromTitleRec &&
|
m_setFromTitleRec &&
|
||||||
|
// fix for import log spam
|
||||||
|
! m_isImporting &&
|
||||||
m_version >= 120 &&
|
m_version >= 120 &&
|
||||||
m_metaListCheckSum8 != currentMetaListCheckSum8 )
|
m_metaListCheckSum8 != currentMetaListCheckSum8 )
|
||||||
log("xmldoc: checksum parsing inconsistency for %s",
|
log("xmldoc: checksum parsing inconsistency for %s",
|
||||||
@ -22931,7 +23007,7 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
|||||||
// . should also add with a time of now plus 5 seconds to that if
|
// . should also add with a time of now plus 5 seconds to that if
|
||||||
// we spider an outlink linkdb should be update with this doc
|
// we spider an outlink linkdb should be update with this doc
|
||||||
// pointing to it so it can get link text then!!
|
// pointing to it so it can get link text then!!
|
||||||
if ( spideringLinks && nl && ! m_doingConsistencyCheck &&
|
if ( spideringLinks && nl2 && ! m_doingConsistencyCheck &&
|
||||||
m_useSpiderdb && ! forDelete ){
|
m_useSpiderdb && ! forDelete ){
|
||||||
// returns NULL and sets g_errno on error
|
// returns NULL and sets g_errno on error
|
||||||
char *ret = addOutlinkSpiderRecsToMetaList ();
|
char *ret = addOutlinkSpiderRecsToMetaList ();
|
||||||
@ -25894,6 +25970,7 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
|||||||
// hash diffbot's json output here
|
// hash diffbot's json output here
|
||||||
uint8_t *ct = getContentType();
|
uint8_t *ct = getContentType();
|
||||||
if ( ! ct ) return NULL;
|
if ( ! ct ) return NULL;
|
||||||
|
/*
|
||||||
if ( *ct == CT_JSON ) { // && m_isDiffbotJSONObject ) {
|
if ( *ct == CT_JSON ) { // && m_isDiffbotJSONObject ) {
|
||||||
// hash the content type for type:json query
|
// hash the content type for type:json query
|
||||||
if ( ! hashContentType ( table ) ) return NULL;
|
if ( ! hashContentType ( table ) ) return NULL;
|
||||||
@ -25911,6 +25988,7 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
|||||||
// and the json itself
|
// and the json itself
|
||||||
return hashJSON ( table );
|
return hashJSON ( table );
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
if ( ! hashContentType ( table ) ) return NULL;
|
if ( ! hashContentType ( table ) ) return NULL;
|
||||||
if ( ! hashUrl ( table ) ) return NULL;
|
if ( ! hashUrl ( table ) ) return NULL;
|
||||||
@ -25936,12 +26014,31 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
|||||||
if ( ! hashNoSplit ( table ) ) return NULL;
|
if ( ! hashNoSplit ( table ) ) return NULL;
|
||||||
|
|
||||||
|
|
||||||
// global index unless this is a json object in which case it is
|
// MDW: i think we just inject empty html with a diffbotreply into
|
||||||
// hased above in the call to hashJSON(). this will decrease disk
|
// global index now, so don't need this... 9/28/2014
|
||||||
// usage by about half, posdb* files are pretty big.
|
|
||||||
if ( cr->m_isCustomCrawl || ! cr->m_indexBody ) return (char *)1;
|
|
||||||
|
|
||||||
|
// global index unless this is a json object in which case it is
|
||||||
|
// hashed above in the call to hashJSON(). this will decrease disk
|
||||||
|
// usage by about half, posdb* files are pretty big.
|
||||||
|
//if ( cr->m_isCustomCrawl || ! cr->m_indexBody ) return (char *)1;
|
||||||
|
|
||||||
|
// hash json fields
|
||||||
|
if ( *ct == CT_JSON ) {
|
||||||
|
// this hashes both with and without the fieldname
|
||||||
|
hashJSONFields ( table );
|
||||||
|
// hash gblang:de
|
||||||
|
if ( ! hashLanguageString ( table ) ) return NULL;
|
||||||
|
goto skip;
|
||||||
|
}
|
||||||
|
|
||||||
|
// same for xml now, so we can search for field:value like w/ json
|
||||||
|
if ( *ct == CT_XML ) {
|
||||||
|
// this hashes both with and without the fieldname
|
||||||
|
hashXMLFields ( table );
|
||||||
|
// hash gblang:de
|
||||||
|
if ( ! hashLanguageString ( table ) ) return NULL;
|
||||||
|
goto skip;
|
||||||
|
}
|
||||||
|
|
||||||
// hash the body of the doc first so m_dist is 0 to match
|
// hash the body of the doc first so m_dist is 0 to match
|
||||||
// the rainbow display of sections
|
// the rainbow display of sections
|
||||||
@ -25971,6 +26068,8 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
|||||||
// somewhere.
|
// somewhere.
|
||||||
if ( ! hashMetaSummary(table) ) return NULL;
|
if ( ! hashMetaSummary(table) ) return NULL;
|
||||||
|
|
||||||
|
skip:
|
||||||
|
|
||||||
// this will only increment the scores of terms already in the table
|
// this will only increment the scores of terms already in the table
|
||||||
// because neighborhoods are not technically in the document
|
// because neighborhoods are not technically in the document
|
||||||
// necessarily and we do not want to ruin our precision
|
// necessarily and we do not want to ruin our precision
|
||||||
@ -25986,6 +26085,9 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
|
|||||||
if ( ! hashRSSInfo ( table ) ) return NULL;
|
if ( ! hashRSSInfo ( table ) ) return NULL;
|
||||||
if ( ! hashPermalink ( table ) ) return NULL;
|
if ( ! hashPermalink ( table ) ) return NULL;
|
||||||
|
|
||||||
|
// hash gblang:de last for parsing consistency
|
||||||
|
if ( ! hashLanguageString ( table ) ) return NULL;
|
||||||
|
|
||||||
// we set this now in hashWords3()
|
// we set this now in hashWords3()
|
||||||
if ( m_doingSEO )
|
if ( m_doingSEO )
|
||||||
m_wordPosInfoBufValid = true;
|
m_wordPosInfoBufValid = true;
|
||||||
@ -27299,6 +27401,9 @@ bool XmlDoc::hashUrl ( HashTableX *tt , bool isStatusDoc ) {
|
|||||||
// . copied Url2.cpp into here basically, so we can now dump Url2.cpp
|
// . copied Url2.cpp into here basically, so we can now dump Url2.cpp
|
||||||
bool XmlDoc::hashSections ( HashTableX *tt ) {
|
bool XmlDoc::hashSections ( HashTableX *tt ) {
|
||||||
|
|
||||||
|
//if ( ! m_contentTypeValid ) { char *xx=NULL;*xx=0; }
|
||||||
|
//if ( m_contentType == CT_HTML ) return true;
|
||||||
|
|
||||||
setStatus ( "hashing sections" );
|
setStatus ( "hashing sections" );
|
||||||
|
|
||||||
if ( ! m_sectionsValid ) { char *xx=NULL;*xx=0; }
|
if ( ! m_sectionsValid ) { char *xx=NULL;*xx=0; }
|
||||||
@ -28094,6 +28199,30 @@ bool XmlDoc::hashLanguage ( HashTableX *tt ) {
|
|||||||
|
|
||||||
// try lang abbreviation
|
// try lang abbreviation
|
||||||
sprintf(s , "%s ", getLangAbbr(langId) );
|
sprintf(s , "%s ", getLangAbbr(langId) );
|
||||||
|
// go back to broken way to try to fix parsing consistency bug
|
||||||
|
// by adding hashLanguageString() function below
|
||||||
|
//sprintf(s , "%s ", getLangAbbr(langId) );
|
||||||
|
if ( ! hashString ( s, slen, &hi ) ) return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool XmlDoc::hashLanguageString ( HashTableX *tt ) {
|
||||||
|
|
||||||
|
setStatus ( "hashing language string" );
|
||||||
|
|
||||||
|
long langId = (long)*getLangId();
|
||||||
|
|
||||||
|
// update hash parms
|
||||||
|
HashInfo hi;
|
||||||
|
hi.m_tt = tt;
|
||||||
|
hi.m_hashGroup = HASHGROUP_INTAG;
|
||||||
|
hi.m_prefix = "gblang";
|
||||||
|
|
||||||
|
// try lang abbreviation
|
||||||
|
char s[32];
|
||||||
|
long slen = sprintf(s , "%s ", getLangAbbr(langId) );
|
||||||
|
// go back to broken way to try to fix parsing consistency bug
|
||||||
if ( ! hashString ( s, slen, &hi ) ) return false;
|
if ( ! hashString ( s, slen, &hi ) ) return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -29073,7 +29202,8 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
|||||||
if ( ! jp || jp == (void *)-1)
|
if ( ! jp || jp == (void *)-1)
|
||||||
return (Msg20Reply *)jp;
|
return (Msg20Reply *)jp;
|
||||||
}
|
}
|
||||||
if ( m_contentType == CT_HTML ) {
|
if ( m_contentType == CT_HTML ||
|
||||||
|
m_contentType == CT_XML ) {
|
||||||
Xml *xml = getXml();
|
Xml *xml = getXml();
|
||||||
if ( ! xml || xml==(void *)-1)
|
if ( ! xml || xml==(void *)-1)
|
||||||
return (Msg20Reply *)xml;
|
return (Msg20Reply *)xml;
|
||||||
@ -29482,11 +29612,11 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
|||||||
|
|
||||||
// returns values of specified meta tags
|
// returns values of specified meta tags
|
||||||
if ( ! reply->ptr_dbuf && m_req->size_displayMetas > 1 ) {
|
if ( ! reply->ptr_dbuf && m_req->size_displayMetas > 1 ) {
|
||||||
long dlen; char *d;
|
long dsize; char *d;
|
||||||
d = getDescriptionBuf(m_req->ptr_displayMetas,&dlen);
|
d = getDescriptionBuf(m_req->ptr_displayMetas,&dsize);
|
||||||
if ( ! d || d == (char *)-1 ) return (Msg20Reply *)d;
|
if ( ! d || d == (char *)-1 ) return (Msg20Reply *)d;
|
||||||
reply->ptr_dbuf = d;
|
reply->ptr_dbuf = d;
|
||||||
reply->size_dbuf = dlen + 1;
|
reply->size_dbuf = dsize; // includes \0
|
||||||
}
|
}
|
||||||
|
|
||||||
// breathe
|
// breathe
|
||||||
@ -30370,9 +30500,9 @@ Matches *XmlDoc::getMatches () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sender wants meta description, custom tags, etc.
|
// sender wants meta description, custom tags, etc.
|
||||||
char *XmlDoc::getDescriptionBuf ( char *displayMetas , long *dlen ) {
|
char *XmlDoc::getDescriptionBuf ( char *displayMetas , long *dsize ) {
|
||||||
// return the buffer if we got it
|
// return the buffer if we got it
|
||||||
if ( m_dbufValid ) { *dlen = m_dbufLen; return m_dbuf; }
|
if ( m_dbufValid ) { *dsize = m_dbufSize; return m_dbuf; }
|
||||||
Xml *xml = getXml();
|
Xml *xml = getXml();
|
||||||
if ( ! xml || xml == (Xml *)-1 ) return (char *)xml;
|
if ( ! xml || xml == (Xml *)-1 ) return (char *)xml;
|
||||||
// now get the content of the requested display meta tags
|
// now get the content of the requested display meta tags
|
||||||
@ -30416,6 +30546,14 @@ char *XmlDoc::getDescriptionBuf ( char *displayMetas , long *dlen ) {
|
|||||||
gbstrlen(s) , // name len
|
gbstrlen(s) , // name len
|
||||||
"name" , // http-equiv/name
|
"name" , // http-equiv/name
|
||||||
false );// convert &#'s?
|
false );// convert &#'s?
|
||||||
|
dptr[wlen] = '\0';
|
||||||
|
|
||||||
|
// test it out
|
||||||
|
if ( ! verifyUtf8 ( dptr ) ) {
|
||||||
|
log("xmldoc: invalid utf8 content for meta tag %s.",s);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// advance and NULL terminate
|
// advance and NULL terminate
|
||||||
dptr += wlen;
|
dptr += wlen;
|
||||||
*dptr++ = '\0';
|
*dptr++ = '\0';
|
||||||
@ -30425,8 +30563,9 @@ char *XmlDoc::getDescriptionBuf ( char *displayMetas , long *dlen ) {
|
|||||||
"was encountered. Truncating.",dbufEnd-m_dbuf);
|
"was encountered. Truncating.",dbufEnd-m_dbuf);
|
||||||
}
|
}
|
||||||
// what is the size of the content of displayed meta tags?
|
// what is the size of the content of displayed meta tags?
|
||||||
m_dbufLen = dptr - m_dbuf;
|
m_dbufSize = dptr - m_dbuf;
|
||||||
m_dbufValid = true;
|
m_dbufValid = true;
|
||||||
|
*dsize = m_dbufSize;
|
||||||
return m_dbuf;
|
return m_dbuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30519,6 +30658,15 @@ Title *XmlDoc::getTitle ( ) {
|
|||||||
Summary *XmlDoc::getSummary () {
|
Summary *XmlDoc::getSummary () {
|
||||||
if ( m_summaryValid ) return &m_summary;
|
if ( m_summaryValid ) return &m_summary;
|
||||||
|
|
||||||
|
// xml and json docs have empty summaries for now
|
||||||
|
uint8_t *ct = getContentType();
|
||||||
|
if ( ! ct || ct == (void *)-1 ) return (Summary *)ct;
|
||||||
|
|
||||||
|
if ( *ct == CT_JSON || *ct == CT_XML ) {
|
||||||
|
m_summaryValid = true;
|
||||||
|
return &m_summary;
|
||||||
|
}
|
||||||
|
|
||||||
// need a buncha crap
|
// need a buncha crap
|
||||||
Words *ww = getWords();
|
Words *ww = getWords();
|
||||||
if ( ! ww || ww == (Words *)-1 ) return (Summary *)ww;
|
if ( ! ww || ww == (Words *)-1 ) return (Summary *)ww;
|
||||||
@ -35813,7 +35961,7 @@ char **XmlDoc::getRootTitleBuf ( ) {
|
|||||||
char *src = NULL;
|
char *src = NULL;
|
||||||
long srcSize = 0;
|
long srcSize = 0;
|
||||||
|
|
||||||
if ( ptr_rootTitleBuf ) {
|
if ( ptr_rootTitleBuf || m_setFromTitleRec ) {
|
||||||
src = ptr_rootTitleBuf;
|
src = ptr_rootTitleBuf;
|
||||||
srcSize = size_rootTitleBuf;
|
srcSize = size_rootTitleBuf;
|
||||||
}
|
}
|
||||||
@ -48352,9 +48500,9 @@ Json *XmlDoc::getParsedJson ( ) {
|
|||||||
|
|
||||||
#include "Json.h"
|
#include "Json.h"
|
||||||
|
|
||||||
char *XmlDoc::hashJSON ( HashTableX *table ) {
|
char *XmlDoc::hashJSONFields ( HashTableX *table ) {
|
||||||
|
|
||||||
setStatus ( "hashing json" );
|
setStatus ( "hashing json fields" );
|
||||||
|
|
||||||
HashInfo hi;
|
HashInfo hi;
|
||||||
hi.m_tt = table;
|
hi.m_tt = table;
|
||||||
@ -48515,6 +48663,58 @@ char *XmlDoc::hashJSON ( HashTableX *table ) {
|
|||||||
return (char *)0x01;
|
return (char *)0x01;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// . hash the text of an xml doc so it can be searched by field
// . each text node is hashed twice: once with the dotted name of its
//   enclosing tags ("tag1.tag2.tag3") as the prefix and once with no
//   prefix at all
// . returns (char *)0x01 on success
// . returns NULL if building a field name or hashing a string failed
// . if getXml() does not give us a usable Xml its NULL or -1 is returned
//   as is
char *XmlDoc::hashXMLFields ( HashTableX *table ) {

	setStatus ( "hashing xml fields" );

	HashInfo hi;
	hi.m_tt        = table;
	hi.m_desc      = "xml object";
	hi.m_hashGroup = HASHGROUP_BODY;

	// getXml() can hand back NULL or -1 instead of an Xml, do not
	// dereference those
	Xml *xml = getXml();
	if ( ! xml || xml == (Xml *)-1 ) return (char *)xml;

	long     numNodes = xml->getNumNodes();
	XmlNode *nodes    = xml->getNodes ();

	// re-used for every node's field name
	SafeBuf nameBuf;

	// scan the xml nodes
	for ( long i = 0 ; i < numNodes ; i++ ) {

		// breathe
		QUICKPOLL(m_niceness);

		// we just want the "text" nodes, so skip the tags
		if ( nodes[i].isTag() ) continue;

		// assemble the full parent name
		// like "tag1.tag2.tag3"
		nameBuf.reset();
		// do not index under a half-built field name
		if ( ! xml->getCompoundName ( i , &nameBuf ) ) return NULL;

		// this is \0 terminated
		char *tagName = nameBuf.getBufStart();

		// get the utf8 text
		char *val  = nodes[i].m_node;
		long  vlen = nodes[i].m_nodeLen;

		// index like "title:whatever"
		if ( tagName && tagName[0] ) {
			hi.m_prefix = tagName;
			if ( ! hashString ( val , vlen , &hi ) ) return NULL;
		}

		// hash without the field name as well
		hi.m_prefix = NULL;
		if ( ! hashString ( val , vlen , &hi ) ) return NULL;
	}

	return (char *)0x01;
}
|
||||||
|
|
||||||
// if our url is that of a subdoc, then get the url of the parent doc
|
// if our url is that of a subdoc, then get the url of the parent doc
|
||||||
// from which we were a subsection
|
// from which we were a subsection
|
||||||
char *XmlDoc::getDiffbotParentUrl( char *myUrl ) {
|
char *XmlDoc::getDiffbotParentUrl( char *myUrl ) {
|
||||||
@ -48561,6 +48761,9 @@ bool XmlDoc::storeFacetValues ( char *qs , SafeBuf *sb , FacetValHash_t fvh ) {
|
|||||||
if ( m_contentType == CT_HTML )
|
if ( m_contentType == CT_HTML )
|
||||||
return storeFacetValuesHtml ( qs , sb , fvh );
|
return storeFacetValuesHtml ( qs , sb , fvh );
|
||||||
|
|
||||||
|
if ( m_contentType == CT_XML )
|
||||||
|
return storeFacetValuesXml ( qs , sb , fvh );
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48702,6 +48905,89 @@ bool XmlDoc::storeFacetValuesHtml(char *qs, SafeBuf *sb, FacetValHash_t fvh ) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool XmlDoc::storeFacetValuesXml(char *qs, SafeBuf *sb, FacetValHash_t fvh ) {
|
||||||
|
|
||||||
|
Xml *xml = getXml();
|
||||||
|
|
||||||
|
long qsLen = gbstrlen(qs);
|
||||||
|
|
||||||
|
bool isString = false;
|
||||||
|
if ( strncmp(qs-4,"str:",4) == 0 ) isString = true;
|
||||||
|
|
||||||
|
long i = 0;
|
||||||
|
|
||||||
|
bool uniqueField = false;
|
||||||
|
|
||||||
|
SafeBuf nameBuf;
|
||||||
|
|
||||||
|
// find the first meta summary node
|
||||||
|
for ( i = 0 ; i < xml->m_numNodes ; i++ ) {
|
||||||
|
|
||||||
|
// skip text nodes
|
||||||
|
if ( xml->m_nodes[i].m_nodeId == 0 ) continue;
|
||||||
|
|
||||||
|
// assemble the full parent name
|
||||||
|
// like "tag1.tag2.tag3"
|
||||||
|
nameBuf.reset();
|
||||||
|
xml->getCompoundName ( i , &nameBuf );
|
||||||
|
long nameLen = nameBuf.length();
|
||||||
|
char *s = nameBuf.getBufStart();
|
||||||
|
|
||||||
|
// . does it have a type field that's "summary"
|
||||||
|
// . <meta name=summary content="...">
|
||||||
|
// . <meta http-equiv="refresh" content="0;URL=http://y.com/">
|
||||||
|
//s = xml->getString ( i , "name", &nameLen );
|
||||||
|
|
||||||
|
// "s" can be "summary","description","keywords",...
|
||||||
|
if ( nameLen != qsLen ) continue;
|
||||||
|
if ( strncasecmp ( s , qs , qsLen ) != 0 ) continue;
|
||||||
|
|
||||||
|
// got it...
|
||||||
|
|
||||||
|
// wtf?
|
||||||
|
if ( i + 1 >= xml->m_numNodes ) continue;
|
||||||
|
|
||||||
|
// point to the content! this is a text node?
|
||||||
|
|
||||||
|
// skip if not a text node, we don't return tag nodes i guess
|
||||||
|
if ( xml->m_nodes[i+1].m_nodeId ) continue;
|
||||||
|
|
||||||
|
char *content = xml->m_nodes[i+1].m_node;
|
||||||
|
long contentLen = xml->m_nodes[i+1].m_nodeLen;
|
||||||
|
|
||||||
|
// skip if empty
|
||||||
|
if ( ! content || contentLen <= 0 ) continue;
|
||||||
|
|
||||||
|
// skip common cases too! like white space
|
||||||
|
if ( contentLen == 1 && is_wspace_a(content[0]) ) continue;
|
||||||
|
|
||||||
|
// hash it to match it if caller specified a particular hash
|
||||||
|
// because they are coming from Msg40::lookUpFacets() function
|
||||||
|
// to convert the hashes to strings, like for rendering in
|
||||||
|
// the facets box to the left of the search results
|
||||||
|
FacetValHash_t val32 = hash32 ( content, contentLen);
|
||||||
|
if ( fvh && fvh != val32 ) continue;
|
||||||
|
|
||||||
|
// otherwise add facet FIELD to our buf
|
||||||
|
if ( ! sb->safeStrcpy(qs) ) return false;
|
||||||
|
if ( ! sb->pushChar('\0') ) return false;
|
||||||
|
|
||||||
|
// then add facet VALUE
|
||||||
|
if ( isString && !sb->safePrintf("%lu,",(unsigned long)val32))
|
||||||
|
return false;
|
||||||
|
if ( !sb->safeMemcpy(content,contentLen) ) return false;
|
||||||
|
if ( !sb->pushChar('\0') ) return false;
|
||||||
|
|
||||||
|
// if only one specified, we are done
|
||||||
|
if ( fvh ) return true;
|
||||||
|
|
||||||
|
if ( uniqueField ) return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool XmlDoc::storeFacetValuesJSON (char *qs, SafeBuf *sb,FacetValHash_t fvh ) {
|
bool XmlDoc::storeFacetValuesJSON (char *qs, SafeBuf *sb,FacetValHash_t fvh ) {
|
||||||
|
|
||||||
// use new json parser
|
// use new json parser
|
||||||
|
8
XmlDoc.h
8
XmlDoc.h
@ -802,6 +802,7 @@ class XmlDoc {
|
|||||||
bool linksToGigablast ( ) ;
|
bool linksToGigablast ( ) ;
|
||||||
bool searchboxToGigablast ( ) ;
|
bool searchboxToGigablast ( ) ;
|
||||||
bool hashLanguage ( class HashTableX *table ) ;
|
bool hashLanguage ( class HashTableX *table ) ;
|
||||||
|
bool hashLanguageString ( class HashTableX *table ) ;
|
||||||
bool hashCountry ( class HashTableX *table ) ;
|
bool hashCountry ( class HashTableX *table ) ;
|
||||||
bool hashSiteNumInlinks ( class HashTableX *table ) ;
|
bool hashSiteNumInlinks ( class HashTableX *table ) ;
|
||||||
bool hashCharset ( class HashTableX *table ) ;
|
bool hashCharset ( class HashTableX *table ) ;
|
||||||
@ -917,6 +918,8 @@ class XmlDoc {
|
|||||||
FacetValHash_t fvh ) ;
|
FacetValHash_t fvh ) ;
|
||||||
bool storeFacetValuesHtml ( char *qs , class SafeBuf *sb ,
|
bool storeFacetValuesHtml ( char *qs , class SafeBuf *sb ,
|
||||||
FacetValHash_t fvh ) ;
|
FacetValHash_t fvh ) ;
|
||||||
|
bool storeFacetValuesXml ( char *qs , class SafeBuf *sb ,
|
||||||
|
FacetValHash_t fvh ) ;
|
||||||
bool storeFacetValuesJSON ( char *qs , class SafeBuf *sb ,
|
bool storeFacetValuesJSON ( char *qs , class SafeBuf *sb ,
|
||||||
FacetValHash_t fvh ) ;
|
FacetValHash_t fvh ) ;
|
||||||
|
|
||||||
@ -1695,7 +1698,8 @@ class XmlDoc {
|
|||||||
//bool doesUrlMatchDiffbotProcessPattern() ;
|
//bool doesUrlMatchDiffbotProcessPattern() ;
|
||||||
bool doesPageContentMatchDiffbotProcessPattern() ;
|
bool doesPageContentMatchDiffbotProcessPattern() ;
|
||||||
long *getDiffbotTitleHashes ( long *numHashes ) ;
|
long *getDiffbotTitleHashes ( long *numHashes ) ;
|
||||||
char *hashJSON ( HashTableX *table );
|
char *hashJSONFields ( HashTableX *table );
|
||||||
|
char *hashXMLFields ( HashTableX *table );
|
||||||
long *nukeJSONObjects ( long *newTitleHashes , long numNewHashes ) ;
|
long *nukeJSONObjects ( long *newTitleHashes , long numNewHashes ) ;
|
||||||
|
|
||||||
long m_joc;
|
long m_joc;
|
||||||
@ -2032,7 +2036,7 @@ class XmlDoc {
|
|||||||
Query m_query;
|
Query m_query;
|
||||||
Matches m_matches;
|
Matches m_matches;
|
||||||
// meta description buf
|
// meta description buf
|
||||||
long m_dbufLen;
|
long m_dbufSize;
|
||||||
char m_dbuf[1024];
|
char m_dbuf[1024];
|
||||||
SafeBuf m_htb;
|
SafeBuf m_htb;
|
||||||
Title m_title;
|
Title m_title;
|
||||||
|
@ -95,6 +95,7 @@ class XmlNode {
|
|||||||
// . use for <a href> xml nodes only right now
|
// . use for <a href> xml nodes only right now
|
||||||
// . used so XmlDoc.cpp::getContactUsLink() works better
|
// . used so XmlDoc.cpp::getContactUsLink() works better
|
||||||
//long m_linkNum;
|
//long m_linkNum;
|
||||||
|
class XmlNode *m_parent;
|
||||||
};
|
};
|
||||||
|
|
||||||
// . does "s" start a tag? (regular tag , back tag or comment tag)
|
// . does "s" start a tag? (regular tag , back tag or comment tag)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
gb (1.14-1) unstable; urgency=low
|
gb (1.16-1) unstable; urgency=low
|
||||||
|
|
||||||
* More bug fixes.
|
* More bug fixes.
|
||||||
|
|
||||||
-- mwells <gigablast@mail.com> Tue, 19 Sep 2014 21:38:35 -0700
|
-- mwells <gigablast@mail.com> Tue, 24 Sep 2014 21:38:35 -0700
|
||||||
|
24
hash.cpp
24
hash.cpp
@ -2,17 +2,30 @@
|
|||||||
|
|
||||||
unsigned long long g_hashtab[256][256] ;
|
unsigned long long g_hashtab[256][256] ;
|
||||||
|
|
||||||
|
// . now we explicitly specify the zobrist table so we are compatible
|
||||||
|
// with cygwin and apple environments
|
||||||
|
// . no, let's just define the rand2() function to be compatible then
|
||||||
|
//#include "hashtab.cpp"
|
||||||
|
|
||||||
// . used for computing zobrist hash of a string up to 256 chars long
|
// . used for computing zobrist hash of a string up to 256 chars long
|
||||||
// . first array component is the max length, 256, of the string
|
// . first array component is the max length, 256, of the string
|
||||||
bool hashinit () {
|
bool hashinit () {
|
||||||
static bool s_initialized = false;
|
static bool s_initialized = false;
|
||||||
// bail if we already called this
|
// bail if we already called this
|
||||||
if ( s_initialized ) return true;
|
if ( s_initialized ) return true;
|
||||||
|
|
||||||
// show RAND_MAX
|
// show RAND_MAX
|
||||||
//printf("RAND_MAX = %lu\n", RAND_MAX ); it's 0x7fffffff
|
//printf("RAND_MAX = %lu\n", RAND_MAX ); it's 0x7fffffff
|
||||||
// seed with same value so we get same rand sequence for all
|
// seed with same value so we get same rand sequence for all
|
||||||
srand ( 1945687 );
|
srand ( 1945687 );
|
||||||
for ( long i = 0 ; i < 256 ; i++ )
|
|
||||||
|
//if ( g_hashtab[0][0] != 6720717044602784129LL ) return false;
|
||||||
|
//s_initialized = true;
|
||||||
|
//return true;
|
||||||
|
|
||||||
|
//fprintf(stdout,"g_hashtab[256][256]={\n");
|
||||||
|
for ( long i = 0 ; i < 256 ; i++ ) {
|
||||||
|
//fprintf(stdout,"{");
|
||||||
for ( long j = 0 ; j < 256 ; j++ ) {
|
for ( long j = 0 ; j < 256 ; j++ ) {
|
||||||
g_hashtab [i][j] = (unsigned long long)rand();
|
g_hashtab [i][j] = (unsigned long long)rand();
|
||||||
// the top bit never gets set, so fix
|
// the top bit never gets set, so fix
|
||||||
@ -23,8 +36,17 @@ bool hashinit () {
|
|||||||
// the top bit never gets set, so fix
|
// the top bit never gets set, so fix
|
||||||
if ( rand() > (0x7fffffff / 2) )
|
if ( rand() > (0x7fffffff / 2) )
|
||||||
g_hashtab[i][j] |= 0x80000000;
|
g_hashtab[i][j] |= 0x80000000;
|
||||||
|
// fixes for cygwin/apple
|
||||||
|
//fprintf(stdout,"%lluULL",g_hashtab[i][j]);
|
||||||
|
//if ( j+1<256 ) fprintf(stdout,",");
|
||||||
}
|
}
|
||||||
|
//fprintf(stdout,"},\n");
|
||||||
|
}
|
||||||
|
//fprintf(stdout,"};\n");
|
||||||
|
//fflush ( stdout );
|
||||||
|
|
||||||
if ( g_hashtab[0][0] != 6720717044602784129LL ) return false;
|
if ( g_hashtab[0][0] != 6720717044602784129LL ) return false;
|
||||||
|
|
||||||
s_initialized = true;
|
s_initialized = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ with the least amount of hardware possible. Gigablast provides large-scale,
|
|||||||
</p>
|
</p>
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Fr more information, <a href=/contact.html>contact Gigablast</a>.
|
For more information, <a href=/contact.html>contact Gigablast</a>.
|
||||||
</P>
|
</P>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
50
html/adv.html
Normal file
50
html/adv.html
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
<br><br><br>
|
||||||
|
<form method=GET action=/search name=f>
|
||||||
|
|
||||||
|
<table width=605 border=0 align=center cellpadding=5 cellspacing=3>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
|
||||||
|
<tr align=left valign=middle><th colspan=3>Search for...</th></tr><tr align=left valign=middle><td><strong>all</strong> of these words</td><td><input type=text id=q name=plus size=40 /></td><td><div onclick=document.f.submit(); onmouseover="this.style.backgroundColor='lightgreen';this.style.color='black';" onmouseout="this.style.backgroundColor='green';this.style.color='white';" style=border-radius:28px;cursor:pointer;cursor:hand;border-color:white;border-style:solid;border-width:3px;padding:12px;width:20px;height:20px;display:inline-block;background-color:green;color:white;><b style=margin-left:-5px;font-size:18px;>GO</b></div></td></tr><tr align=left valign=middle><td>this <strong>exact phrase</strong></td><td colspan=2><input type=text name=quotea size=40 /></td></tr><tr align=left valign=middle><td>and this <strong>exact phrase</strong></td><td colspan=2><input type=text name=quoteb size=40 /></td></tr>
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td><strong>any</strong> of these words</td><td colspan=2><input type=text name=any size=40 /></td></tr>
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td><strong>none</strong> of these words</td><td colspan=2><input type=text name=minus size=40 /></td></tr>
|
||||||
|
|
||||||
|
<!--<tr align=left valign=middle><td>Family Filter</td><td colspan=2><input type=radio name=ff value=1/>yes <input type=radio name=ff value=0 checked/>no</td></tr>-->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>In this language:</td><td colspan=2>
|
||||||
|
|
||||||
|
<select name=gblang style=width:415px;>
|
||||||
|
<option value=-1>Any</option>
|
||||||
|
<option value=0>Unknown</option>
|
||||||
|
|
||||||
|
<option value=1>English</option><option value=2>French</option><option value=3>Spanish</option><option value=4>Russian</option><option value=5>Turkish</option><option value=6>Japanese</option><option value=7>ChineseTrad</option><option value=8>ChineseSimp</option><option value=9>Korean</option><option value=10>German</option><option value=11>Dutch</option><option value=12>Italian</option><option value=13>Finnish</option><option value=14>Swedish</option><option value=15>Norwegian</option><option value=16>Portuguese</option><option value=17>Vietnamese</option><option value=18>Arabic</option><option value=19>Hebrew</option><option value=20>Indonesian</option><option value=21>Greek</option><option value=22>Thai</option><option value=23>Hindi</option><option value=24>Bengala</option><option value=25>Polish</option><option value=26>Tagalog</option></select></td></tr>
|
||||||
|
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>Pages that link to this URL</td><td colspan=2><input type=text name=link size=40 /></td></tr>
|
||||||
|
|
||||||
|
<tr><td>Search these collections</td><td><input type=text name=c size=40></td></tr>
|
||||||
|
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>Site Clustering</td><td colspan=2><input type=radio name=sc value=1/>yes <input type=radio name=sc value=0 checked/>no</td></tr>
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>Number of summary excerpts</td><td colspan=2><input type=radio name=ns value=0>0 <input type=radio name=ns value=1>1 <input type=radio name=ns value=2>2 <input type=radio name=ns value=3 checked>3 <input type=radio name=ns value=4>4 <input type=radio name=ns value=5>5</td></tr>
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>Results per Page</td>
|
||||||
|
|
||||||
|
<td colspan=2><input type=radio name=n value=10 checked/>10 <input type=radio name=n value=20 />20 <input type=radio name=n value=30 />30 <input type=radio name=n value=40 />40 <input type=radio name=n value=50 />50 <input type=radio name=n value=100 />100</td></tr>
|
||||||
|
|
||||||
|
|
||||||
|
<tr align=left valign=middle><td>Restrict to these Sites</td><td colspan=2><textarea rows=10 cols=56 name=sites></textarea></td></tr>
|
||||||
|
|
||||||
|
<tr><td></td><td><input type=submit></td></tr>
|
||||||
|
|
||||||
|
</tbody></table>
|
||||||
|
|
||||||
|
</form>
|
||||||
|
<br>
|
||||||
|
|
||||||
|
<br><br>
|
@ -137,7 +137,7 @@ Good luck!
|
|||||||
<td><b>HTTP API</b></td>
|
<td><b>HTTP API</b></td>
|
||||||
<!-- gb install -->
|
<!-- gb install -->
|
||||||
<td>
|
<td>
|
||||||
<a href=/api2.html>here</a>
|
<a href=/admin/api>here</a>
|
||||||
</td>
|
</td>
|
||||||
<!-- solr install-->
|
<!-- solr install-->
|
||||||
<td>
|
<td>
|
||||||
@ -262,7 +262,7 @@ Many different packages quilted together. Apache, MySQL, Lucene, Tika, Zookeeper
|
|||||||
<!--gigablast-->
|
<!--gigablast-->
|
||||||
<td>
|
<td>
|
||||||
<font color=green><b>
|
<font color=green><b>
|
||||||
Use curl using args (including <i>delim</i>) listed <a href=/api2.html#/admin/inject>here</a>
|
Use curl using args (including <i>delim</i>) listed <a href=/admin/api#/admin/inject>here</a>
|
||||||
</b></font>
|
</b></font>
|
||||||
<br>
|
<br>
|
||||||
</td>
|
</td>
|
||||||
@ -282,7 +282,7 @@ unsupported
|
|||||||
<!--gigablast-->
|
<!--gigablast-->
|
||||||
<td>
|
<td>
|
||||||
Use curl to post the content of the file with args listed
|
Use curl to post the content of the file with args listed
|
||||||
<a href=/api2.html#/admin/inject>here</a>
|
<a href=/admin/api#/admin/inject>here</a>
|
||||||
</td>
|
</td>
|
||||||
<!--solr-->
|
<!--solr-->
|
||||||
<td>
|
<td>
|
||||||
@ -300,7 +300,7 @@ You can index individual local files as such:
|
|||||||
<!--gigablast-->
|
<!--gigablast-->
|
||||||
<td>
|
<td>
|
||||||
Use curl to inject the url with args listed
|
Use curl to inject the url with args listed
|
||||||
<a href=/api2.html#/admin/inject>here</a>
|
<a href=/admin/api#/admin/inject>here</a>
|
||||||
|
|
||||||
</td>
|
</td>
|
||||||
<!--solr-->
|
<!--solr-->
|
||||||
@ -317,7 +317,7 @@ Use curl to inject the url with args listed
|
|||||||
<!--gigablast-->
|
<!--gigablast-->
|
||||||
<td>
|
<td>
|
||||||
Use one curl command for each url, using the interface described
|
Use one curl command for each url, using the interface described
|
||||||
<a href=/api2.html#/admin/inject>here</a></b>
|
<a href=/admin/api#/admin/inject>here</a></b>
|
||||||
</td>
|
</td>
|
||||||
<!--solr-->
|
<!--solr-->
|
||||||
<td>
|
<td>
|
||||||
@ -335,7 +335,7 @@ Use one curl command for each url, using the interface described
|
|||||||
<!--gigablast-->
|
<!--gigablast-->
|
||||||
<td>
|
<td>
|
||||||
Use curl command to delete a url, using the interface described
|
Use curl command to delete a url, using the interface described
|
||||||
<a href=/api2.html#/admin/inject>here</a></b>
|
<a href=/admin/api#/admin/inject>here</a></b>
|
||||||
</td>
|
</td>
|
||||||
<!--solr-->
|
<!--solr-->
|
||||||
<td>
|
<td>
|
||||||
@ -351,7 +351,7 @@ You can delete individual documents by specifying queries that match just those
|
|||||||
<td><b>Getting Results via cmdline</b></td>
|
<td><b>Getting Results via cmdline</b></td>
|
||||||
<td>
|
<td>
|
||||||
Use curl command to do a search, using the interface described
|
Use curl command to do a search, using the interface described
|
||||||
<a href=/api2.html#/search>here</a></b>
|
<a href=/admin/api#/search>here</a></b>
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
???
|
???
|
||||||
@ -882,3 +882,4 @@ and federated search across them.
|
|||||||
|
|
||||||
|
|
||||||
</table>
|
</table>
|
||||||
|
<br><br><br>
|
||||||
|
103
html/faq.html
103
html/faq.html
File diff suppressed because one or more lines are too long
BIN
html/rocket16.png
Normal file
BIN
html/rocket16.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 784 B |
13
html/searchbar.xml
Normal file
13
html/searchbar.xml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" xmlns:moz="http://www.mozilla.org/2006/browser/search/">
|
||||||
|
<ShortName>GigaBlast</ShortName>
|
||||||
|
<Description>The Search Engine</Description>
|
||||||
|
<Language>en-us</Language>
|
||||||
|
<OutputEncoding>UTF-8</OutputEncoding>
|
||||||
|
<InputEncoding>UTF-8</InputEncoding>
|
||||||
|
<Image width="16" height="16" type="image/png">http://www.gigablast.com/rocket16.png</Image>
|
||||||
|
<Url type="text/html" method="GET" template="http://www.gigablast.com/search?q={searchTerms}&amp;"></Url>
|
||||||
|
<Url type="application/xhtml+xml" indexOffset="0" template="http://www.gigablast.com/search?q={searchTerms}&amp;"></Url>
|
||||||
|
<moz:SearchForm>www.gigablast.com</moz:SearchForm>
|
||||||
|
</OpenSearchDescription>
|
||||||
|
|
@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
<br><br><br>
|
<br><br><br>
|
||||||
|
|
||||||
<h1>People that Use Gigablast</h1>
|
<h1>Gigablast Open Source Users</h1>
|
||||||
|
|
||||||
<table cellpadding=10 style=max-width:500px;>
|
<table cellpadding=10 style=max-width:500px;>
|
||||||
|
|
||||||
@ -55,4 +55,34 @@ search engine."
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</table>
|
||||||
|
|
||||||
|
|
||||||
|
<br><br>
|
||||||
|
|
||||||
|
<h1>Gigablast pre Open Source Users</h1>
|
||||||
|
|
||||||
|
<table cellpadding=10 style=max-width:500px;>
|
||||||
|
|
||||||
|
|
||||||
|
<tr><td>
|
||||||
|
<img width=80 height=120 src=/user1.jpeg>
|
||||||
|
</td><td>
|
||||||
|
Snap.com powered its web search technology by running Gigablast on over 100 servers. Snap was started by the founder of Overture/GoTo.com, Bill Gross. Before it closed, Snap helped pioneer the marketplace for ads that pop up when you mouse over a link.
|
||||||
|
</td></tr>
|
||||||
|
|
||||||
|
|
||||||
|
<tr><td>
|
||||||
|
<img width=80 height=120 src=/user1.jpeg>
|
||||||
|
</td><td>
|
||||||
|
GlobalSpec.com used Gigablast to index and search over millions of technical products to help grow it into one of the market leaders for online technical and industrial product information.
|
||||||
|
</td></tr>
|
||||||
|
|
||||||
|
|
||||||
|
<tr><td>
|
||||||
|
<img width=80 height=120 src=/user1.jpeg>
|
||||||
|
</td><td>
|
||||||
|
MetaLincs embedded Gigablast into its commercial solution for E-mail discovery before being acquired by Seagate, Inc.
|
||||||
|
</td></tr>
|
||||||
|
|
||||||
</table>
|
</table>
|
||||||
|
5
main.cpp
5
main.cpp
@ -2691,7 +2691,8 @@ int main2 ( int argc , char *argv[] ) {
|
|||||||
// hash the term itself
|
// hash the term itself
|
||||||
termId = hash64n(targ);
|
termId = hash64n(targ);
|
||||||
// hash prefix with termhash
|
// hash prefix with termhash
|
||||||
termId = hash64(termId,prefix64);
|
if ( prefix64 )
|
||||||
|
termId = hash64(termId,prefix64);
|
||||||
termId &= TERMID_MASK;
|
termId &= TERMID_MASK;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -5598,6 +5599,7 @@ bool registerMsgHandlers2(){
|
|||||||
if(! g_udpServer.registerHandler(0x3f,handleRequest3f)) return false;
|
if(! g_udpServer.registerHandler(0x3f,handleRequest3f)) return false;
|
||||||
|
|
||||||
if ( ! g_udpServer.registerHandler(0x25,handleRequest25)) return false;
|
if ( ! g_udpServer.registerHandler(0x25,handleRequest25)) return false;
|
||||||
|
if ( ! g_udpServer.registerHandler(0x07,handleRequest7)) return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
@ -12886,6 +12888,7 @@ void dumpPosdb (char *coll,long startFileNum,long numFiles,bool includeTree,
|
|||||||
if ( termId >= 0 ) {
|
if ( termId >= 0 ) {
|
||||||
g_posdb.makeStartKey ( &startKey, termId );
|
g_posdb.makeStartKey ( &startKey, termId );
|
||||||
g_posdb.makeEndKey ( &endKey, termId );
|
g_posdb.makeEndKey ( &endKey, termId );
|
||||||
|
printf("termid=%llu\n",termId);
|
||||||
printf("startkey=%s\n",KEYSTR(&startKey,sizeof(POSDBKEY)));
|
printf("startkey=%s\n",KEYSTR(&startKey,sizeof(POSDBKEY)));
|
||||||
printf("endkey=%s\n",KEYSTR(&endKey,sizeof(POSDBKEY)));
|
printf("endkey=%s\n",KEYSTR(&endKey,sizeof(POSDBKEY)));
|
||||||
}
|
}
|
||||||
|
720
qa.cpp
720
qa.cpp
@ -211,6 +211,11 @@ void processReply ( char *reply , long replyLen ) {
|
|||||||
// # of collections in the admin page: ..."4 Collections"
|
// # of collections in the admin page: ..."4 Collections"
|
||||||
markOut(content,"px;color:black;\"><center><nobr><b>");
|
markOut(content,"px;color:black;\"><center><nobr><b>");
|
||||||
|
|
||||||
|
markOut(content,"spider is done (");
|
||||||
|
markOut(content,"spider is paused (");
|
||||||
|
markOut(content,"spider is active (");
|
||||||
|
markOut(content,"spider queue empty (");
|
||||||
|
|
||||||
// make checksum. we ignore back to back spaces so this
|
// make checksum. we ignore back to back spaces so this
|
||||||
// hash works for <docsInCollection>10 vs <docsInCollection>9
|
// hash works for <docsInCollection>10 vs <docsInCollection>9
|
||||||
long contentCRC = 0;
|
long contentCRC = 0;
|
||||||
@ -502,8 +507,6 @@ static long *s_flags = NULL;
|
|||||||
//
|
//
|
||||||
bool qainject1 ( ) {
|
bool qainject1 ( ) {
|
||||||
|
|
||||||
//if ( ! s_callback ) s_callback = qainject1;
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// delete the 'qatest123' collection
|
// delete the 'qatest123' collection
|
||||||
//
|
//
|
||||||
@ -520,7 +523,8 @@ bool qainject1 ( ) {
|
|||||||
//static bool s_x2 = false;
|
//static bool s_x2 = false;
|
||||||
if ( ! s_flags[1] ) {
|
if ( ! s_flags[1] ) {
|
||||||
s_flags[1] = true;
|
s_flags[1] = true;
|
||||||
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1&"
|
||||||
|
"collectionips=127.0.0.1" ,
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -529,13 +533,12 @@ bool qainject1 ( ) {
|
|||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
if ( ! s_flags[17] ) {
|
if ( ! s_flags[17] ) {
|
||||||
s_flags[17] = true;
|
s_flags[17] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// this only loads once
|
// this only loads once
|
||||||
loadUrls();
|
loadUrls();
|
||||||
long max = s_ubuf2.length()/(long)sizeof(char *);
|
long max = s_ubuf2.length()/(long)sizeof(char *);
|
||||||
@ -605,6 +608,55 @@ bool qainject1 ( ) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// adv.html test
|
||||||
|
//
|
||||||
|
// query for 'test' using adv.html advanced search interface
|
||||||
|
if ( ! s_flags[27] ) {
|
||||||
|
s_flags[27] = true;
|
||||||
|
if ( ! getUrl (
|
||||||
|
"/search?c=qatest123&qa=17&format=xml&"
|
||||||
|
"dr=1&pss=50&sc=1&hacr=1&quotea=web+site&"
|
||||||
|
"gblang=1&minus=transcripts&n=150",
|
||||||
|
123 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// &sites= test
|
||||||
|
if ( ! s_flags[28] ) {
|
||||||
|
s_flags[28] = true;
|
||||||
|
if ( ! getUrl (
|
||||||
|
"/search?c=qatest123&qa=17&format=xml&q=web&"
|
||||||
|
"sortby=2&"
|
||||||
|
// html only:
|
||||||
|
"sw=20&"
|
||||||
|
"filetype=html&"
|
||||||
|
"ff=1&"
|
||||||
|
"facet=gbfacetint:gbhopcount&"
|
||||||
|
"sites=mindtools.com+www.redcross.org"
|
||||||
|
, 123 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// html test of summary width
|
||||||
|
if ( ! s_flags[29] ) {
|
||||||
|
s_flags[29] = true;
|
||||||
|
if ( ! getUrl (
|
||||||
|
"/search?c=qatest123&qa=17&format=html&q=web&"
|
||||||
|
// html only:
|
||||||
|
"sw=20&tml=10&ns=1&smxcpl=30&qh=0&n=100&"
|
||||||
|
"dt=keywords+description&"
|
||||||
|
"facet=gbfacetint:gbspiderdate&"
|
||||||
|
, 123 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// stop for now
|
||||||
|
//return true; //
|
||||||
|
|
||||||
//
|
//
|
||||||
// eject/delete the urls
|
// eject/delete the urls
|
||||||
//
|
//
|
||||||
@ -682,7 +734,7 @@ bool qainject2 ( ) {
|
|||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
if ( ! s_flags[17] ) {
|
if ( ! s_flags[17] ) {
|
||||||
s_flags[17] = true;
|
s_flags[17] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -758,7 +810,7 @@ bool qainject2 ( ) {
|
|||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// mdw: query reindex test
|
// mdw: query DELETE test
|
||||||
//
|
//
|
||||||
if ( ! s_flags[30] ) {
|
if ( ! s_flags[30] ) {
|
||||||
s_flags[30] = true;
|
s_flags[30] = true;
|
||||||
@ -824,6 +876,406 @@ bool qainject2 ( ) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool qaimport () {
|
||||||
|
|
||||||
|
//
|
||||||
|
// delete the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x1 = false;
|
||||||
|
if ( ! s_flags[0] ) {
|
||||||
|
s_flags[0] = true;
|
||||||
|
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// add the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x2 = false;
|
||||||
|
if ( ! s_flags[1] ) {
|
||||||
|
s_flags[1] = true;
|
||||||
|
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// turn spiders off so it doesn't spider while we are importing
|
||||||
|
if ( ! s_flags[18] ) {
|
||||||
|
s_flags[18] = true;
|
||||||
|
if ( ! getUrl ( "/admin/spider?cse=0&c=qatest123",
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// set the import dir and # inject threads
|
||||||
|
if ( ! s_flags[17] ) {
|
||||||
|
s_flags[17] = true;
|
||||||
|
if ( ! getUrl ( "/admin/import?c=qatest123&importdir=%2Fhome%2Fmwells%2Ftesting%2Fimport%2F&numimportinjects=3&import=1&action=submit",
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// wait for importloop to "kick in" so it can set cr->m_importState
|
||||||
|
if ( ! s_flags[3] ) {
|
||||||
|
wait(1.0);
|
||||||
|
s_flags[3] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// import must be done!
|
||||||
|
if ( ! s_flags[19] ) {
|
||||||
|
CollectionRec *cr = g_collectiondb.getRec("qatest123");
|
||||||
|
// if still importing this will be non-null
|
||||||
|
if ( cr->m_importState ) {
|
||||||
|
wait(1.0);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// all done then
|
||||||
|
s_flags[19] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for absorption of index
|
||||||
|
if ( ! s_flags[28] ) {
|
||||||
|
wait(2.0);
|
||||||
|
s_flags[28] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// test query
|
||||||
|
if ( ! s_flags[16] ) {
|
||||||
|
s_flags[16] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe"
|
||||||
|
"&dsrt=500",
|
||||||
|
702467314 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// test site clustering
|
||||||
|
if ( ! s_flags[29] ) {
|
||||||
|
s_flags[29] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=mediapost&dsrt=0&sc=1",
|
||||||
|
702467314 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//static bool s_fee2 = false;
|
||||||
|
if ( ! s_flags[13] ) {
|
||||||
|
s_flags[13] = true;
|
||||||
|
log("qa: SUCCESSFULLY COMPLETED DATA "
|
||||||
|
"IMPORT TEST");
|
||||||
|
//if ( s_callback == qainject ) exit(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool qainlinks() {
|
||||||
|
|
||||||
|
//
|
||||||
|
// delete the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x1 = false;
|
||||||
|
if ( ! s_flags[0] ) {
|
||||||
|
s_flags[0] = true;
|
||||||
|
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// add the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x2 = false;
|
||||||
|
if ( ! s_flags[1] ) {
|
||||||
|
s_flags[1] = true;
|
||||||
|
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// turn spiders off so it doesn't spider while we are importing
|
||||||
|
if ( ! s_flags[18] ) {
|
||||||
|
s_flags[18] = true;
|
||||||
|
if ( ! getUrl ( "/admin/spider?cse=0&c=qatest123",
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// inject youtube
|
||||||
|
if ( ! s_flags[2] ) {
|
||||||
|
s_flags[2] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf( "/admin/inject?c=qatest123&"
|
||||||
|
"format=xml&u=www.youtube.com");
|
||||||
|
if ( ! getUrl ( sb.getBufStart() , 999 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// test query
|
||||||
|
if ( ! s_flags[3] ) {
|
||||||
|
s_flags[3] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=youtube"
|
||||||
|
,702467314 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// scrape inlinkers
|
||||||
|
if ( ! s_flags[4] ) {
|
||||||
|
s_flags[4] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf( "/admin/inject?c=qatest123&"
|
||||||
|
"format=xml&qts=link:www.youtube.com&n=100");
|
||||||
|
if ( ! getUrl ( sb.getBufStart() , 999 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// inject better inlinkers
|
||||||
|
if ( ! s_flags[20] ) {
|
||||||
|
s_flags[20] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf( "/admin/inject?c=qatest123&"
|
||||||
|
"format=xml&"
|
||||||
|
"url=www.freebsd.org%%2Fcommunity.html");
|
||||||
|
if ( ! getUrl ( sb.getBufStart() , 999 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// wait a second for linkdb absorption
|
||||||
|
if ( ! s_flags[5] ) {
|
||||||
|
wait(1.0);
|
||||||
|
s_flags[5] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// RE-inject youtube
|
||||||
|
if ( ! s_flags[6] ) {
|
||||||
|
s_flags[6] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf( "/admin/inject?c=qatest123&"
|
||||||
|
"format=xml&u=www.youtube.com");
|
||||||
|
if ( ! getUrl ( sb.getBufStart() , 999 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// wait a second term freq stabilization
|
||||||
|
if ( ! s_flags[9] ) {
|
||||||
|
wait(2.0);
|
||||||
|
s_flags[9] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// test query
|
||||||
|
if ( ! s_flags[7] ) {
|
||||||
|
s_flags[7] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&"
|
||||||
|
"format=xml&q=youtube"
|
||||||
|
// get scoring info
|
||||||
|
"&scores=1"
|
||||||
|
,702467314 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//static bool s_fee2 = false;
|
||||||
|
if ( ! s_flags[13] ) {
|
||||||
|
s_flags[13] = true;
|
||||||
|
log("qa: SUCCESSFULLY COMPLETED INLINK TEST");
|
||||||
|
//if ( s_callback == qainject ) exit(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// query reindex test
|
||||||
|
bool qareindex() {
|
||||||
|
|
||||||
|
//
|
||||||
|
// delete the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x1 = false;
|
||||||
|
if ( ! s_flags[0] ) {
|
||||||
|
s_flags[0] = true;
|
||||||
|
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// add the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x2 = false;
|
||||||
|
if ( ! s_flags[1] ) {
|
||||||
|
s_flags[1] = true;
|
||||||
|
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// turn off images thumbnails
|
||||||
|
if ( ! s_flags[17] ) {
|
||||||
|
s_flags[17] = true;
|
||||||
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// this only loads once
|
||||||
|
loadUrls();
|
||||||
|
long max = s_ubuf2.length()/(long)sizeof(char *);
|
||||||
|
//max = 1;
|
||||||
|
|
||||||
|
//
|
||||||
|
// inject urls, return false if not done yet
|
||||||
|
//
|
||||||
|
//static bool s_x4 = false;
|
||||||
|
if ( ! s_flags[2] ) {
|
||||||
|
// TODO: try delimeter based injection too
|
||||||
|
//static long s_ii = 0;
|
||||||
|
for ( ; s_flags[20] < max ; ) {
|
||||||
|
// inject using html api
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf("&c=qatest123&deleteurl=0&"
|
||||||
|
"format=xml&u=");
|
||||||
|
sb.urlEncode ( s_urlPtrs[s_flags[20]] );
|
||||||
|
// the content
|
||||||
|
sb.safePrintf("&hasmime=1");
|
||||||
|
// sanity
|
||||||
|
//if ( strstr(s_urlPtrs[s_flags[20]],"wdc.htm") )
|
||||||
|
// log("hey");
|
||||||
|
sb.safePrintf("&content=");
|
||||||
|
sb.urlEncode(s_contentPtrs[s_flags[20]] );
|
||||||
|
sb.nullTerm();
|
||||||
|
// pre-inc it in case getUrl() blocks
|
||||||
|
s_flags[20]++;//ii++;
|
||||||
|
if ( ! getUrl("/admin/inject",
|
||||||
|
0, // no idea what crc to expect
|
||||||
|
sb.getBufStart()) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
s_flags[2] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for absorption
|
||||||
|
if ( ! s_flags[3] ) {
|
||||||
|
wait(1.5);
|
||||||
|
s_flags[3] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// query for 'test'
|
||||||
|
if ( ! s_flags[27] ) {
|
||||||
|
s_flags[27] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=17&format=xml&q=test&icc=1",
|
||||||
|
-1672870556 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make 2nd url filter !isreindex just have 0 spiders so we do
|
||||||
|
// not spider the links from the REINDEXED PAGES
|
||||||
|
if ( ! s_flags[4] ) {
|
||||||
|
s_flags[4] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
sb.safePrintf("&c=qatest123&"
|
||||||
|
// make it the custom filter
|
||||||
|
"ufp=custom&"
|
||||||
|
// zero spiders if not isreindex
|
||||||
|
"fe1=default&hspl1=0&hspl1=1&fsf1=1.000000&"
|
||||||
|
"mspr1=0&mspi1=0&xg1=1000&fsp1=45&"
|
||||||
|
);
|
||||||
|
if ( ! getUrl ( "/admin/filters",0,sb.getBufStart()) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// do the query reindex on 'test'
|
||||||
|
if ( ! s_flags[16] ) {
|
||||||
|
s_flags[16] = true;
|
||||||
|
if ( ! getUrl ( "/admin/reindex?c=qatest123&qa=16&"
|
||||||
|
"format=xml&q=test"
|
||||||
|
, 702467314 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
checkagain2:
|
||||||
|
// wait until spider finishes. check the spider status page
|
||||||
|
// in json to see when completed
|
||||||
|
if ( ! s_flags[5] ) {
|
||||||
|
wait(3.0);
|
||||||
|
s_flags[5] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// wait for all spiders to stop
|
||||||
|
if ( ! s_flags[15] ) {
|
||||||
|
s_flags[15] = true;
|
||||||
|
if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//static bool s_k2 = false;
|
||||||
|
if ( ! s_flags[6] ) {
|
||||||
|
// ensure spiders are done.
|
||||||
|
// "Nothing currently available to spider"
|
||||||
|
if ( s_content&&!strstr(s_content,"Nothing currently avail")){
|
||||||
|
s_flags[5] = false;
|
||||||
|
s_flags[15] = false;
|
||||||
|
goto checkagain2;
|
||||||
|
}
|
||||||
|
s_flags[6] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// query for 'test' again after the reindex
|
||||||
|
//
|
||||||
|
if ( ! s_flags[14] ) {
|
||||||
|
s_flags[14] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=14&format=xml&q=test&icc=1",
|
||||||
|
-1672870556 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//static bool s_fee2 = false;
|
||||||
|
if ( ! s_flags[13] ) {
|
||||||
|
s_flags[13] = true;
|
||||||
|
log("qa: SUCCESSFULLY COMPLETED "
|
||||||
|
"QUERY REINDEX");
|
||||||
|
//if ( s_callback == qainject ) exit(0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
static char *s_urls1 =
|
static char *s_urls1 =
|
||||||
" walmart.com"
|
" walmart.com"
|
||||||
@ -954,9 +1406,10 @@ bool qaspider1 ( ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
|
// set max spiders to 1 for consistency!
|
||||||
if ( ! s_flags[24] ) {
|
if ( ! s_flags[24] ) {
|
||||||
s_flags[24] = true;
|
s_flags[24] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -1220,7 +1673,7 @@ bool qaspider2 ( ) {
|
|||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
if ( ! s_flags[24] ) {
|
if ( ! s_flags[24] ) {
|
||||||
s_flags[24] = true;
|
s_flags[24] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -1417,7 +1870,7 @@ bool qascrape ( ) {
|
|||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
if ( ! s_flags[24] ) {
|
if ( ! s_flags[24] ) {
|
||||||
s_flags[24] = true;
|
s_flags[24] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -1536,7 +1989,7 @@ bool qajson ( ) {
|
|||||||
// turn off images thumbnails
|
// turn off images thumbnails
|
||||||
if ( ! s_flags[24] ) {
|
if ( ! s_flags[24] ) {
|
||||||
s_flags[24] = true;
|
s_flags[24] = true;
|
||||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0",
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
// checksum of reply expected
|
// checksum of reply expected
|
||||||
238170006 ) )
|
238170006 ) )
|
||||||
return false;
|
return false;
|
||||||
@ -1716,6 +2169,225 @@ bool qajson ( ) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// . the xml/rss urls used by the xml QA test
// . one single string with a space after every url; qaxml() url-encodes
//   the whole thing and posts it as the "urls" parm of /admin/addurl
// . kept as "char *" (not "const char *") to match how it is passed around
static char *s_ubuf5 =
	"http://www.thompsoncancer.com/News/RSSLocation2.ashx?sid=7 "
	"http://www.jdlculaval.com/xmlrpc.php?rsd "
	"http://pharmacept.com/feed/ "
	"http://www.web-erfolg.net/feed/ "
	"http://www.extremetriathlon.org/site/feed/ "
	"http://www.pilatesplusdublin.ie/wp-includes/wlwmanifest.xml "
	"http://www.youtube.com/oembed?url=http%3A//www.youtube.com/watch?v%3Dv0lZQVaXSyM&format=xml "
	"http://www.ehow.com/feed/home/garden-lawn/lawn-mowers.rss "
	"http://www.functionaltrainingpro.com/xmlrpc.php?rsd "
	"http://mississippisociety.com/index.php/feed "
	;
|
||||||
|
|
||||||
|
bool qaxml ( ) {
|
||||||
|
//
|
||||||
|
// delete the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x1 = false;
|
||||||
|
if ( ! s_flags[0] ) {
|
||||||
|
s_flags[0] = true;
|
||||||
|
if ( ! getUrl ( "/admin/delcoll?xml=1&delcoll=qatest123" ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// add the 'qatest123' collection
|
||||||
|
//
|
||||||
|
//static bool s_x2 = false;
|
||||||
|
if ( ! s_flags[1] ) {
|
||||||
|
s_flags[1] = true;
|
||||||
|
if ( ! getUrl ( "/admin/addcoll?addcoll=qatest123&xml=1" ,
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// turn off images thumbnails
|
||||||
|
if ( ! s_flags[24] ) {
|
||||||
|
s_flags[24] = true;
|
||||||
|
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||||
|
// checksum of reply expected
|
||||||
|
238170006 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// add the 50 urls
|
||||||
|
if ( ! s_flags[3] ) {
|
||||||
|
s_flags[3] = true;
|
||||||
|
SafeBuf sb;
|
||||||
|
|
||||||
|
sb.safePrintf("&c=qatest123"
|
||||||
|
"&format=json"
|
||||||
|
"&strip=1"
|
||||||
|
"&spiderlinks=0"
|
||||||
|
"&urls="//www.walmart.com+ibm.com"
|
||||||
|
);
|
||||||
|
sb.urlEncode ( s_ubuf5 );
|
||||||
|
// . now a list of websites we want to spider
|
||||||
|
// . the space is already encoded as +
|
||||||
|
if ( ! getUrl ( "/admin/addurl",0,sb.getBufStart()) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// wait for spidering to stop
|
||||||
|
//
|
||||||
|
checkagain:
|
||||||
|
|
||||||
|
// wait until spider finishes. check the spider status page
|
||||||
|
// in json to see when completed
|
||||||
|
//static bool s_k1 = false;
|
||||||
|
if ( ! s_flags[5] ) {
|
||||||
|
// wait 5 seconds, call sleep timer... then call qatest()
|
||||||
|
//usleep(5000000); // 5 seconds
|
||||||
|
wait(3.0);
|
||||||
|
s_flags[5] = true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[15] ) {
|
||||||
|
s_flags[15] = true;
|
||||||
|
if ( ! getUrl ( "/admin/status?format=json&c=qatest123",0) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//static bool s_k2 = false;
|
||||||
|
if ( ! s_flags[6] ) {
|
||||||
|
// ensure spiders are done.
|
||||||
|
// "Nothing currently available to spider"
|
||||||
|
if ( s_content&&!strstr(s_content,"Nothing currently avail")){
|
||||||
|
s_flags[5] = false;
|
||||||
|
s_flags[15] = false;
|
||||||
|
goto checkagain;
|
||||||
|
}
|
||||||
|
s_flags[6] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if ( ! s_flags[7] ) {
|
||||||
|
s_flags[7] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=type%3Axml+oembed.type%3Avideo",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[8] ) {
|
||||||
|
s_flags[8] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=video",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[9] ) {
|
||||||
|
s_flags[9] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=oembed.thumbnail_height%3A360",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[10] ) {
|
||||||
|
s_flags[10] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=gbminint%3Aoembed.thumbnail_height%3A380",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// other query tests...
|
||||||
|
if ( ! s_flags[12] ) {
|
||||||
|
s_flags[12] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=gbmaxint%3Aoembed.thumbnail_height%3A380",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[13] ) {
|
||||||
|
s_flags[13] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=rss.channel.item.title%3Abests",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if ( ! s_flags[14] ) {
|
||||||
|
s_flags[14] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
|
||||||
|
"q=gbfacetstr%3Arss.channel.title",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
if ( ! s_flags[15] ) {
|
||||||
|
s_flags[15] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||||
|
"q=gbfieldmatch%3Astrings.key"
|
||||||
|
"%3A\"Maemo+Browser\"",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[16] ) {
|
||||||
|
s_flags[16] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||||
|
"q=gbfieldmatch%3Astrings.key"
|
||||||
|
"%3A\"Google+Wireless+Transcoder\"",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// this should have no results, not capitalized
|
||||||
|
if ( ! s_flags[17] ) {
|
||||||
|
s_flags[17] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||||
|
"q=gbfieldmatch%3Astrings.key%3A\"samsung\"",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[18] ) {
|
||||||
|
s_flags[18] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||||
|
"q=gbfieldmatch%3Astrings.key%3ASamsung",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! s_flags[18] ) {
|
||||||
|
s_flags[18] = true;
|
||||||
|
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=json&"
|
||||||
|
"q=gbfieldmatch%3Astrings.key%3A\"Samsung\"",
|
||||||
|
-1310551262 ) )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
//static bool s_fee2 = false;
|
||||||
|
if ( ! s_flags[20] ) {
|
||||||
|
s_flags[20] = true;
|
||||||
|
log("qa: SUCCESSFULLY COMPLETED "
|
||||||
|
"QA XML TEST");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
bool qaspider ( ) {
|
bool qaspider ( ) {
|
||||||
@ -1739,7 +2411,8 @@ static QATest s_qatests[] = {
|
|||||||
{qainject1,
|
{qainject1,
|
||||||
"injectTest1",
|
"injectTest1",
|
||||||
"Test injection api. Test injection of multiple urls with content. "
|
"Test injection api. Test injection of multiple urls with content. "
|
||||||
"Test deletion of urls via inject api."},
|
"Test deletion of urls via inject api. Test most query api parms. "
|
||||||
|
"Test advanced search parms."},
|
||||||
|
|
||||||
{qainject2,
|
{qainject2,
|
||||||
"injectTest2",
|
"injectTest2",
|
||||||
@ -1760,9 +2433,26 @@ static QATest s_qatests[] = {
|
|||||||
"Scrape and inject results from google and bing."},
|
"Scrape and inject results from google and bing."},
|
||||||
|
|
||||||
{qajson,
|
{qajson,
|
||||||
"jsontest",
|
"jsonTest",
|
||||||
"Add Url some JSON pages and test json-ish queries. Test facets over "
|
"Add Url some JSON pages and test json-ish queries. Test facets over "
|
||||||
"json docs."}
|
"json docs."},
|
||||||
|
|
||||||
|
{qaxml,
|
||||||
|
"xmlTest",
|
||||||
|
"Add Url some XML pages and test xml-ish queries. Test facets over "
|
||||||
|
"xml docs."},
|
||||||
|
|
||||||
|
{qaimport,
|
||||||
|
"importDataTest",
|
||||||
|
"Test data import functionality. Test site clustering."},
|
||||||
|
|
||||||
|
{qainlinks,
|
||||||
|
"inlinksTest",
|
||||||
|
"Test youtube inlinks. Test EDOCUNCHANGED iff just inlinks change."},
|
||||||
|
|
||||||
|
{qareindex,
|
||||||
|
"queryReindexTest",
|
||||||
|
"Test query reindex function. Ensure changed docs are updated."}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -2014,5 +2704,3 @@ bool sendPageQA ( TcpSocket *sock , HttpRequest *hr ) {
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user