spider proxy updates

This commit is contained in:
mwells 2014-06-02 13:18:18 -07:00
parent 0b9b77ea46
commit 806cf79b73
9 changed files with 96 additions and 26 deletions

View File

@ -87,7 +87,10 @@ bool HttpRequest::set (char *url,long offset,long size,time_t ifModifiedSince,
char *userAgent , char *proto , bool doPost ,
char *cookie , char *additionalHeader ,
// if posting something, how many bytes is it?
long postContentLen ) {
long postContentLen ,
// are we sending the request through an http proxy?
// if so this will be non-zero
long proxyIp ) {
m_reqBufValid = false;
@ -96,6 +99,9 @@ bool HttpRequest::set (char *url,long offset,long size,time_t ifModifiedSince,
char *hptr = getHostFast ( url , &hlen , &port );
char *path = getPathFast ( url );
// use the full url if sending to an http proxy
if ( proxyIp ) path = url;
char *pathEnd = NULL;
char *postData = NULL;
if ( doPost ) {

View File

@ -54,7 +54,8 @@ class HttpRequest {
bool doPost = false ,
char *cookie = NULL ,
char *additionalHeader = NULL , // does not incl \r\n
long postContentLen = -1 ); // for content-length of POST
long postContentLen = -1 , // for content-length of POST
long proxyIp = 0 );
// use this
SafeBuf m_reqBuf;

View File

@ -136,6 +136,10 @@ bool HttpServer::getDoc ( char *url ,
if ( ip == -1 )
log("http: you probably didn't mean to set ip=-1 did you? "
"try setting to 0.");
// ignore if -1 as well
if ( proxyIp == -1 ) proxyIp = 0;
//log(LOG_WARN, "http: get doc %s", url->getUrl());
// use the HttpRequest class
HttpRequest r;
@ -165,7 +169,12 @@ bool HttpServer::getDoc ( char *url ,
if ( ! fullRequest ) {
if ( ! r.set ( url , offset , size , ifModifiedSince ,
userAgent , proto , doPost , cookie ,
additionalHeader , pcLen ) ) return true;
// pass in proxyIp because if it is a
// request being sent to a proxy we have to
// say "GET http://www.xyz.com/" the full
// url, not just a relative path.
additionalHeader , pcLen , proxyIp ) )
return true;
reqSize = r.getRequestLen();
req = (char *) mmalloc( reqSize + pcLen ,"HttpServer");
if ( req )

View File

@ -1127,6 +1127,9 @@ bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {
// this must be outside of table, submit button follows
if ( fmt == FORMAT_HTML ) sb->safePrintf ( "<br>\n" );
if ( page == PAGE_SPIDERPROXIES )
printSpiderProxyTable ( sb );
// url filter page has a test table
if ( page == PAGE_FILTERS && fmt == FORMAT_HTML ) {
// wrap up the form, print a submit button

View File

@ -587,10 +587,6 @@ bool Process::isAnyTreeSaving ( ) {
void powerMonitorWrapper ( int fd , void *state ) {
if ( g_isYippy ) return;
// also download test urls from spider proxies to ensure they
// are up and running properly
downloadTestUrlFromProxies();
// only if in matt wells datacenter
if ( ! g_conf.m_isMattWells )
return;
@ -849,6 +845,11 @@ void doneCmdWrapper ( void *state ) {
}
void hdtempWrapper ( int fd , void *state ) {
// also download test urls from spider proxies to ensure they
// are up and running properly
downloadTestUrlFromProxies();
// reset this... why?
g_errno = 0;
// do not get if already getting
@ -1789,6 +1790,9 @@ bool Process::saveBlockingFiles1 ( ) {
// save the login table
g_users.save();
// save stats on spider proxies if any
saveSpiderProxyStats();
// save the query log buffer if it was modified by the
// runSeoQueryLoop() in seo.cpp which updates its
// QueryLogEntry::m_minTop50Score member and corresponding timestamp

View File

@ -5156,6 +5156,11 @@ char *Proxy::storeLoginBar ( char *reply ,
newReply[len] = c;
return newReply;
}
// temp fix take it out because it is not working right
mp[0] = 'x';
return newReply;
// point to first digit in there
mp += 16;
// store our new content length as ascii into test buf

View File

@ -3378,23 +3378,26 @@ bool SafeBuf::base64Encode ( char *sx , long len , long niceness ) {
}
// "ago" is a delta-t in seconds; "now" is used to turn it back into an absolute timestamp
bool SafeBuf::printTimeAgo ( long ts , long now ) {
bool SafeBuf::printTimeAgo ( long ago , long now ) {
// Jul 23, 1971
if ( ! reserve2x(200) ) return false;
// for printing
long secs = 1000;
long mins = 1000;
long hrs = 1000;
long days ;
if ( ts > 0 ) {
mins = (long)((now - ts)/60);
hrs = (long)((now - ts)/3600);
days = (long)((now - ts)/(3600*24));
if ( ago > 0 ) {
secs = (long)((ago)/1);
mins = (long)((ago)/60);
hrs = (long)((ago)/3600);
days = (long)((ago)/(3600*24));
if ( mins < 0 ) mins = 0;
if ( hrs < 0 ) hrs = 0;
if ( days < 0 ) days = 0;
}
// print the time ago
if ( mins ==1)safePrintf("%li minute ago",mins);
if ( mins==0 ) safePrintf("%li seconds ago",secs);
else if ( mins ==1)safePrintf("%li minute ago",mins);
else if (mins<60)safePrintf ( "%li minutes ago",mins);
else if ( hrs == 1 )safePrintf ( "%li hour ago",hrs);
else if ( hrs < 24 )safePrintf ( "%li hours ago",hrs);
@ -3402,7 +3405,8 @@ bool SafeBuf::printTimeAgo ( long ts , long now ) {
else if (days< 7 )safePrintf ( "%li days ago",days);
// do not show if more than 1 wk old! we want to seem as
// fresh as possible
else if ( ts > 0 ) { // && si->m_isAdmin ) {
else if ( ago > 0 ) { // && si->m_isAdmin ) {
long ts = now - ago;
struct tm *timeStruct = localtime ( &ts );
char tmp[100];
strftime(tmp,100,"%b %d %Y",timeStruct);

View File

@ -131,8 +131,8 @@ bool buildProxyTable ( ) {
msg = "not enough digits for an ip";
if ( pc > 1 )
msg = "too many colons";
if ( dc != 4 )
msg = "need 4 dots for an ip address";
if ( dc != 3 )
msg = "need 3 dots for an ip address";
if ( bc )
msg = "got illegal char in ip:port listing";
if ( msg ) {
@ -155,7 +155,7 @@ bool buildProxyTable ( ) {
// and the port default is 80
long port = 80;
if ( portStr ) port = atol2(portStr,s-portStr);
if ( portStr ) port = atol2(portStr+1,s-portStr-1);
if ( port < 0 || port > 65535 ) {
log("spider: got bad proxy port for %s",p);
return false;
@ -174,6 +174,9 @@ bool buildProxyTable ( ) {
// see if in table
long islot = s_iptab.getSlot( &ipKey);
// advance p
p = s;
// if in there, keep it as is
if ( islot >= 0 ) continue;
@ -206,6 +209,17 @@ bool buildProxyTable ( ) {
return true;
}
// save the stats
// . hands g_hostdb.m_dir and the filename "spiderproxystats.dat" to
//   s_iptab.save()
// . returns whatever s_iptab.save() returns
bool saveSpiderProxyStats ( ) {
// save hash table
return s_iptab.save(g_hostdb.m_dir,"spiderproxystats.dat");
}
// load the stats
// . counterpart of saveSpiderProxyStats(): hands the same directory and
//   filename ("spiderproxystats.dat") to s_iptab.load()
// . returns whatever s_iptab.load() returns
bool loadSpiderProxyStats ( ) {
// load hash table
return s_iptab.load(g_hostdb.m_dir,"spiderproxystats.dat");
}
// . we call this from Parms.cpp which prints out the proxy related controls
// and this table below them...
// . allows user to see the stats of each spider proxy
@ -265,7 +279,8 @@ bool printSpiderProxyTable ( SafeBuf *sb ) {
char *bg = LIGHT_BLUE;
// mark with light red bg if last test url attempt failed
if ( sp->m_lastDownloadTookMS == -1 )
if ( sp->m_lastDownloadTookMS == -1 &&
sp->m_lastDownloadTestAttemptMS>0 )
bg = "ffa6a6";
// print it
@ -279,23 +294,31 @@ bool printSpiderProxyTable ( SafeBuf *sb ) {
);
// last SUCCESSFUL download time ago. when it completed.
long ago = now - sp->m_lastSuccessfulTestMS;
long ago = now - sp->m_lastSuccessfulTestMS/1000;
sb->safePrintf("<td>");
// like 1 minute ago etc.
sb->printTimeAgo ( ago , now );
if ( sp->m_lastSuccessfulTestMS <= 0 )
sb->safePrintf("none");
else
sb->printTimeAgo ( ago , now );
sb->safePrintf("</td>");
// last download time ago
ago = now - sp->m_lastDownloadTestAttemptMS;
ago = now - sp->m_lastDownloadTestAttemptMS/1000;
sb->safePrintf("<td>");
// like 1 minute ago etc.
sb->printTimeAgo ( ago , now );
if ( sp->m_lastDownloadTestAttemptMS<= 0 )
sb->safePrintf("none");
else
sb->printTimeAgo ( ago , now );
sb->safePrintf("</td>");
// how long to download the test url?
if ( sp->m_lastDownloadTookMS != -1 )
sb->safePrintf("<td>%lims</td>",
(long)sp->m_lastDownloadTookMS);
else if ( sp->m_lastDownloadTestAttemptMS<= 0 )
sb->safePrintf("<td>unknown</td>");
else
sb->safePrintf("<td>"
"<font color=red>FAILED</font>"
@ -304,7 +327,7 @@ bool printSpiderProxyTable ( SafeBuf *sb ) {
sb->safePrintf("</tr>\n");
}
sb->safePrintf("</table>");
sb->safePrintf("</table><br>");
return true;
}
@ -320,6 +343,10 @@ void gotTestUrlReplyWrapper ( void *state , TcpSocket *s ) {
// free that thing
//mfree ( ss , sizeof(spip) ,"spip" );
// note it
log("sproxy: got test url reply: %s",
s->m_readBuf);
// we can get the spider proxy ip/port from the socket because
// we sent this url download request to that spider proxy
unsigned long long key = (unsigned long)s->m_ip;
@ -360,7 +387,7 @@ bool downloadTestUrlFromProxies ( ) {
Host *h0 = g_hostdb.getFirstAliveHost();
if ( g_hostdb.m_myHost != h0 ) return true;
long nowms = gettimeofdayInMillisecondsLocal();
long long nowms = gettimeofdayInMillisecondsLocal();
for ( long i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
@ -371,8 +398,8 @@ bool downloadTestUrlFromProxies ( ) {
long long elapsed = nowms - sp->m_lastDownloadTestAttemptMS;
// hit test url once per minute
if ( elapsed < 60 ) continue;
// hit test url once per 31 seconds
if ( elapsed < 31000 ) continue;
// or if never came back yet!
if ( sp->m_isWaiting ) continue;
@ -641,6 +668,11 @@ bool initSpiderProxyStuff() {
if ( ! g_udpServer.registerHandler ( 0x55, handleRequest55 ))
return false;
// key is ip/port
s_iptab.set(8,sizeof(SpiderProxy),0,NULL,0,false,0,"siptab");
loadSpiderProxyStats();
// build the s_iptab hashtable for the first time
buildProxyTable ();

View File

@ -11,4 +11,10 @@ bool downloadTestUrlFromProxies();
// called by Parms.cpp when user changes the list of proxyips
bool buildProxyTable ( );
// show spider proxy stats, called by Parms.cpp
bool printSpiderProxyTable ( SafeBuf *sb ) ;
// save stats on the spider proxies if any
bool saveSpiderProxyStats();
#endif