add qa page

This commit is contained in:
mwells 2014-07-25 17:39:29 -07:00
parent 0460c3cc45
commit 2a094accff
8 changed files with 179 additions and 48 deletions

View File

@ -248,7 +248,7 @@ bool Conf::init ( char *dir ) { // , long hostId ) {
// hack this off until the overrun bug is fixed // hack this off until the overrun bug is fixed
g_conf.m_datedbMaxCacheMem = 0; g_conf.m_datedbMaxCacheMem = 0;
g_conf.m_qaBuildMode = true;// false //g_conf.m_qaBuildMode = true;// false
// force on for now // force on for now
g_conf.m_useStatsdb = true; g_conf.m_useStatsdb = true;

2
Conf.h
View File

@ -339,7 +339,7 @@ class Conf {
// for caching exact quotas in Msg36.cpp // for caching exact quotas in Msg36.cpp
// used by qa.cpp and Msg13.cpp // used by qa.cpp and Msg13.cpp
bool m_qaBuildMode; //bool m_qaBuildMode;
//long m_quotaTableMaxMem; //long m_quotaTableMaxMem;

View File

@ -741,7 +741,8 @@ bool Images::downloadImage ( ) {
r->m_urlIp = m_latestIp; r->m_urlIp = m_latestIp;
if ( ! strcmp(cr->m_coll,"qatest123")) { if ( ! strcmp(cr->m_coll,"qatest123")) {
r->m_useTestCache = 1; r->m_useTestCache = 1;
if ( g_conf.m_qaBuildMode ) r->m_addToTestCache = 1; //if ( g_conf.m_qaBuildMode ) r->m_addToTestCache = 1;
r->m_addToTestCache = 1;
} }
// url is the most important // url is the most important
strcpy(r->m_url,m_imageUrl.getUrl()); strcpy(r->m_url,m_imageUrl.getUrl());

View File

@ -90,7 +90,8 @@ static WebPage s_pages[] = {
{ PAGE_DIRECTORY , "dir" , 0 , "directory" , 0 , 0 , { PAGE_DIRECTORY , "dir" , 0 , "directory" , 0 , 0 ,
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT, //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
"directory", "directory",
sendPageDirectory , 0 ,NULL,NULL,0}, // until api is ready, take this out of the menu
sendPageDirectory , 0 ,NULL,NULL,PG_NOAPI},
{ PAGE_REPORTSPAM , "reportspam" , 0 , "report spam" , 0 , 0 , { PAGE_REPORTSPAM , "reportspam" , 0 , "report spam" , 0 , 0 ,
//USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY | USER_CLIENT, //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY | USER_CLIENT,
"report spam", "report spam",
@ -155,7 +156,7 @@ static WebPage s_pages[] = {
{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 , { PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
//USER_MASTER , //USER_MASTER ,
"repair data", "repair data",
sendPageGeneric , 0 ,NULL,NULL,0}, sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},
// { PAGE_SITES , "admin/sites", 0 , "site list" , 1 , 1, // { PAGE_SITES , "admin/sites", 0 , "site list" , 1 , 1,
// "what sites can be spidered", // "what sites can be spidered",
// sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI}, // sendPageBasicSettings // sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI}, // sendPageBasicSettings
@ -195,22 +196,22 @@ static WebPage s_pages[] = {
{ PAGE_GRAPH , "admin/graph" , 0 , "graph" , 0 , 0 , { PAGE_GRAPH , "admin/graph" , 0 , "graph" , 0 , 0 ,
//USER_MASTER , //USER_MASTER ,
"query stats graph", "query stats graph",
sendPageGraph , 2 /*niceness*/ ,NULL,NULL,PG_STATUS}, sendPageGraph , 2 /*niceness*/ ,NULL,NULL,PG_STATUS|PG_NOAPI},
{ PAGE_PERF , "admin/perf" , 0 , "performance" , 0 , 0 , { PAGE_PERF , "admin/perf" , 0 , "performance" , 0 , 0 ,
//USER_MASTER | USER_PROXY , //USER_MASTER | USER_PROXY ,
"function performance graph", "function performance graph",
sendPagePerf , 0 ,NULL,NULL,PG_STATUS}, sendPagePerf , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
{ PAGE_SOCKETS , "admin/sockets" , 0 , "sockets" , 0 , 0 , { PAGE_SOCKETS , "admin/sockets" , 0 , "sockets" , 0 , 0 ,
//USER_MASTER | USER_PROXY, //USER_MASTER | USER_PROXY,
"sockets", "sockets",
sendPageSockets , 0 ,NULL,NULL,PG_STATUS}, sendPageSockets , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
{ PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 , { PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 ,
//USER_MASTER , //USER_MASTER ,
"logview", "logview",
sendPageLogView , 0 ,NULL,NULL,PG_STATUS}, sendPageLogView , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
// { PAGE_SYNC , "master/sync" , 0 , "sync" , 0 , 0 , // { PAGE_SYNC , "master/sync" , 0 , "sync" , 0 , 0 ,
// //USER_MASTER , // //USER_MASTER ,
// "sync", // "sync",
@ -219,7 +220,7 @@ static WebPage s_pages[] = {
{ PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , M_POST , { PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , M_POST ,
//USER_MASTER | USER_PROXY , //USER_MASTER | USER_PROXY ,
"autobanned ips", "autobanned ips",
sendPageAutoban , 0 ,NULL,NULL,0}, sendPageAutoban , 0 ,NULL,NULL,PG_NOAPI},
/* /*
{ PAGE_SPIDERLOCKS,"admin/spiderlocks" , 0 , "spider locks" , 0 , 0 , { PAGE_SPIDERLOCKS,"admin/spiderlocks" , 0 , "spider locks" , 0 , 0 ,
USER_MASTER , sendPageSpiderLocks , 0 ,NULL,NULL,PG_NOAPI}, USER_MASTER , sendPageSpiderLocks , 0 ,NULL,NULL,PG_NOAPI},
@ -231,7 +232,7 @@ static WebPage s_pages[] = {
{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 , { PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
//USER_MASTER , //USER_MASTER ,
"threads", "threads",
sendPageThreads , 0 ,NULL,NULL,PG_STATUS}, sendPageThreads , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
//{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 , //{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 ,
// //USER_MASTER , // //USER_MASTER ,
// "thesaurus", // "thesaurus",
@ -243,10 +244,14 @@ static WebPage s_pages[] = {
// //USER_MASTER | USER_ADMIN , // //USER_MASTER | USER_ADMIN ,
// "overview", // "overview",
// sendPageOverview , 0 ,NULL,NULL,PG_NOAPI}, // sendPageOverview , 0 ,NULL,NULL,PG_NOAPI},
{ PAGE_QA , "admin/qa" , 0 , "qa" , 0 , 0 ,
"quality assurance", sendPageQA , 0 ,NULL,NULL,PG_NOAPI},
{ PAGE_API , "admin/api" , 0 , "api" , 0 , 0 , { PAGE_API , "admin/api" , 0 , "api" , 0 , 0 ,
//USER_MASTER | USER_ADMIN , //USER_MASTER | USER_ADMIN ,
"api", "api", sendPageAPI , 0 ,NULL,NULL,PG_NOAPI},
sendPageAPI , 0 ,NULL,NULL,PG_NOAPI},
{ PAGE_RULES , "admin/siterules", 0 , "site rules", 1, M_POST, { PAGE_RULES , "admin/siterules", 0 , "site rules", 1, M_POST,
//USER_ADMIN | USER_MASTER , //USER_ADMIN | USER_MASTER ,
"site rules", "site rules",
@ -268,7 +273,7 @@ static WebPage s_pages[] = {
{ PAGE_SPIDERDB , "admin/spiderdb" , 0 , "spider queue" , 0 , 0 , { PAGE_SPIDERDB , "admin/spiderdb" , 0 , "spider queue" , 0 , 0 ,
//USER_ADMIN | USER_MASTER , //USER_ADMIN | USER_MASTER ,
"spider queue", "spider queue",
sendPageSpiderdb , 0 ,NULL,NULL,PG_STATUS}, sendPageSpiderdb , 0 ,NULL,NULL,PG_STATUS|PG_NOAPI},
//{ PAGE_PRIORITIES, "admin/priorities" , 0 , "priority controls",1,1, //{ PAGE_PRIORITIES, "admin/priorities" , 0 , "priority controls",1,1,
// //USER_ADMIN | USER_MASTER , // //USER_ADMIN | USER_MASTER ,
// "spider priorities", // "spider priorities",
@ -303,7 +308,7 @@ static WebPage s_pages[] = {
{ PAGE_SITEDB , "admin/tagdb" , 0 , "tagdb" , 0 , M_POST, { PAGE_SITEDB , "admin/tagdb" , 0 , "tagdb" , 0 , M_POST,
//USER_MASTER | USER_ADMIN, //USER_MASTER | USER_ADMIN,
"add/remove/get tags for sites/urls", "add/remove/get tags for sites/urls",
sendPageTagdb , 0 ,NULL,NULL,0}, sendPageTagdb , 0 ,NULL,NULL,PG_NOAPI},
{ PAGE_CATDB , "admin/catdb" , 0 , "catdb" , 0,M_POST, { PAGE_CATDB , "admin/catdb" , 0 , "catdb" , 0,M_POST,
//USER_MASTER | USER_ADMIN, //USER_MASTER | USER_ADMIN,
"catdb", "catdb",
@ -2616,8 +2621,8 @@ bool printApiForPage ( SafeBuf *sb , long PAGENUM , CollectionRec *cr ) {
sb->safePrintf("<div style=padding-left:10%%>" sb->safePrintf("<div style=padding-left:10%%>"
"<font size=+2><b><a href=/%s>/%s</a></b></font>" "<font size=+2><b><a href=/%s?c=%s>/%s</a></b></font>"
,pageStr,pageStr); ,pageStr,cr->m_coll,pageStr);
sb->safePrintf("</a>"); sb->safePrintf("</a>");
// show settings? // show settings?
@ -2641,7 +2646,7 @@ bool printApiForPage ( SafeBuf *sb , long PAGENUM , CollectionRec *cr ) {
// show input parms to provide // show input parms to provide
//if ( PAGENUM == PAGE_ADDURL2 ) //if ( PAGENUM == PAGE_ADDURL2 )
if ( ! (s_pages[PAGENUM].m_pgflags & PG_STATUS) ) //if ( ! (s_pages[PAGENUM].m_pgflags & PG_STATUS) )
sb->safePrintf("<font size=-0> - %s " sb->safePrintf("<font size=-0> - %s "
" &nbsp; " " &nbsp; "
"[ <b>show parms in</b> " "[ <b>show parms in</b> "
@ -2792,8 +2797,8 @@ bool printApiForPage ( SafeBuf *sb , long PAGENUM , CollectionRec *cr ) {
if ( parm->m_type == TYPE_COMMENT ) continue; if ( parm->m_type == TYPE_COMMENT ) continue;
if ( parm->m_flags & PF_DUP ) continue; if ( parm->m_flags & PF_DUP ) continue;
// do not show on html page? // do not show on html page? this isn't the html page...
if ( parm->m_flags & PF_NOHTML ) continue; //if ( parm->m_flags & PF_NOHTML ) continue;
if ( parm->m_flags & PF_NOAPI ) continue; if ( parm->m_flags & PF_NOAPI ) continue;
if ( parm->m_flags & PF_DIFFBOT ) continue; if ( parm->m_flags & PF_DIFFBOT ) continue;
//if ( ! (parm->m_flags & PF_API) ) continue; //if ( ! (parm->m_flags & PF_API) ) continue;

View File

@ -87,6 +87,7 @@ bool sendPageWordVec ( TcpSocket *s , HttpRequest *r );
bool sendPageQualityAgent ( TcpSocket *s , HttpRequest *r ); bool sendPageQualityAgent ( TcpSocket *s , HttpRequest *r );
bool sendPageThesaurus ( TcpSocket *s , HttpRequest *r ); bool sendPageThesaurus ( TcpSocket *s , HttpRequest *r );
bool sendPageGraph ( TcpSocket *s , HttpRequest *r ); bool sendPageGraph ( TcpSocket *s , HttpRequest *r );
bool sendPageQA ( TcpSocket *sock , HttpRequest *hr ) ;
// values for m_usePost: // values for m_usePost:
#define M_GET 0x00 #define M_GET 0x00
@ -356,6 +357,8 @@ enum {
// PAGE_THESAURUS , // PAGE_THESAURUS ,
PAGE_QA,
// . non master-admin pages (collection controls) // . non master-admin pages (collection controls)
// . PAGE_OVERVIEW acts as a cutoff point (search Parms.cpp for it) // . PAGE_OVERVIEW acts as a cutoff point (search Parms.cpp for it)
//PAGE_OVERVIEW , //25 //PAGE_OVERVIEW , //25

View File

@ -1878,21 +1878,21 @@ bool Parms::printParm ( SafeBuf* sb,
if ( format == FORMAT_XML ) { if ( format == FORMAT_XML ) {
sb->safePrintf ( "\t<parm>\n"); sb->safePrintf ( "\t<parm>\n");
sb->safePrintf ( "\t\t<title>"); sb->safePrintf ( "\t\t<title><![CDATA[");
sb->htmlEncode ( m->m_title ); sb->cdataEncode ( m->m_title );
sb->safePrintf ( "</title>\n"); sb->safePrintf ( "]]></title>\n");
sb->safePrintf ( "\t\t<desc>"); sb->safePrintf ( "\t\t<desc><![CDATA[");
sb->htmlEncode ( m->m_desc ); sb->cdataEncode ( m->m_desc );
sb->safePrintf ( "</desc>\n"); sb->safePrintf ( "]]></desc>\n");
if ( m->m_flags & PF_REQUIRED ) if ( m->m_flags & PF_REQUIRED )
sb->safePrintf("\t\t<required>1</required>\n"); sb->safePrintf("\t\t<required>1</required>\n");
sb->safePrintf ( "\t\t<cgi>%s</cgi>\n",m->m_cgi); sb->safePrintf ( "\t\t<cgi>%s</cgi>\n",m->m_cgi);
// and default value if it exists // and default value if it exists
char *def = m->m_def; char *def = m->m_def;
if ( ! def ) def = ""; if ( ! def ) def = "";
sb->safePrintf ( "\t\t<defaultValue>"); sb->safePrintf ( "\t\t<defaultValue><![CDATA[");
sb->htmlEncode ( def ); sb->cdataEncode ( def );
sb->safePrintf ( "</defaultValue>\n"); sb->safePrintf ( "]]></defaultValue>\n");
if ( page == PAGE_MASTER || if ( page == PAGE_MASTER ||
page == PAGE_SEARCH || page == PAGE_SEARCH ||
page == PAGE_SPIDER || page == PAGE_SPIDER ||
@ -1901,11 +1901,11 @@ bool Parms::printParm ( SafeBuf* sb,
page == PAGE_SECURITY || page == PAGE_SECURITY ||
page == PAGE_REPAIR || page == PAGE_REPAIR ||
page == PAGE_LOG ) { page == PAGE_LOG ) {
sb->safePrintf ( "\t\t<currentValue>"); sb->safePrintf ( "\t\t<currentValue><![CDATA[");
SafeBuf xb; SafeBuf xb;
m->printVal ( &xb , collnum , 0 );//occNum m->printVal ( &xb , collnum , 0 );//occNum
sb->htmlEncode ( xb.getBufStart() ); sb->cdataEncode ( xb.getBufStart() );
sb->safePrintf ( "</currentValue>\n"); sb->safePrintf ( "]]></currentValue>\n");
} }
sb->safePrintf ( "\t</parm>\n"); sb->safePrintf ( "\t</parm>\n");
return true; return true;
@ -4749,6 +4749,18 @@ void Parms::init ( ) {
m->m_off = (char *)&gr.m_coll - (char *)&gr; m->m_off = (char *)&gr.m_coll - (char *)&gr;
m++; m++;
m->m_title = "collection";
m->m_desc = "Inject into this collection.";
m->m_cgi = "c";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
// PF_COLLDEFAULT: so it gets set to default coll on html page
m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&gr.m_coll - (char *)&gr;
m++;
// // // //
// // more global-ish parms // // more global-ish parms
// // // //
@ -5472,6 +5484,7 @@ void Parms::init ( ) {
//m->m_type = TYPE_BOOL; //m->m_type = TYPE_BOOL;
//m++; //m++;
/*
m->m_title = "qa build mode"; m->m_title = "qa build mode";
m->m_desc = "When on Msg13.cpp saves docs in the qatest123 coll " m->m_desc = "When on Msg13.cpp saves docs in the qatest123 coll "
"to qa/ subdir, when off " "to qa/ subdir, when off "
@ -5485,6 +5498,7 @@ void Parms::init ( ) {
m->m_obj = OBJ_CONF; m->m_obj = OBJ_CONF;
m->m_flags = PF_NOAPI | PF_HIDDEN; m->m_flags = PF_NOAPI | PF_HIDDEN;
m++; m++;
*/
m->m_title = "read only mode"; m->m_title = "read only mode";
m->m_desc = "Read only mode does not allow spidering."; m->m_desc = "Read only mode does not allow spidering.";
@ -14257,18 +14271,6 @@ void Parms::init ( ) {
m->m_off = (char *)&gr.m_spiderLinks - (char *)&gr; m->m_off = (char *)&gr.m_spiderLinks - (char *)&gr;
m++; m++;
m->m_title = "collection";
m->m_desc = "Inject into this collection.";
m->m_cgi = "c";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
// PF_COLLDEFAULT: so it gets set to default coll on html page
m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&gr.m_coll - (char *)&gr;
m++;
m->m_title = "short reply"; m->m_title = "short reply";
m->m_desc = "Should the injection response be short and simple?"; m->m_desc = "Should the injection response be short and simple?";
m->m_cgi = "quick"; m->m_cgi = "quick";

View File

@ -15091,8 +15091,9 @@ char **XmlDoc::getHttpReply2 ( ) {
r->m_ifModifiedSince = 0; r->m_ifModifiedSince = 0;
r->m_skipHammerCheck = 0; r->m_skipHammerCheck = 0;
if ( g_conf.m_qaBuildMode ) r->m_addToTestCache = true; //if ( g_conf.m_qaBuildMode ) r->m_addToTestCache = true;
else r->m_addToTestCache = false; //else r->m_addToTestCache = false;
r->m_addToTestCache = (bool)useTestCache;
// . this is -1 if unknown. none found in robots.txt or provided // . this is -1 if unknown. none found in robots.txt or provided
// in the custom crawl parms. // in the custom crawl parms.

123
qa.cpp
View File

@ -927,6 +927,30 @@ bool qaspider ( ) {
return true; return true;
} }
// . one entry in the table of qa tests
// . plain aggregate so a table of them can be brace-initialized
struct QATest {
	// entry point of the test: takes no arguments and returns a bool
	bool (* m_func)();
	// short identifier for the test
	char *m_testName;
	// longer description of what the test covers
	char *m_testDesc;
};
// . table of the available qa tests
// . each entry is { test function , test name , test description }
// . qatest() walks this table and sendPageQA() prints one row per entry
// . NOTE(review): qaspider1 and qaspider2 are not defined in the part of the
//   file visible here -- confirm they exist with a bool() signature
static QATest s_qatests[] = {
// injection test
{qainject,
"injectTest",
"test injection code"},
// spider test limited by the sitepages quota
{qaspider1,
"spiderSitePagesTest",
"test spidering walmart.com and ibm.com using sitepages quota"},
// spider test limited by hopcount
{qaspider2,
"spiderHopCountTest",
"test spidering walmart.com and ibm.com using hopcount limit"}
};
// . run a series of tests to ensure that gb is functioning properly // . run a series of tests to ensure that gb is functioning properly
// . uses the ./qa subdirectory to hold archive pages, ips, spider dates to // . uses the ./qa subdirectory to hold archive pages, ips, spider dates to
// ensure consistency between tests for exact replays // ensure consistency between tests for exact replays
@ -935,11 +959,106 @@ bool qatest ( ) {
if ( ! s_callback ) s_callback = qatest; if ( ! s_callback ) s_callback = qatest;
// returns true when done, false when blocked // returns true when done, false when blocked
if ( ! qainject ( ) ) return false; //if ( ! qainject ( ) ) return false;
// returns true when done, false when blocked // returns true when done, false when blocked
if ( ! qaspider ( ) ) return false; //if ( ! qaspider ( ) ) return false;
// . run every test in the s_qatests table, in table order
// . the old code tested the function POINTER ( ! qt->m_func ) instead of
//   calling it, so no test ever ran. call it like the commented-out
//   qainject()/qaspider() lines above did.
long n = sizeof(s_qatests)/sizeof(QATest);
for ( long i = 0 ; i < n ; i++ ) {
	QATest *qt = &s_qatests[i];
	// skip an entry with no function instead of reporting "blocked"
	if ( ! qt->m_func ) continue;
	// call the qatest. returns true when done, false when blocked
	if ( ! qt->m_func() ) return false;
}
return true; return true;
} }
#include "Parms.h"
#include "Pages.h"
// . handler for the "qa" admin page
// . prints an html table with one checkbox row per entry in s_qatests,
//   all checked by default, plus a button that toggles every checkbox
// . "sock" is the socket to reply on, "hr" is the incoming http request
// . replies with an error if the requested collection does not exist
// . returns whatever the http server's send call returns, on both the
//   error path and the normal path
bool sendPageQA ( TcpSocket *sock , HttpRequest *hr ) {
	char pbuf[32768];
	SafeBuf sb(pbuf, 32768);

	//char format = hr->getReplyFormat();

	// set this. also sets gr->m_hr
	GigablastRequest gr;
	// this will fill in GigablastRequest so all the parms we need are set
	g_parms.setGigablastRequest ( sock , hr , &gr );

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( gr.m_coll );
	// complain if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		//log("build: Injection from %s failed. "
		// "Collection \"%s\" does not exist.",
		// iptoa(s->m_ip),coll);
		// g_errno should be set so it will return an error response
		return g_httpServer.sendErrorReply (sock,g_errno,mstrerror(g_errno));
	}

	// show tests, all checked by default, to perform
	g_pages.printAdminTop ( &sb , sock , hr );

	// javascript helper: flips the checked state of the checkboxes whose
	// ids are name+0 .. name+(num-1)
	sb.safePrintf("<SCRIPT LANGUAGE=\"javascript\">"
		      "function checkAll(name, num) "
		      "{ "
		      " for (var i = 0; i < num; i++) {"
		      " var e = document.getElementById(name + i);"
		      //"alert(name+i);"
		      " e.checked = !e.checked ; "
		      "}"
		      "} "
		      "</SCRIPT> ");

	//sb.safePrintf("<form name=\"fo\">");

	sb.safePrintf("\n<table %s>\n",TABLE_STYLE);
	sb.safePrintf("<tr class=hdrow><td colspan=2>"
		      "<center><b>QA Tests</b></center>"
		      "</td></tr>");

	// number of tests in the table
	long n = sizeof(s_qatests)/sizeof(QATest);

	// header row
	sb.safePrintf("<tr><td><input type=\"button\" value=\"X\" "
		      "onclick=\"checkAll('test', %li);\">",n);
	sb.safePrintf("</td><td>qa test name</td></tr>\n");

	// . we keep the ptr to each test in an array
	// . print out each qa function
	for ( long i = 0 ; i < n ; i++ ) {
		QATest *qt = &s_qatests[i];
		// alternate the row background color. const because the
		// color macros are presumably string literals
		const char *bg;
		if ( i % 2 == 0 ) bg = LIGHT_BLUE;
		else              bg = DARK_BLUE;
		// checkbox id/name is "test<i>" so checkAll('test',n) finds it
		sb.safePrintf("<tr bgcolor=#%s>"
			      "<td><input type=checkbox name=test%li "
			      "id=test%li checked></td>"
			      "<td>%s</td>"
			      "</tr>\n"
			      , bg
			      , i
			      , i
			      , qt->m_testName
			      );
	}

	sb.safePrintf("</table>\n");
	// "</form>\n");

	g_pages.printAdminBottom ( &sb , hr );

	// hand back the server's result instead of discarding it and always
	// returning true, same as the error path above does
	return g_httpServer.sendDynamicPage(sock,
					    sb.getBufStart(),
					    sb.length(),
					    -1/*cachetime*/);
}