#include "gb-include.h"

//#include "GBVersion.h"
#include "Pages.h"
#include "Parms.h"
#include "Collectiondb.h"
//#include "CollectionRec.h"
#include "Tagdb.h"
#include "Categories.h"
#include "Proxy.h"
#include "PageParser.h" // g_inPageParser
#include "Users.h"
#include "Rebalance.h"

// a global class extern'd in Pages.h
Pages g_pages;

//const char *GBVersion;

// error message thingy used by HttpServer.cpp for logging purposes
char *g_msg;

// Reference copy of the WebPage record layout kept here for convenience;
// the live declaration lives in Pages.h. NOTE(review): the active struct has
// more members than shown (the entries below carry 12 initializers) -- the
// two NULLs and the trailing PG_* flags field are not in this old sketch.
/*
class WebPage {
 public:
	char  m_pageNum;  // see enum array below for this
	char *m_filename;
	long  m_flen;
	char *m_name;     // for printing the links to the pages in admin sect.
	bool  m_cast;     // broadcast input to all hosts?
	bool  m_usePost;  // use a POST request/reply instead of GET?
	                  // used because GET's input is limited to a few k.
	//char m_perm;    // permissions, see USER_* #define's below
	char *m_desc;     // page description
	bool (* m_function)(TcpSocket *s , HttpRequest *r);
	long  m_niceness;
};
*/

// . list of all dynamic pages, their path names, permissions and callback
//   functions that generate that page
// . IMPORTANT: these must be in the same order as the PAGE_* enum in Pages.h
//   otherwise you'll get a malformed error when running
// . initializer order per entry: { pageNum , filename , flen (always 0 here,
//   filled in by Pages::init()) , name , cast , usePost , desc , function ,
//   niceness , NULL , NULL , PG_* flags } -- see Pages.h to confirm the two
//   NULL members
static long s_numPages = 0;
static WebPage s_pages[] = {
	/*
	// dummy pages
	{ PAGE_NOHOSTLINKS , "nohostlinks", 0, "host links", 0, 0,
	  "dummy page - if set in the users row then host links will not be "
	  " shown",
	  NULL, 0 ,NULL,NULL, PG_NOAPI},
	{ PAGE_ADMIN , "colladmin", 0, "master=0", 0, 0,
	  "dummy page - if set in the users row then user will have master=0 and "
	  " collection links will be highlighted in red",
	  NULL, 0 ,NULL,NULL, PG_NOAPI},
	//{ PAGE_QUALITY , "quality", 0, "quality", 0, 0,
	//  "dummy page - if set in the users row then \"Quality Control\""
	//  " will be printed besides the logo for certain pages",
	//  NULL, 0 ,NULL,NULL,PG_NOAPI},
	{ PAGE_PUBLIC , "public", 0, "public", 0, 0,
	  "dummy page - if set in the users row then page function is"
	  " called directly and not through g_parms.setFromRequest",
	  NULL, 0 ,NULL,NULL,PG_NOAPI},
	*/

	// publicly accessible pages
	{ PAGE_ROOT , "index.html" , 0 , "root" , 0 , 0 ,
	  "search page to query",
	  sendPageRoot , 0 ,NULL,NULL, PG_NOAPI},

	{ PAGE_RESULTS , "search" , 0 , "search" , 0 , 0 ,
	  "search results page",
	  sendPageResults, 0 ,NULL,NULL, 0},

	//{ PAGE_WIDGET , "widget" , 0 , "widget" , 0 , 0 ,
	// "widget page",
	// sendPageWidget, 0 ,NULL,NULL,PG_NOAPI},

	// this is the public addurl, /addurl, if you are using the
	// api use PAGE_ADDURL2 which is /admin/addurl. so we set PG_NOAPI here
	{ PAGE_ADDURL , "addurl" , 0 , "add url" , 0 , 0 ,
	  "Page where you can add url for spidering",
	  sendPageAddUrl, 0 ,NULL,NULL, PG_NOAPI},

	{ PAGE_GET , "get" , 0 , "get" , 0 , 0 ,
	  //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
	  "gets cached web page",
	  sendPageGet , 0 ,NULL,NULL, 0},

	{ PAGE_LOGIN , "login" , 0 , "login" , 0 , 0 ,
	  //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
	  "login",
	  sendPageLogin, 0 ,NULL,NULL, PG_NOAPI},

	{ PAGE_DIRECTORY , "dir" , 0 , "directory" , 0 , 0 ,
	  //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_CLIENT,
	  "directory",
	  // until api is ready, take this out of the menu
	  sendPageDirectory , 0 ,NULL,NULL, PG_NOAPI},

	{ PAGE_REPORTSPAM , "reportspam" , 0 , "report spam" , 0 , 0 ,
	  //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_PROXY | USER_CLIENT
	  "report spam",
	  sendPageReportSpam , 0 ,NULL,NULL,PG_NOAPI},

	//{ PAGE_WORDVECTOR, "vec" , 0 , "word vectors" , 0 , 1 ,
	// //USER_PUBLIC | USER_MASTER | USER_ADMIN ,
	// "word vectors",
	// sendPageWordVec , 0 ,NULL,NULL,PG_NOAPI},

	// use post now for the "site list" which can be big
	{ PAGE_BASIC_SETTINGS, "admin/settings", 0 , "settings",1, M_POST ,
	  "basic settings", sendPageGeneric , 0 ,NULL,NULL,
	  PG_NOAPI|PG_COLLADMIN},

	{ PAGE_BASIC_STATUS, "admin/status", 0 , "status",1, 0 ,
	  "basic status", sendPageBasicStatus , 0 ,NULL,NULL,
	  PG_STATUS|PG_COLLADMIN},

	//{ PAGE_BASIC_DIFFBOT, "admin/diffbot", 0 , "diffbot",1, 0 ,
	// "Basic diffbot page.", sendPageBasicDiffbot , 0 ,
	//NULL,NULL,PG_NOAPI},

	{ PAGE_COLLPASSWORDS,//BASIC_SECURITY,
	  "admin/collectionpasswords", 0,"collection passwords",0,0,
	  "passwords", sendPageGeneric , 0 ,NULL,NULL,
	  PG_COLLADMIN},

	{ PAGE_BASIC_SEARCH, "", 0 , "search",1, 0 ,
	  "basic search", sendPageRoot , 0 ,NULL,NULL,
	  PG_NOAPI},

	{ PAGE_HOSTS , "admin/hosts" , 0 , "hosts" , 0 , 0 ,
	  //USER_MASTER | USER_PROXY,
	  "hosts status",
	  sendPageHosts , 0 ,NULL,NULL, PG_STATUS|PG_ROOTADMIN},

	{ PAGE_MASTER , "admin/master" , 0 , "master controls" , 1 , 0 ,
	  //USER_MASTER | USER_PROXY ,
	  "master controls",
	  sendPageGeneric , 0 ,NULL,NULL, PG_ROOTADMIN},

	// use POST for html head/tail and page root html. might be large.
	{ PAGE_SEARCH , "admin/search" , 0 , "search controls" ,1,M_POST,
	  //USER_ADMIN | USER_MASTER ,
	  "search controls",
	  sendPageGeneric , 0 ,NULL,NULL, 0},

	// use post now for the "site list" which can be big
	{ PAGE_SPIDER , "admin/spider" , 0 , "spider controls" ,1,M_POST,
	  //USER_ADMIN | USER_MASTER | USER_PROXY ,
	  "spider controls",
	  sendPageGeneric , 0 ,NULL,NULL, PG_COLLADMIN},

	{ PAGE_SPIDERPROXIES,"admin/proxies" , 0 , "proxies" , 1 , 0,
	  "proxies", sendPageGeneric , 0,NULL,NULL,
	  PG_ROOTADMIN } ,

	{ PAGE_LOG , "admin/log" , 0 , "log controls" , 1 , 0 ,
	  //USER_MASTER | USER_PROXY,
	  "log controls",
	  sendPageGeneric , 0 ,NULL,NULL, PG_ROOTADMIN},

	{ PAGE_ROOTPASSWORDS, "admin/rootpasswords", 0 , "root passwords" , 1 , 0 ,
	  //USER_MASTER | USER_PROXY ,
	  "root passwords",
	  sendPageGeneric , 0 ,NULL,NULL, PG_ROOTADMIN},

	{ PAGE_ADDCOLL , "admin/addcoll" , 0 , "add collection" , 1 , 0 ,
	  //USER_MASTER ,
	  "add a new collection",
	  sendPageAddColl , 0 ,NULL,NULL, PG_ROOTADMIN},

	{ PAGE_DELCOLL , "admin/delcoll" , 0 , "delete collections" , 1 ,0,
	  //USER_MASTER ,
	  "delete a collection",
	  sendPageDelColl , 0 ,NULL,NULL, PG_COLLADMIN},

	{ PAGE_CLONECOLL, "admin/clonecoll" , 0 , "clone collection" , 1 ,0,
	  //USER_MASTER ,
	  "clone one collection's settings to another",
	  sendPageCloneColl , 0 ,NULL,NULL, PG_ROOTADMIN},

	{ PAGE_REPAIR , "admin/repair" , 0 , "repair" , 1 , 0 ,
	  "repair data",
	  //USER_MASTER ,
	  sendPageGeneric , 0 ,NULL,NULL, PG_ROOTADMIN },

	{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 ,M_POST,
	  "prioritize urls for spidering",
	  sendPageGeneric , 0 ,NULL,NULL, PG_NOAPI|PG_COLLADMIN},

	{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0,M_MULTI ,
	  //USER_ADMIN | USER_MASTER ,
	  "inject url in the index here",
	  sendPageInject , 2 ,NULL,NULL, 0} ,

	// this is the addurl page the the admin!
	{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
	  "add url page for admin",
	  sendPageAddUrl2 , 0 ,NULL,NULL, PG_COLLADMIN},

	{ PAGE_REINDEX , "admin/reindex" , 0 , "query reindex" , 0 , 0 ,
	  //USER_ADMIN | USER_MASTER,
	  "query delete/reindex",
	  sendPageReindex , 0 ,NULL,NULL, PG_COLLADMIN},

	// master admin pages
	{ PAGE_STATS , "admin/stats" , 0 , "stats" , 0 , 0 ,
	  //USER_MASTER | USER_PROXY ,
	  "general statistics",
	  sendPageStats , 0 ,NULL,NULL, PG_STATUS|PG_ROOTADMIN},

	{ PAGE_GRAPH , "admin/graph" , 0 , "graph" , 0 , 0 ,
	  //USER_MASTER ,
	  "query stats graph",
	  sendPageGraph , 2 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_PERF , "admin/perf" , 0 , "performance" , 0 , 0 ,
	  //USER_MASTER | USER_PROXY ,
	  "function performance graph",
	  sendPagePerf , 0 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_SOCKETS , "admin/sockets" , 0 , "sockets" , 0 , 0 ,
	  //USER_MASTER | USER_PROXY,
	  "sockets",
	  sendPageSockets , 0 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_LOGVIEW , "admin/logview" , 0 , "log view" , 0 , 0 ,
	  //USER_MASTER ,
	  "logview",
	  sendPageLogView , 0 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	// { PAGE_SYNC , "master/sync" , 0 , "sync" , 0 , 0 ,
	// //USER_MASTER ,
	// "sync",
	// sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},

	{ PAGE_AUTOBAN ,"admin/autoban" , 0 , "autoban" , 1 , M_POST ,
	  //USER_MASTER | USER_PROXY ,
	  "autobanned ips",
	  sendPageAutoban , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
	  //USER_MASTER ,
	  "profiler",
	  sendPageProfiler , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
	  //USER_MASTER ,
	  "threads",
	  sendPageThreads , 0 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	//{ PAGE_THESAURUS, "admin/thesaurus", 0 , "thesaurus", 0 , 0 ,
	// //USER_MASTER ,
	// "thesaurus",
	// sendPageThesaurus , 0 ,NULL,NULL,PG_NOAPI},

	// collection admin pages
	//{ PAGE_OVERVIEW , "admin/overview" , 0 , "overview" , 0 , 0,
	// //USER_MASTER | USER_ADMIN ,
	// "overview",
	// sendPageOverview , 0 ,NULL,NULL,PG_NOAPI},

	{ PAGE_QA , "admin/qa" , 0 , "qa" , 0 , 0 ,
	  "quality assurance",
	  sendPageQA , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_IMPORT , "admin/import" , 0 , "import" , 0 , 0 ,
	  "import documents from another cluster",
	  sendPageGeneric , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_API , "admin/api" , 0 , "api" , 0 , 0 ,
	  //USER_MASTER | USER_ADMIN ,
	  "api",
	  sendPageAPI , 0 ,NULL,NULL, PG_NOAPI|PG_COLLADMIN},

	{ PAGE_RULES , "admin/siterules", 0 , "site rules", 1, M_POST,
	  //USER_ADMIN | USER_MASTER ,
	  "site rules",
	  sendPageGeneric , 0,NULL,NULL, PG_NOAPI},

	{ PAGE_INDEXDB , "admin/indexdb" , 0 , "indexdb" , 0 , 0,
	  //USER_MASTER ,
	  "indexdb",
	  sendPageIndexdb , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_TITLEDB , "admin/titledb" , 0 , "titledb" , 0 , 0,
	  //USER_MASTER ,
	  "titledb",
	  sendPageTitledb , 2,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	// 1 = usePost
	{ PAGE_CRAWLBOT , "crawlbot" , 0 , "crawlbot" , 1 , 0,
	  "simplified spider controls",
	  sendPageCrawlbot , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_SPIDERDB , "admin/spiderdb" , 0 , "spider queue" , 0 , 0 ,
	  //USER_ADMIN | USER_MASTER ,
	  "spider queue",
	  sendPageSpiderdb , 0 ,NULL,NULL, PG_STATUS|PG_NOAPI|PG_ROOTADMIN},

	//{ PAGE_PRIORITIES, "admin/priorities" , 0 , "priority controls",1,1,
	// //USER_ADMIN | USER_MASTER ,
	// "spider priorities",
	// sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},

	//{ PAGE_KEYWORDS, "admin/queries",0,"queries" , 0 , 1 ,
	// "get queries a url matches",
	// sendPageMatchingQueries , 2 } ,

#ifndef CYGWIN
	{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
	  "SEO info",
	  sendPageSEO , 2 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},
#else
	{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
	  "SEO info",
	  sendPageResults , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},
#endif

	{ PAGE_ACCESS , "admin/access" , 0 , "access" , 1 , M_POST,
	  //USER_ADMIN | USER_MASTER ,
	  "access password, ip, admin ips etc. "
	  "all goes in here",
	  sendPageGeneric , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	{ PAGE_SEARCHBOX , "admin/searchbox", 0 , "search" , 0 , 0 ,
	  //USER_ADMIN | USER_MASTER ,
	  "search box",
	  sendPageResults , 0 ,NULL,NULL, PG_NOAPI},

	{ PAGE_PARSER , "admin/parser" , 0 , "parser" , 0,M_POST,
	  //USER_MASTER ,
	  "page parser",
	  sendPageParser , 2 ,NULL,NULL, PG_NOAPI|PG_COLLADMIN},

	{ PAGE_SITEDB , "admin/tagdb" , 0 , "tagdb" , 0 , M_POST,
	  //USER_MASTER | USER_ADMIN,
	  "add/remove/get tags for sites/urls",
	  sendPageTagdb , 0 ,NULL,NULL, PG_NOAPI|PG_COLLADMIN},

	{ PAGE_CATDB , "admin/catdb" , 0 , "catdb" , 0,M_POST,
	  //USER_MASTER | USER_ADMIN,
	  "catdb",
	  sendPageCatdb , 0 ,NULL,NULL, PG_NOAPI|PG_ROOTADMIN},

	//{ PAGE_LOGIN2 , "admin/login" , 0 , "login" , 0 , 0,
	// //USER_PUBLIC | USER_MASTER | USER_ADMIN | USER_SPAM | USER_CLIENT,
	//"login link - also logoffs user",
	// sendPageLogin,0}

	// { PAGE_TOPDOCS , "admin/topdocs" , 0 , "top docs" , 1 , 1 ,
	// //USER_ADMIN | USER_MASTER,
	// "top documents",
	// sendPageTopDocs , 0 ,NULL,NULL,PG_NOAPI},

	// { PAGE_TOPICS , "admin/topics" , 0 , "topics" , 0 , 1 ,
	// USER_ADMIN | USER_MASTER , sendPageTopics , 0 ,NULL,NULL,PG_NOAPI},

	// { PAGE_SPAM , "admin/spam" , 0 , "spam weights" , 1 , 1 ,
	// USER_ADMIN | USER_MASTER , sendPageSpam , 0 ,NULL,NULL,PG_NOAPI},

	//{ PAGE_QAGENT , "admin/qagent" , 0 , "quality agent" , 1 , 1 ,
	// //USER_ADMIN | USER_MASTER ,
	// "quality agent",
	// sendPageQualityAgent, 2 ,NULL,NULL,PG_NOAPI},

	// MDW: take out for now since we are fully split and don't need
	// network to transport termlists any more
	//{ PAGE_NETTEST , "admin/nettest" , 0 , "net test" , 1 , 1 ,
	// //USER_ADMIN | USER_MASTER,
	// "net test",
	// sendPageNetTest , 0 ,NULL,NULL,PG_NOAPI},

	//{ PAGE_ADFEED , "admin/adfeed" , 0 , "ad feed" , 1 , 1 ,
	// //USER_ADMIN | USER_MASTER,
	// "ad feed control",
	// sendPageGeneric , 0 ,NULL,NULL,PG_NOAPI},

	//{ PAGE_TURK2 , "pageturkhome" , 0 , "page turk" , 0 , 0 ,
	// "page turk home",
	// sendPageTurkHome, 0 }
};

WebPage
*Pages::getPage ( long page ) { return &s_pages[page]; } char *Pages::getPath ( long page ) { return s_pages[page].m_filename; } long Pages::getNumPages(){ return s_numPages; } void Pages::init ( ) { // array of dynamic page descriptions s_numPages = sizeof(s_pages) / sizeof(WebPage); // sanity check, ensure PAGE_* corresponds to position for ( long i = 0 ; i < s_numPages ; i++ ) if ( s_pages[i].m_pageNum != i ) { log(LOG_LOGIC,"conf: Bad engineer. WebPage array is " "malformed. It must be 1-1 with the " "WebPage enum in Pages.h."); char *xx=NULL;*xx=0; //exit ( -1 ); } // set the m_flen member for ( long i = 0 ; i < s_numPages ; i++ ) s_pages[i].m_flen = gbstrlen ( s_pages[i].m_filename ); } // Used by Users.cpp to get PAGE_* from the given filename long Pages::getPageNumber ( char *filename ){ // static bool s_init = false; static char s_buff[8192]; static HashTableX s_ht; if ( !s_init ){ s_ht.set(8,4,256,s_buff,8192,false,0,"pgnummap"); for ( long i=0; i < PAGE_NONE; i++ ){ if ( ! s_pages[i].m_filename ) continue; if ( s_pages[i].m_flen <= 0 ) continue; long long pageHash = hash64( s_pages[i].m_filename, s_pages[i].m_flen ); if ( ! 
s_ht.addKey(&pageHash,&i) ){ char *xx = NULL; *xx = 0; } } s_init = true; // make sure stay in s_buff if ( s_ht.m_buf != s_buff ) { char *xx=NULL;*xx=0; } } long long pageHash = hash64(filename,gbstrlen(filename)); long slot = s_ht.getSlot(&pageHash); if ( slot== -1 ) return -1; long value = *(long *)s_ht.getValueFromSlot(slot); return value; } // return the PAGE_* number thingy long Pages::getDynamicPageNumber ( HttpRequest *r ) { char *path = r->getFilename(); long pathLen = r->getFilenameLen(); if ( pathLen > 0 && path[0]=='/' ) { path++; pathLen--; } // historical backwards compatibility fix if ( pathLen == 9 && strncmp ( path , "cgi/0.cgi" , 9 ) == 0 ) { path = "search"; pathLen = gbstrlen(path); } if ( pathLen == 9 && strncmp ( path , "cgi/1.cgi" , 9 ) == 0 ) { path = "addurl"; pathLen = gbstrlen(path); } if ( pathLen == 6 && strncmp ( path , "inject" , 6 ) == 0 ) { path = "admin/inject"; pathLen = gbstrlen(path); } if ( pathLen == 9 && strncmp ( path , "index.php" , 9 ) == 0 ) { path = "search"; pathLen = gbstrlen(path); } if ( pathLen == 10 && strncmp ( path , "search.csv" , 10 ) == 0 ) { path = "search"; pathLen = gbstrlen(path); } // if it is like /GA/Atlanta then call sendPageResults // and that should be smart enough to set the m_where in // SearchInput.cpp from the path!! 
// this messes up /qa/* files // if ( path && // // "filename" does not start with '/' for some reason // //path[0] && // //path[0] == '/' && // path[0] && // is_alpha_a(path[0]) && // is_alpha_a(path[1]) && // pathLen<64 && // // "GET /NM" // (path[2] == '/' || path[2]=='\0' || path[2]==' ') ) // return PAGE_RESULTS; // go down the list comparing the pathname to dynamic page names for ( long i = 0 ; i < s_numPages ; i++ ) { if ( pathLen != s_pages[i].m_flen ) continue; if ( strncmp ( path , s_pages[i].m_filename , pathLen ) == 0 ) return i; } // check to see if the path is a category path = r->getPath(); pathLen = r->getPathLen(); // truncate if we would breech if ( pathLen >= MAX_HTTP_FILENAME_LEN ) pathLen = MAX_HTTP_FILENAME_LEN - 1; // decode the path char decodedPath[MAX_HTTP_FILENAME_LEN]; long decodedPathLen = urlDecode(decodedPath, path, pathLen); // remove cgi for (long i = 0; i < decodedPathLen; i++) { if (decodedPath[i] == '?') { decodedPathLen = i; break; } } // sanity if ( ! g_categories ) log("process: no categories loaded"); // // dmoz - look it up for a category // if ( g_categories && g_categories->getIndexFromPath(decodedPath, decodedPathLen) >= 0) return PAGE_DIRECTORY; // just go to PAGE_DIRECTORY for other request //return PAGE_DIRECTORY; // not found in our list of dynamic page filenames return -1; } // once all hosts have received the parms, or we've at least tried to send // them to all hosts, then come here to return the page content back to // the client browser void doneBroadcastingParms ( void *state ) { TcpSocket *sock = (TcpSocket *)state; // free this mem sock->m_handyBuf.purge(); // set another http request again HttpRequest r; //bool status = r.set ( sock->m_readBuf , sock->m_readOffset , sock ) ; r.set ( sock->m_readBuf , sock->m_readOffset , sock ) ; // we stored the page # below WebPage *pg = &s_pages[sock->m_pageNum]; // call the page specifc function which will send data back on socket pg->m_function ( sock , &r ); } // . 
// returns false if blocked, true otherwise
// . send an error page on error
bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {

	// error out if page number out of range
	if ( page < PAGE_ROOT || page >= s_numPages )
		return g_httpServer.sendErrorReply ( s , 505 , "Bad Request");

	// map root page to results page for event searching
	//if ( page == PAGE_ROOT ) {
	//	char *coll = r->getString("c");
	//	// ensure it exists
	//	CollectionRec *cr = g_collectiondb.getRec ( coll );
	//	if ( cr && cr->m_indexEventsOnly ) page = PAGE_RESULTS;
	//}

	// did they supply correct password for given username?
	//bool userAccess = g_users.verifyUser(s,r);

	// does public have permission?
	// NOTE(review): publicPage is computed but no longer read below --
	// every permission check that consumed it is commented out.
	bool publicPage = false;
	if ( page == PAGE_ROOT ) publicPage = true;
	// do not deny /NM/Albuquerque urls
	if ( page == PAGE_RESULTS ) publicPage = true;
	if ( page == PAGE_SEO ) publicPage = true;
	if ( page == PAGE_ADDURL ) publicPage = true;
	if ( page == PAGE_GET ) publicPage = true;
	if ( page == PAGE_CRAWLBOT ) publicPage = true;

	// get our host
	//Host *h = g_hostdb.m_myHost;

	// now use this...
	bool isRootAdmin = g_conf.isRootAdmin ( s , r );

	CollectionRec *cr = g_collectiondb.getRec ( r , true );

	////////////////////
	////////////////////
	//
	// if it is an administrative page it requires permission!
	//
	////////////////////
	////////////////////

	// no longer, we let anyone snoop around to check out the gui
	//char guest = r->getLong("guest",0);
	//if ( ! publicPage && ! isRootAdmin && ! guest )
	//	return sendPageLogin ( s , r );

	if ( page == PAGE_CRAWLBOT && ! isRootAdmin )
		log("pages: accessing a crawlbot page without admin privs. "
		    "no parms can be changed.");

	/*
	// is request coming from a local ip?
	bool isLocal = false;
	bool isLoopback = false;
	if ( iptop(s->m_ip) == iptop(h->m_ip ) )       isLocal = true;
	if ( iptop(s->m_ip) == iptop(h->m_ipShotgun) ) isLocal = true;
	// shortcut
	uint8_t *p = (uint8_t *)&s->m_ip;
	// 127.0.0.1
	if ( s->m_ip == 16777343 ) { isLocal = true; isLoopback = true; }
	// 127 is local
	if ( g_conf.isConnectIp ( s->m_ip ) ) isLocal = true;
	// try this too so steve's comcast home ip works
	if ( r->isLocal() ) isLocal = true;
	// don't treat the ones below as local any more because we might
	// be a compression proxy running on a dedicated server and we do
	// not want other customers on that network to hit us! if you want
	// to access it from your browser then stick your tunnel's IP into
	// the list in gb.conf.
	// crap, but for now zak and partap need to be able to hit the
	// machines, so at least allow or 10.* addresses through, usually
	// the dedicates hosts are 192.168.*.*
	// this is local
	if ( p[0] == 10 ) isLocal = true;
	// this is local
	//if ( p[0] == 192 && p[1] == 168 ) isLocal = true;
	bool forbidIp = false;
	if ( ! publicPage && ! isLocal ) forbidIp = true;
	// allow zak though so he can add tags using tagdb to docid/eventIds
	// no, zak should be hitting the spider compression proxy or whatever,
	// even so, we should add zak's ips to the security page of
	// connect ips at least... i don't want to override this check because
	// it is our biggest security point
	//if ( page == PAGE_SITEDB  ) forbidIp = false;
	//if ( page == PAGE_LOGIN   ) forbidIp = false;
	//if ( page == PAGE_INJECT  ) forbidIp = false;
	//if ( page == PAGE_REINDEX ) forbidIp = false;
	//if ( page == PAGE_ROOT    ) forbidIp = false;
	//if ( page == PAGE_RESULTS ) forbidIp = false;
	//if ( page == PAGE_GET     ) forbidIp = false;
	*/

	// if the page is restricted access then they must be coming from
	// an internal ip. our ip masked with 0xffff0000 is good. we assume
	// that all administrators tunnel in through router0 and thus get a
	// local ip.
	// PAGE_TAGDB: allow zak to access tagdb, etc.
	/*
	if ( forbidIp ) {
		log("admin: must admin from internal ip");
		log("login: access denied 1 from ip=%s",iptoa(s->m_ip));
		return sendPageLogin( s, r, "Access Denied. IP not local.");
		// "in list of connect ips on security "
		// "tab.");
	}
	*/

	// . does client have permission for this page? they are coming from
	//   an internal ip and they provided the correct password for their
	//   username (or the page is publically accessible)
	// . BUT allow anyone to see it regardless if page is public! because
	//   often times my cookie says username=mwells but i am not logged
	//   in and i don't want to type my password to see the root page,
	//   or any other public page
	//if ( ! publicPage && ! g_users.hasPermission( r, page , s ) &&
	//     ! isLoopback ) {
	//	log("login: access denied 2 from ip=%s",iptoa(s->m_ip));
	//	return sendPageLogin ( s , r, "Access Denied. No permission.");
	//}

	//if ( ! publicPage && ! userAccess && ! isLoopback ) {
	//	log("login: access denied 3 from ip=%s",iptoa(s->m_ip));
	//	return sendPageLogin(s,r,"Access Denied. Bad or no password.");
	//}

	//if ( ! publicPage && ! isLocal && ! isLoopback ) {
	//	log("login: access denied 2 from ip=%s",iptoa(s->m_ip));
	//	return sendPageLogin ( s , r, "Access Denied. No permission.");
	//}

	g_errno = 0;

	WebPage *pg = &s_pages[page];

	// now we require a username for all "admin" type pages
	/*bool pub = pg->m_perm & USER_PUBLIC;
	if ( ! pub ) {
		// just get from cookie so it is not broadcast over the web
		// via a referral url
		char *username = r->getStringFromCookie("username");
		// if it is a broadcast, get from request directly (Msg28.cpp)
		if ( ! username ) username = r->getString("username");
		if ( ! username ) {
			log("admin: Permission denied. You must supply a "
			    "username.");
			return sendPageLogin ( s , r );
		}
	}*/

	//g_errno = 0;

	// page parser is now niceness 2 and calls a bunch of functions
	// with niceness 2, so if we allow another to be launched we risk
	// a quick poll within a quickpoll. we assume all http request handlers
	// are niceness 0, except this one.
	// if ( g_loop.m_inQuickPoll &&
	//      // even if not in page parser, we could be in a quickpoll
	//      // and that messes us up enough
	//      //g_inPageParser &&
	//      pg->m_function == sendPageParser ) {
	//	g_errno = ETRYAGAIN;
	//	return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));
	// }

	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// if not the root admin only all user to change settings, etc.
	// if the collection rec is a guest collection. i.e. in the cloud.
	//
	//bool isRootAdmin = g_conf.isRootAdmin(sock,hr);
	// NOTE(review): isRootColl is computed but never read -- the
	// root-admin-only check that used it is commented out below.
	bool isRootColl = false;
	if ( cr && strcmp(cr->m_coll,"main")==0 ) isRootColl = true;
	if ( cr && strcmp(cr->m_coll,"dmoz")==0 ) isRootColl = true;
	if ( cr && strcmp(cr->m_coll,"demo")==0 ) isRootColl = true;
	// the main,dmoz and demo collections are root admin only
	// if ( ! isRootAdmin && isRootColl ) {
	//	g_errno = ENOPERM;
	//	return log("parms: root admin can only change main/dmoz/demo"
	//		   " collections.");
	// }

	// just knowing the collection name is enough for a cloud user to
	// modify the collection's parms. however, to modify the master
	// controls or stuff in g_conf, you have to be root admin.
	// if ( ! g_conf.m_allowCloudUsers && ! isRootAdmin ) {
	//	//g_errno = ENOPERM;
	//	//return log("parms: permission denied for user");
	//	return sendPageLogin ( s , r );
	// }

	// get safebuf stored in TcpSocket class
	SafeBuf *parmList = &s->m_handyBuf;

	// chuck this in there so doneBroadcastingParms() knows which page
	// handler to invoke when the broadcast completes
	s->m_pageNum = page;

	////////
	//
	// the new way to set and distribute parm settings
	//
	////////

	// . convert http request to list of parmdb records
	// . will only add parm recs we have permission to modify!!!
	// . if no collection supplied will just return true with no g_errno
	if ( //isRootAdmin &&
	     ! g_parms.convertHttpRequestToParmList ( r, parmList, page, s))
		return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));

	// . add parmList using Parms::m_msg4 to all hosts!
	// . returns true and sets g_errno on error
	// . returns false if would block
	// . just returns true if parmList is empty
	// . so then doneBroadcastingParms() is called when all hosts
	//   have received the updated parms, unless a host is dead,
	//   in which case he should sync up when he comes back up
	if ( //isCollAdmin &&
	     ! g_parms.broadcastParmList ( parmList ,
					   s , // state is socket i guess
					   doneBroadcastingParms ) )
		// this would block, so return false
		return false;

	// free the mem if we didn't block
	s->m_handyBuf.purge();

	// on error from broadcast, bail here
	if ( g_errno )
		return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));

	// if this is a save & exit request we must log it here because it
	// will never return in order to log it in HttpServer.cpp
	// TODO: make this a function we can call.
	if ( g_conf.m_logHttpRequests && page == PAGE_MASTER ) {
		//&& pg->m_function==CommandSaveAndExit ) {
		// get time format: 7/23/1971 10:45:32
		time_t tt ;//= getTimeGlobal();
		if ( isClockInSync() ) tt = getTimeGlobal();
		else                   tt = getTimeLocal();
		struct tm *timeStruct = localtime ( &tt );
		char buf[100];
		strftime ( buf , 100 , "%b %d %T", timeStruct);
		// what url refered user to this one?
		// NOTE(review): assumes getReferer() never returns NULL --
		// confirm against HttpRequest before relying on this.
		char *ref = r->getReferer();
		// skip over http:// in the referer
		if ( strncasecmp ( ref , "http://" , 7 ) == 0 ) ref += 7;
		// save ip in case "s" gets destroyed
		long ip = s->m_ip;
		logf (LOG_INFO,"http: %s %s %s %s %s",
		      buf,iptoa(ip),r->getRequest(),ref,
		      r->getUserAgent());
	}

	// if we did not block... maybe there were no parms to broadcast
	return pg->m_function ( s , r );

	/*
	// broadcast request to ALL hosts if we should
	// should this request be broadcasted?
	long cast = r->getLong("cast",-1) ; // 0 is the default
	// UNLESS we are the crawlbot page, john does not send a &cast=1
	// on his requests and they LIKELY need to go to each host in the
	// network like for adding/deleting/resetting collections and updating
	// coll parms like "alias" and "maxtocrawl" and "maxtoprocess"
	if ( cast == -1 ) {
		cast = 0;
		if ( page == PAGE_CRAWLBOT ) cast = 1;
	}
	*/

	// proxy can only handle certain pages. it has logic in Proxy.cpp
	// to use the 0xfd msg type to forward certain page requests to
	// host #0, like
	// PAGE_ROOT
	// PAGE_GET
	// PAGE_RESULTS
	// PAGE_INJECT
	// PAGE_REINDEX
	// PAGE_DIRECTORY
	// PAGE_ADDURL
	// so all other pages should be nixed by us here.. unless its
	// page admin or page master because we need those to adminster
	// the proxy..
	/*
	if ( page != PAGE_ROOT &&
	     page != PAGE_GET &&
	     page != PAGE_RESULTS &&
	     page != PAGE_INJECT &&
	     page != PAGE_REINDEX &&
	     page != PAGE_DIRECTORY &&
	     page != PAGE_ADDURL &&
	     page != PAGE_MASTER &&
	     page != PAGE_ADMIN ) {
		log("admin: page %s not allowed through proxy",pg->m_name );
		g_errno = EBADENGINEER;
		return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));
	}
	*/

	//but if we're a proxy don't broadcast
	//if ( userType == USER_PROXY )
	//if ( g_proxy.isProxyRunning() &&
	//     (g_conf.isMasterAdmin( s, r ) || g_hostdb.getProxyByIp(s->m_ip)))
	//	cast = false;

	/*
	if ( g_proxy.isProxy () ) cast = 0;

	// this only returns true on error. uses msg28 to send the http request
	// verbatim to all hosts in network, using tcpserver. the spawned msg28
	// requests will come through this same path and be identical to this
	// request but their cast will be "0" this time to break any recursion.
	if ( cast )
		if ( ! broadcastRequest ( s , r , page ) )
			return false;

	// on error from broadcast, bail here, it call sendErrorReply()
	if ( g_errno )
		return g_httpServer.sendErrorReply(s,505,mstrerror(g_errno));

	// how did this happen?
	if ( cast && ! g_errno ) {
		log(LOG_LOGIC,"admin: broadcast did not block or have error.");
		return true;
	}

	// . if no collection specified, and page depends on collection, error
	// . allow some pages to use default if no collection explicitly given
	if ( page > PAGE_OVERVIEW &&
	     page != PAGE_TITLEDB &&
	     // crawlbot page might just have a token
	     page != PAGE_CRAWLBOT) {
		char *coll = r->getString("c");
		// ensure it exists
		CollectionRec *cr = g_collectiondb.getRec ( coll );
		if ( ! cr ) {
			if ( ! coll ) coll = "";
			log("admin: Invalid collection \"%s\".",coll);
			return g_httpServer.sendErrorReply(s,505,"No "
							   "collection given.");
		}
	}

	// if this is a save & exit request we must log it here because it
	// will never return in order to log it in HttpServer.cpp
	if ( g_conf.m_logHttpRequests && page == PAGE_MASTER ) {
		//&& pg->m_function==CommandSaveAndExit ) {
		// get time format: 7/23/1971 10:45:32
		time_t tt ;//= getTimeGlobal();
		if ( isClockInSync() ) tt = getTimeGlobal();
		else                   tt = getTimeLocal();
		struct tm *timeStruct = localtime ( &tt );
		char buf[64];
		strftime ( buf , 100 , "%b %d %T", timeStruct);
		// what url refered user to this one?
		char *ref = r->getReferer();
		// skip over http:// in the referer
		if ( strncasecmp ( ref , "http://" , 7 ) == 0 ) ref += 7;
		// save ip in case "s" gets destroyed
		long ip = s->m_ip;
		logf (LOG_INFO,"http: %s %s %s %s %s",
		      buf,iptoa(ip),r->getRequest(),ref,
		      r->getUserAgent());
	}

	// . we did not have a broadcast, config this host
	// . this also calls command functions like CommandJustSave()
	// . commandJustSave and commandJustSaveAndExit has to block
	//   now, so it can be responsible for calling pg->m_function
	//if ( userType > USER_PUBLIC ) {
	// check if user has public page access
	if ( isLocal ) { //g_users.hasPermission( r, page , s )){
		// . this will set various parms
		// . we know the request came from a host in the cluster
		//   because "isHost" is true.
		// . this will call CmdJustSave(), etc. too if need be
		// . this calls the callback pg->m_function() when done!
		// . if there was a &cast=1 it was have left up above so we
		//   know that this is a &cast=0 request and an endpoint host.
		if(!g_parms.setFromRequest ( r , //userType,
					     s,
					     pg->m_function))
			return false;
	}

	// do not call sendPageEvents if not eventwidget
	//if ( page == PAGE_RESULTS &&
	//     ststr ( hostname, "eventwidget.com" ) )
	//	return sendPageEvents ( s , r );
	//if ( page == PAGE_ADDEVENT &&
	//     ststr ( hostname, "eventwidget.com" ) )
	//	return sendPageAddEvent2 ( s , r );

	// . these functions MUST always call g_httpServer.sendDynamicPage()
	//   eventually
	// . returns false if blocked, true otherwise
	// . sets g_errno on error i think
	// . false means not called from msg28
	return pg->m_function ( s , r );
	*/
}

/*
#include "Msg28.h"

static Msg28       s_msg28;
static TcpSocket  *s_s;
static HttpRequest s_r;
static bool        s_locked = false;
static long        s_page;

static void doneWrapper ( void *state ) ;

// . all dynamic page requests should call this
// . returns false if blocked, true otherwise,
// . sets g_errno on error
bool Pages::broadcastRequest ( TcpSocket *s , HttpRequest *r , long page ) {
	// otherwise we may block
	if ( g_hostdb.m_hostId != 0 ) {
		log("admin: You can only make config changes from host #0.");
		g_errno = EBADENGINEER;
		return true;
	}
	// only broadcast one request at a time... for add/del coll really
	if ( s_locked ) {
		g_errno = EBADENGINEER;
		log("admin: Failed to broadcast config change. An "
		    "operation is already in progress.");
		return true;
	}
	// lock it now
	s_locked = true;
	// save stuff
	s_page = page;
	s_s    = s;
	s_r.copy ( r ); // just a ptr copy really, references s->m_readBuf
	// . this returns false if blocked
	// . this removes &cast=1 and adds &cast=0 to the request before
	//   sending to each host in the network
	if ( ! 
s_msg28.massConfig ( s_s , &s_r , -1 , NULL , doneWrapper ) )
		return false;
	// did not block
	s_locked = false;
	return true;
}

void doneWrapper ( void *state ) {
	// release the lock
	s_locked = false;
	// . now we can handle the page
	// . this must call g_httpServer.sendDynamicReply() eventually
	s_pages[s_page].m_function ( s_s , &s_r );
}
*/

// certain pages are automatically generated by the g_parms class
// because they are menus of configurable parameters for either g_conf
// or for a particular CollectionRec record for a collection.
bool sendPageGeneric ( TcpSocket *s , HttpRequest *r ) {
	//long page = g_pages.getDynamicPageNumber ( r );
	return g_parms.sendPageGeneric ( s , r );//, page );
}

// returns the niceness configured for "page" in the s_pages[] table, or 0
// if "page" is out of range.
// NOTE(review): the return type is bool, so any non-zero m_niceness
// collapses to true/1 here -- looks like it should return long. Confirm
// against the declaration in Pages.h before changing.
bool Pages::getNiceness ( long page ) {
	// error out if page number out of range
	if ( page < 0 || page >= s_numPages ) return 0;
	return s_pages[page].m_niceness;
}

///////////////////////////////////////////////////////////
//
// Convenient html printing routines
//
//////////////////////////////////////////////////////////

// prints one button of the BASIC/ADVANCED top navigation bar into "sb".
// NOTE(review): the markup that belongs inside the string literals below
// appears to have been stripped from this copy of the file -- recover the
// original literal contents from version control before building.
bool printTopNavButton ( char *text,
			 char *link,
			 bool isHighlighted,
			 char *coll,
			 SafeBuf *sb ) {
	if ( isHighlighted )
		sb->safePrintf( "" "
" "%s" "
" "
" //"
" , link , coll , text ); else sb->safePrintf( "" "
" "%s" "
" //"
" "
" , link , coll , text ); return true; } bool printNavButton ( char *text , char *link , bool isHighlighted , SafeBuf *sb ) { if ( isHighlighted ) sb->safePrintf( "" "
" "%s    " "
" "
" "
" , link , text ); else sb->safePrintf( "" "
" "%s    " "
" "
" "
" , link , text ); return true; } bool Pages::printAdminTop (SafeBuf *sb , TcpSocket *s , HttpRequest *r , char *qs , char* bodyJavascript) { long page = getDynamicPageNumber ( r ); //long user = getUserType ( s , r ); //char *username = g_users.getUsername ( r ); char *username = NULL; //char *coll = r->getString ( "c" ); //if ( ! coll ) coll = "main"; char *coll = g_collectiondb.getDefaultColl(r); //char *pwd = r->getString ( "pwd" ); // get username bool status = true; //User *user = g_users.getUser (username );//,false ); //if ( user ) pwd = user->m_password; char *pwd = NULL; sb->safePrintf("\n"); sb->safePrintf( "\n" "%s | gigablast admin\n" "\n" "\n", s_pages[page].m_name); // print bg colors status &= printColors ( sb, bodyJavascript); // print form to encompass table now //////// // // . the form // //////// // . we cannot use the GET method if there is more than a few k of // parameters, like in the case of the Search Controls page. The // browser simply will not send the request if it is that big. if ( s_pages[page].m_usePost == M_MULTI ) sb->safePrintf ("
tags "ENCTYPE=\"multipart/form-data\" " "action=\"/%s\">\n", s_pages[page].m_filename); else if ( s_pages[page].m_usePost == M_POST ) sb->safePrintf ("\n", s_pages[page].m_filename); else sb->safePrintf ("\n", s_pages[page].m_filename); // pass on this stuff //if ( ! pwd ) pwd = ""; //sb->safePrintf ( "\n",pwd); //if ( ! coll ) coll = ""; sb->safePrintf ( "\n",coll); // sometimes we do not want to be USER_MASTER for testing //if ( user == USER_ADMIN ) { //if ( g_users.hasPermission ( username, PAGE_ADMIN ) ){ // sb->safePrintf("\n"); //} // should any changes be broadcasted to all hosts? //sb->safePrintf ("\n", // (long)s_pages[page].m_cast); // center all //sprintf ( p , "
\n"); //p += gbstrlen ( p ); // table. left column is logo and collection name list. // right column is the other crap. //sb->safePrintf( "" // ""); return true; } bool printGigabotAdvice ( SafeBuf *sb , long page , HttpRequest *hr , char *errMsg ) { char format = hr->getFormat(); if ( format != FORMAT_HTML ) return true; char guide = hr->getLong("guide",0); if ( ! guide ) return true; sb->safePrintf("\n"); // we only show to guest users. if we are logged in as master admin // then skip this step. //if ( hr->isGuestAdmin() ) // return false; // also, only show if running in matt's data cetner //if ( ! g_conf.m_isMattWells ) // return true; // gradient class // yellow box char *box = "
"); // print the logo in upper left corner // this logo sucks, do the new one, a yellow div with a hole in it // for the rocket //status &= printLogo ( sb , coll ); // // DIVIDE INTO TWO PANES, LEFT COLUMN and MAIN COLUMN // sb->safePrintf("" "\n\n"); // // first the nav column // sb->safePrintf("" //""); // // begin the 2nd column of the display // // the controls will go here sb->safePrintf("
" "
" "
" "" "
" "
" "HOME" "
" "
" "
" "
" "
" , GOLD ,coll ); /* sb->safePrintf("


"); sb->safePrintf( "
" ); // collection under that status &= printCollectionNavBar ( sb, page , username , coll,pwd, qs ); */ bool isBasic = false; if ( page == PAGE_BASIC_SETTINGS ) isBasic = true; if ( page == PAGE_BASIC_STATUS ) isBasic = true; //if ( page == PAGE_BASIC_DIFFBOT ) isBasic = true; //if ( page == PAGE_BASIC_SEARCH ) isBasic = true; if ( page == PAGE_COLLPASSWORDS ) isBasic = true; if ( page == PAGE_BASIC_SEARCH ) isBasic = true; //printNavButton ( "BASIC" , "/admin/settings", isBasic , sb ); //printNavButton ( "ADVANCED" , "/admin/master", ! isBasic , sb ); // collections box sb->safePrintf( //"
" "
" ); // collection under that //status&=printCollectionNavBar ( sb, page , username , coll,pwd, qs ); // collection navbar status&=printCollectionNavBar ( sb, page , username, coll,pwd, qs,s,r); // count the statuses long emptyCount = 0; long doneCount = 0; long activeCount = 0; long pauseCount = 0; for (long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { CollectionRec *cc = g_collectiondb.m_recs[i]; if ( ! cc ) continue; CrawlInfo *ci = &cc->m_globalCrawlInfo; if ( cc->m_spideringEnabled && ! ci->m_hasUrlsReadyToSpider && ci->m_urlsHarvested ) emptyCount++; else if ( ! ci->m_hasUrlsReadyToSpider ) doneCount++; else if (cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider ) activeCount++; else if (!cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider) pauseCount++; } sb->safePrintf("
"); sb->safePrintf("
" "
" "Key" "
" "
" ); sb->safePrintf( "" "● spider is done (%li)" "
" "" "● spider is paused (%li)" "
" "" "● spider is active (%li)" "
" "" "● spider queue empty (%li)" "
" "
" ,doneCount ,pauseCount ,activeCount ,emptyCount ); sb->safePrintf("
" // MDW 9/27/2014: tried to fix that blue border // in MSIE but could not easily make it go away. // seems like the table cell truncates the div's // left border below even if i put a z-index:1000; // on there. // "style=" // "border-color:green;" // "border-left-width:3px;" // "border-style:solid;" // "margin-left:-30px;" // ">" "
" , GOLD , GOLD ); // logout link on far right sb->safePrintf("
" "" // clear the cookie "" "logout" "" "" "
" ); // print the hosts navigation bar status &= printHostLinks ( sb, page , username , pwd , coll, NULL, s->m_ip, qs ); //if ( g_hostdb.getNumHosts() > 1 ) sb->safePrintf("

"); // end table //sb->safePrintf ("

\n");//
\n"); SafeBuf mb; bool added = printRedBox ( &mb , s , r ); // print emergency msg box if ( added ) sb->safePrintf("%s",mb.getBufStart()); // // print breadcrumb. main > Basic > Settings // /* char *menu = "advanced"; if ( isBasic ) menu = "basic"; sb->safePrintf("
"); sb->safePrintf("" "%s > %s > %s " "  " "" "" //"xml " //"json " "

\n", coll, menu, s_pages[page].m_name //,s_pages[page].m_filename , coll //,s_pages[page].m_filename , coll ); */ // print Basic | Advanced links printTopNavButton("BASIC", "/admin/settings", isBasic, // highlighted? coll, sb ); printTopNavButton("ADVANCED", "/admin/master", !isBasic, // highlighted? coll, sb ); sb->safePrintf("
"); // end that yellow/gold div sb->safePrintf(""); // this div will hold the submenu and forms sb->safePrintf( "
" "
" ); // print the menu links under that status &= printAdminLinks ( sb, page , coll , isBasic ); sb->safePrintf("
"); if ( page != PAGE_BASIC_SETTINGS ) return true; // gigabot helper blurb printGigabotAdvice ( sb , page , r , NULL ); // begin 2nd row in big table //sb->safePrintf("
" "
"; char *boxEnd = "
"; char *advice = NULL; if ( page == PAGE_ADDCOLL ) advice = "STEP 1 of 3. " "
" "
" //"Human, I am Gigabot." //"

" "Enter the name of your collection " "(search engine) in the box below then hit " "submit. You can only use alphanumeric characters, " "hyphens or underscores." "
" "
" "Remember this name so you can access the controls " "later." // "Do not deviate from this path or you may " // "be blasted." ; if ( page == PAGE_BASIC_SETTINGS ) advice = "STEP 2 of 3. " "
" "
" "Enter the list of websites you want to be in your " "search engine into the box marked site list " "then click the submit button." // "
" // "
" // "Do not deviate from this path, or, as is always " // "the case, you may " // "be blasted." ; if ( page == PAGE_BASIC_STATUS ) advice = "STEP 3 of 3. " "
" "
" "Ensure you see search results appearing in " "the box below. If not, then you have spider " "problems." "
" "
" "Click on the links in the lower right to expose " "the source code. Copy and paste this code " "into your website to make a search box that " "connects to the search engine you have created. " ; if ( ! advice ) return true; sb->safePrintf("
"); sb->safePrintf("%s",box); // the mean looking robot sb->safePrintf("" "" "" ); if ( errMsg ) sb->safePrintf("%s",errMsg); sb->safePrintf("%s" "" , advice ); sb->safePrintf("%s",boxEnd); sb->safePrintf("

"); return true; } /* bool Pages::printAdminTop2 (SafeBuf *sb , TcpSocket *s , HttpRequest *r , //char *qs ) { char *qs , char *scripts , long scriptsLen ) { long page = getDynamicPageNumber ( r ); //long user = getUserType ( s , r ); char *username =g_users.getUsername(r); char *coll = r->getString ( "c" ); //char *pwd = r->getString ( "pwd" ); long fromIp = s->m_ip; return printAdminTop2 ( sb, page, username, coll, NULL, fromIp , qs , scripts, scriptsLen ); } bool Pages::printAdminTop2 ( SafeBuf *sb , long page , //long user , char *username, char *coll , char *pwd , long fromIp , //char *qs ) { char *qs , char *scripts, long scriptsLen ) { bool status = true; sb->safePrintf( "\n" "\n" "\n" "\n" ); // this allows for inclusion of javascripts and css styles if ( scripts && scriptsLen > 0 ) sb->safeMemcpy( scripts, scriptsLen ); sb->safePrintf( "\n" "Gigablast Admin\n" "\n" "\n" ); // print bg colors status &= printColors3 ( sb ); // master div to align admin-top table(s) sb->safePrintf( "
\n" ); sb->safePrintf( "
\n" ); // center all //sprintf ( p , "
\n"); //p += gbstrlen ( p ); // table sb->safePrintf( "
"); // print the logo in upper left corner status &= printLogo ( sb , coll ); // after logo text //if ( g_users.hasPermission(username,PAGE_QUALITY) ) { // sb->safePrintf( "   " // "Quality Control" ); //} //#ifdef SPLIT_INDEXDB // long split = INDEXDB_SPLIT; //#else // long split = 1; //#endif //long split = g_hostdb.m_indexSplits; // the version info //sb->safePrintf ("
%s", GBVersion ); // . the the hosts // . don't print host buttons if only 1 host //if ( user == USER_MASTER && g_hostdb.m_numHosts > 1 ) { if ( !g_users.hasPermission(username,PAGE_NOHOSTLINKS) ) { // print the hosts navigation bar status &= printHostLinks ( sb, page , username , pwd , coll,NULL, fromIp, qs ); } // end table sb->safePrintf ("


\n"); // print the links status &= printAdminLinks ( sb, page , username , coll , NULL, true ); // collection under that status &= printCollectionNavBar ( sb, page , username , coll ,NULL,qs); // print the links status &= printAdminLinks ( sb, page , username , coll , NULL, false ); sb->safePrintf( "
\n" ); return true; } */ void Pages::printFormTop( SafeBuf *sb, HttpRequest *r ) { long page = getDynamicPageNumber ( r ); // . the form // . we cannot use the GET method if there is more than a few k of // parameters, like in the case of the Search Controls page. The // browser simply will not send the request if it is that big. if ( s_pages[page].m_usePost ) sb->safePrintf ("\n", s_pages[page].m_filename); else sb->safePrintf ("\n", s_pages[page].m_filename); } void Pages::printFormData( SafeBuf *sb, TcpSocket *s, HttpRequest *r ) { long page = getDynamicPageNumber ( r ); //long user = getUserType ( s , r ); //char *username =g_users.getUsername(r); //char *pwd = r->getString ( "pwd" ); char *coll = r->getString ( "c" ); // pass on this stuff //if ( ! pwd ) pwd = ""; //sb->safePrintf ( "\n", pwd); if ( ! coll ) coll = ""; sb->safePrintf ( "\n", coll); // sometimes we do not want to be USER_MASTER for testing //if ( user == USER_ADMIN ) { //if ( g_users.hasPermission( username, PAGE_ADMIN ) ){ // sb->safePrintf( "\n"); //} // should any changes be broadcasted to all hosts? sb->safePrintf ("\n", (long)s_pages[page].m_cast); } /* char *Pages::printAdminBottom ( char *p , char *pend , HttpRequest *r ) { return printAdminBottom ( p , pend ); } char *Pages::printAdminBottom ( char *p , char *pend ) { // update button sprintf ( p, "
" "
" "
\n"); p += gbstrlen ( p ); // end form sprintf ( p, "\n" ); p += gbstrlen ( p ); return p; } */ bool Pages::printAdminBottom ( SafeBuf *sb, HttpRequest *r ) { return printAdminBottom ( sb ); } bool Pages::printSubmit ( SafeBuf *sb ) { // update button return sb->safePrintf ( //"
" "
" "" "
" "
" "\n" ) ; } bool Pages::printAdminBottom ( SafeBuf *sb ) { bool status = true; // update button if ( !sb->safePrintf ( "
" "" "
" "
\n" ) ) status = false; if ( ! sb->safePrintf( "
" // id=pane2 "" "" "\n" "" //"\n" ) ) status = false; // end form if ( ! sb->safePrintf ( "\n\n" ) ) status = false; return status; } bool Pages::printAdminBottom2 ( SafeBuf *sb, HttpRequest *r ) { return printAdminBottom2 ( sb ); } bool Pages::printAdminBottom2 ( SafeBuf *sb ) { bool status = true; sb->safePrintf ( "\n\n\n" ); return status; } /* char *Pages::printTail ( char *p , char *pend , bool isLocal ) { // don't breech the buffer if ( p + 2000 >= pend ) return p; // now print the tail sprintf ( p , //"\n


" "\n
" "
" ); p += gbstrlen ( p ); // return length of bytes we stored return p ; } */ bool Pages::printTail ( SafeBuf* sb, bool isLocal ) { // now print the tail sb->safePrintf ( //"\n


" "\n
" "
" ); // return length of bytes we stored return true ; } bool Pages::printColors ( SafeBuf *sb, char* bodyJavascript ) { // print font and color stuff sb->safePrintf ( "\n" "\n", bodyJavascript); return true; } /* char *Pages::printColors ( char *p , char *pend, char* bodyJavascript ) { // print font and color stuff sprintf ( p , "\n" "\n", bodyJavascript ); p += gbstrlen ( p ); return p; } char *Pages::printColors2 ( char *p , char *pend ) { // print font and color stuff sprintf ( p , "" "\n" "" ); p += gbstrlen ( p ); return p; } */ bool Pages::printColors3 ( SafeBuf *sb ) { // print font and color stuff sb->safePrintf ( "\n" // onLoad=sf()>" ); return true; } /* char *Pages::printFocus ( char *p , char *pend ) { // print the logo in upper right corner sprintf ( p , "\n" ); p += gbstrlen ( p ); return p; } */ bool Pages::printLogo ( SafeBuf *sb, char *coll ) { // print the logo in upper right corner if ( ! coll ) coll = ""; sb->safePrintf ( "" "" "\n",coll); return true; } /* char *Pages::printLogo ( char *p , char *pend , char *coll ) { // print the logo in upper right corner if ( ! coll ) coll = ""; sprintf ( p , "" "" "\n",coll); p += gbstrlen ( p ); return p; } */ bool Pages::printHostLinks ( SafeBuf* sb , long page , char *username , char *password , char *coll , char *pwd , long fromIp , char *qs ) { bool status = true; // ignore if ( ! username ) username = ""; if ( ! password ) { User *user = g_users.getUser (username ); if ( user ) password = user->m_password; } if ( ! password ) password = ""; long total = 0; // add in hosts total += g_hostdb.m_numHosts; // and proxies total += g_hostdb.m_numProxyHosts; // don't print host buttons if only 1 host //if ( total <= 1 ) return status; sb->safePrintf ( //"      " "" "hosts in cluster: "); if ( ! qs ) qs = ""; //if ( ! pwd ) pwd = ""; if ( ! 
coll ) coll = ""; // print the 64 hosts before and after us long radius = 512;//64; long hid = g_hostdb.m_hostId; long a = hid - radius; long b = hid + radius; long diff ; if ( a < 0 ) { diff = -1 * a; a += diff; b += diff; } if ( b > g_hostdb.m_numHosts ) { diff = b - g_hostdb.m_numHosts; a -= diff; if ( a < 0 ) a = 0; } for ( long i = a ; i < b ; i++ ) { // skip if negative if ( i < 0 ) continue; if ( i >= g_hostdb.m_numHosts ) continue; // get it Host *h = g_hostdb.getHost ( i ); unsigned short port = h->m_httpPort; // use the ip that is not dead, prefer eth0 unsigned long ip = g_hostdb.getBestIp ( h , fromIp ); // convert our current page number to a path char *path = s_pages[page].m_filename; // highlight itself char *ft = ""; char *bt = ""; if ( i == hid && ! g_proxy.isProxy() ) { ft = ""; bt = ""; } // print the link to it sb->safePrintf("%s" "%li%s ", ft,iptoa(ip),port,path, //username,password, coll,qs,i,bt); } // print the proxies for ( long i = 0; i < g_hostdb.m_numProxyHosts; i++ ) { char *ft = ""; char *bt = ""; if ( i == hid && g_proxy.isProxy() ) { ft = ""; bt = ""; } Host *h = g_hostdb.getProxy( i ); unsigned short port = h->m_httpPort; // use the ip that is not dead, prefer eth0 unsigned long ip = g_hostdb.getBestIp ( h , fromIp ); char *path = s_pages[page].m_filename; sb->safePrintf("%s" "proxy%li%s ", ft,iptoa(ip),port,path, //username,password, coll,qs,i,bt); } return status; } // . print the master admin links if "user" is USER_MASTER // . print the collection admin links if "user" is USER_ADMIN bool Pages::printAdminLinks ( SafeBuf *sb, long page , char *coll , bool isBasic ) { bool status = true; // prepare for printing these //if ( ! coll ) coll = ""; //if ( ! pwd ) pwd = ""; CollectionRec *cr = g_collectiondb.getRec ( coll ); // sometimes there are no collections! //if ( ! cr ) return true; //char *coll = ""; //if ( cr ) coll = cr->m_coll; //if ( ! top ) { // // . if no collection do not print anything else // // . 
no, we accept as legit (print out as "main") // //if ( ! coll[0] ) return status; // if ( g_collectiondb.m_numRecsUsed == 0 ) return status; // //if ( ! g_collectiondb.getRec ( coll ) ) return status; //} //sprintf(p,"\n" ); //p += gbstrlen(p); //sb->safePrintf ("
\n" ); // soemtimes we do not want to be USER_MASTER for testing char buf [ 64 ]; buf[0] = '\0'; //if ( g_users.hasPermission(username,PAGE_ADMIN ) ) // sprintf(buf,"&master=0"); // unfortunately width:100% is percent of the virtual window, not the // visible window... so just try 1000px max sb->safePrintf("
"); //long matt1 = atoip ( MATTIP1 , gbstrlen(MATTIP1) ); //long matt2 = atoip ( MATTIP2 , gbstrlen(MATTIP2) ); for ( long i = PAGE_BASIC_SETTINGS ; i < s_numPages ; i++ ) { // do not print link if no permission for that page //if ( (s_pages[i].m_perm & user) == 0 ) continue; //if ( ! g_users.hasPermission(username,i) ) continue; // do not print Sync link if only one host //if ( i == PAGE_SYNC && g_hostdb.getNumHosts() == 1) continue; // top or bottom //if ( top && i >= PAGE_CGIPARMS ) continue; //if ( ! top && i < PAGE_CGIPARMS ) continue; // skip seo link if ( ! g_conf.m_isMattWells && i == PAGE_SEO ) continue; // skip page autoban link if ( ! g_conf.m_isMattWells && i == PAGE_AUTOBAN ) continue; // is this page basic? bool pageBasic = false; if ( i >= PAGE_BASIC_SETTINGS && i <= PAGE_BASIC_SEARCH ) pageBasic = true; // print basic pages under the basic menu, advanced pages // under the advanced menu... if ( isBasic != pageBasic ) continue; // ignore these for now //if ( i == PAGE_SECURITY ) continue; if ( i == PAGE_ACCESS ) continue; if ( i == PAGE_INDEXDB ) continue; if ( i == PAGE_RULES ) continue; if ( i == PAGE_API ) continue; if ( i == PAGE_SEARCHBOX ) continue; if ( i == PAGE_TITLEDB ) continue; if ( i == PAGE_IMPORT ) continue; // move these links to the coll nav bar on the left if ( i == PAGE_ADDCOLL ) continue; if ( i == PAGE_DELCOLL ) continue; if ( i == PAGE_CLONECOLL ) continue; // put this back in //if ( i == PAGE_HOSTS ) continue; // print "url download" before "inject url" // GET /mycollname_urls.csv /* nah, keep this in basic > status if ( i == PAGE_INJECT ) { sb->safePrintf ( "" "" "" "data downloads" "" "" "" "   \n", coll ); } */ if ( cr && ! 
cr->m_isCustomCrawl && i == PAGE_CRAWLBOT ) continue; // print it out if ( i == PAGE_LOGIN || i == PAGE_LOGIN2 ) sb->safePrintf( //"" "%s" //"" "   \n",s_pages[i].m_filename, //username,pwd, coll, buf,s_pages[i].m_name); else if ( page == i ) sb->safePrintf( //"" "" "" "" "" "%s" "" "" "" "" //"" "   " "\n" ,s_pages[i].m_filename ,coll ,buf ,s_pages[i].m_name ); else sb->safePrintf( //"" "" "" "" "%s" "" "" "" //"" "   \n" ,s_pages[i].m_filename ,coll ,buf ,s_pages[i].m_name); // print
after the last master admin control /* if ( i == PAGE_DELCOLL && user == USER_MASTER ) { // . if no collection do not print anything else // . no, we accept as legit (print out as "main") //if ( ! coll[0] ) break; if ( g_collectiondb.m_numRecsUsed == 0 ) break; // or if no collection selected, same thing if ( ! coll[0] ) break; sprintf ( p , "

\n"); p += gbstrlen(p); } */ } // print documentation links /* if ( ! isBasic ) sb->safePrintf(" " "" "admin guide" " " "  " " " "dev guide" ); */ sb->safePrintf("
"); //sb->safePrintf("
" ); //sb->safePrintf("
" ); //sb->safePrintf("
" ); return status; } bool Pages::printCollectionNavBar ( SafeBuf *sb , long page , //long user , char *username, char *coll , char *pwd , char *qs , TcpSocket *sock , HttpRequest *hr ) { bool status = true; //if ( ! pwd ) pwd = ""; if ( ! qs ) qs = ""; // if not admin just print collection name if ( g_collectiondb.m_numRecsUsed == 0 ) { sb->safePrintf ( "
" "
No collections found. " "Click add collection to add one." "

\n"); return status; } // if not admin just print collection name //if ( user == USER_ADMIN ) { //if (g_users.hasPermission(username,PAGE_ADMIN) ){ //sb->safePrintf ( "

Collection " // "%s" // "

" , coll ); // return status ; //} // print up to 10 names on there collnum_t collnum = g_collectiondb.getCollnum ( coll ); bool highlight = true; if ( collnum < (collnum_t)0) { highlight = false; collnum=g_collectiondb.getFirstCollnum(); } if ( collnum < (collnum_t)0) return status; long a = collnum; long counta = 1; while ( a > 0 && counta < 15 ) if ( g_collectiondb.m_recs[--a] ) counta++; long b = collnum + 1; long countb = 0; while ( b < g_collectiondb.m_numRecs && countb < 16 ) if ( g_collectiondb.m_recs[b++] ) countb++; char *s = "s"; if ( g_collectiondb.m_numRecsUsed == 1 ) s = ""; bool isRootAdmin = g_conf.isRootAdmin ( sock , hr ); if ( isRootAdmin ) sb->safePrintf ( "
%li Collection%s" "
\n", g_collectiondb.m_numRecsUsed , s ); else sb->safePrintf ( "
Collections" "
\n"); sb->safePrintf( "
" "" "" "add     " "delete     " "clone" "" "" "
" , coll , coll , coll ); char *color = "red"; //if ( page >= PAGE_CGIPARMS ) color = "red"; //else color = "black"; // style for printing collection names sb->safePrintf("\n"); long row = 0; //for ( long i = a ; i < b ; i++ ) { for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { CollectionRec *cc = g_collectiondb.m_recs[i]; if ( ! cc ) continue; // // CLOUD SEARCH ENGINE SUPPORT // // if not root admin and collrec's password does not match // the one we are logged in with (in the cookie) then skip it // if ( ! isRootAdmin && // cr->m_password && // ! strcmp(cr->m_password,pwd) ) // continue; char *cname = cc->m_coll; row++; //if ( p + gbstrlen(cname) + 100 >= pend ) return p; // collection name HACK for backwards compatibility //if ( ! cname[0] ) cname = "main"; // every other coll in a darker div if ( (row % 2) == 0 ) sb->safePrintf("
"); sb->safePrintf(""); // print color bullet // green = active // yellow = paused // black = done // gray = empty // red = going but has > 50% errors in last 100 sample. // like timeouts etc. CrawlInfo *ci = &cc->m_globalCrawlInfo; char *bcolor = ""; if ( ! cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider ) bcolor = "orange";// yellow is too hard to see if ( cc->m_spideringEnabled && ci->m_hasUrlsReadyToSpider ) bcolor = "green"; if ( ! ci->m_hasUrlsReadyToSpider ) bcolor = "black"; // when we first add a url via addurl or inject it will // set hasUrlsReadyToSpider on all hosts to true i think // and Spider.cpp increments urlsharvested. if ( cc->m_spideringEnabled && ! ci->m_hasUrlsReadyToSpider && ci->m_urlsHarvested ) bcolor = "gray"; sb->safePrintf(" ",bcolor); if ( i != collnum || ! highlight )// || ! coll || ! coll[0]) sb->safePrintf ( "%s" "  ", cname, s_pages[page].m_filename, cname , qs, cname ); else sb->safePrintf ( "%s " "  ", cname, color , cname ); sb->safePrintf(""); // every other coll in a darker div if ( (row % 2) == 0 ) sb->safePrintf("
\n"); else sb->safePrintf("
\n"); } //sb->safePrintf ( "

" ); return status; } /* char *Pages::printCollectionNavBar ( char *p , char *pend , long page , //long user , char *username , char *coll , char *pwd , char *qs ) { //if ( ! pwd ) pwd = ""; if ( ! qs ) qs = ""; // if not admin just print collection name if ( g_collectiondb.m_numRecsUsed == 0 ) { sprintf ( p , "
" "
No collections found. " "Click add collection to add one." "

\n"); p += gbstrlen ( p ); return p ; } // if not admin just print collection name //if ( user == USER_ADMIN ) { if (g_users.hasPermission(username,PAGE_ADMIN) ){ sprintf ( p , "

Collection " "%s" "

" , coll ); p += gbstrlen ( p ); return p ; } // print up to 10 names on there collnum_t collnum = g_collectiondb.getCollnum ( coll ); bool highlight = true; if ( collnum < (collnum_t)0) { highlight = false; collnum=g_collectiondb.getFirstCollnum(); } if ( collnum < (collnum_t)0) return p; long a = collnum; long counta = 1; while ( a > 0 && counta < 15 ) if ( g_collectiondb.m_recs[--a] ) counta++; long b = collnum + 1; long countb = 0; while ( b < g_collectiondb.m_numRecs && countb < 16 ) if ( g_collectiondb.m_recs[b++] ) countb++; sprintf ( p , "

Collections:  \n" ); p += gbstrlen ( p ); char *color; if ( page >= PAGE_OVERVIEW ) color = "red"; else color = "black"; for ( long i = a ; i < b ; i++ ) { CollectionRec *cc = g_collectiondb.m_recs[i]; if ( ! cc ) continue; char *cname = cc->m_coll; if ( p + gbstrlen(cname) + 100 >= pend ) return p; // collection name HACK for backwards compatibility //if ( ! cname[0] ) cname = "main"; if ( i != collnum || ! highlight )// || ! coll || ! coll[0]) sprintf ( p , "%s" "  ", s_pages[page].m_filename,cc->m_coll , qs, cname ); else sprintf ( p , "%s " "  ", color , cname ); p += gbstrlen ( p ); } sprintf ( p , "

" ); p += gbstrlen ( p ); return p; } */ /* // print the drop down menu of rulesets used by Sitedb and URL Filters page char *Pages::printRulesetDropDown ( char *p , char *pend , long user , char *cgi , long selectedNum , long subscript ) { // . print pulldown menu of different site filenums // . 0 - default site // . 1 - banned site // . 2 - bad site // . 3 - decent site // . 4 - good site // . 5 - super site if ( subscript <= 0 ) sprintf(p,"\n",cgi,subscript); p += gbstrlen ( p ); // print NONE (PageReindex.cpp uses this one) // if ( selectedNum == -2 ) { sprintf (p,"