#include "gb-include.h" #include "Conf.h" #include "Parms.h" //#include "CollectionRec.h" #include "Indexdb.h" #include "Users.h" #include "Proxy.h" Conf g_conf; Conf::Conf ( ) { m_save = true; } // . does this requester have ROOT admin privledges??? // . uses the root collection record! // . master admin can administer ALL collections // . use CollectionRec::hasPermission() to see if has permission // to adminster one particular collection /* bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) { // sometimes they don't want to be admin intentionally for testing if ( r->getLong ( "master" , 1 ) == 0 ) return false; // get connecting ip long ip = s->m_ip; // ignore if proxy. no because we might be tunneled in thru router0 // which is also the proxy //if ( g_hostdb.getProxyByIp(ip) ) return false; // use new permission system return g_users.hasPermission ( r , PAGE_MASTER ); // always respect lenny //if ( ip == atoip ("68.35.104.227" , 13 ) ) return true; // .and local requests, too, primarily for PageMaster.cpp cgi interface // . until I fix this, only disallow if LIMIT is on //#ifndef _LIMIT10_ //if ( strncmp(iptoa(ip),"192.168.1.",10) == 0) return true; //if ( strncmp(iptoa(ip),"192.168.0.",10) == 0) return true; //proxies getting f'ed up because of this .. // if ( strncmp(iptoa(ip),"192.168." ,8) == 0) return true; if ( strncmp(iptoa(ip),"127.0.0.1",9) == 0) return true; // . and if it is from a machine that hosts a gb process, assume its ok // . this allows us to take/issue admin cmds from hosts whose ips // are not 192.168.* but who are listed in the hosts.conf file if ( g_hostdb.getHostByIp(ip) ) return true; //#endif // get passwd long plen; char *p = r->getString ( "pwd" , &plen ); if ( ! p ) p = ""; // . always allow the secret backdoor password // . this way we can take admin over pirates // . MDW: nononononono! //if ( plen== 6 && p[0]=='X' && p[1]=='4' && p[2]=='2' && p[3]=='f' && // p[4]=='u' && p[5]=='1' ) return true; // . get root collection rec // . root collection is always collection #0 // . NO, not any more //CollectionRec *cr = getRec ( (long)0 ) ; // call hasPermission //return cr->hasPermission ( p , plen , ip ); // check admin ips // scan the passwords // MDW: no! too vulnerable to attacks! //for ( long i = 0 ; i < m_numMasterPwds ; i++ ) { // if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue; // // . matching one password is good enough now, default OR // // . because just matching an IP is good enough security, // // there is really no need for both IP AND passwd match // return true; //} // ok, make sure they came from an acceptable IP if ( isRootIp ( ip ) ) // they also have a matching IP, so they now have permission return true; // if no security, allow all // MDW: nonononono!!!! //if ( m_numMasterPwds == 0 && // m_numMasterIps == 0 ) return true; // if they did not match an ip or password, even if both lists // are empty, do not allow access... this prevents security breeches // by accident return false; } */ bool Conf::isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) { // until we have coll tokens use this... return isRootAdmin ( socket , hr ); } // . is user a root administrator? // . only need to be from root IP *OR* have password, not both bool Conf::isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) { // totally open access? if ( m_numConnectIps <= 0 && m_numMasterPwds <= 0 ) return true; // coming from root gets you in if ( isRootIp ( socket->m_ip ) ) return true; //if ( isConnectIp ( socket->m_ip ) ) return true; if ( hasRootPwd ( hr ) ) return true; return false; } bool Conf::hasRootPwd ( HttpRequest *hr ) { if ( m_numMasterPwds == 0 ) return false; char *p = hr->getString("pwd"); if ( ! p ) p = hr->getString("password"); if ( ! p ) p = hr->getStringFromCookie("pwd"); if ( ! p ) return false; for ( long i = 0 ; i < m_numMasterPwds ; i++ ) { if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue; // we got a match return true; } return false; } // . check this ip in the list of admin ips bool Conf::isRootIp ( unsigned long ip ) { //if ( m_numMasterIps == 0 ) return false; if ( m_numConnectIps == 0 ) return false; for ( long i = 0 ; i < m_numConnectIps ; i++ ) if ( m_connectIps[i] == (long)ip ) return true; //if ( ip == atoip("10.5.0.2",8) ) return true; // no match return false; } bool Conf::isConnectIp ( unsigned long ip ) { for ( long i = 0 ; i < m_numConnectIps ; i++ ) { if ( m_connectIps[i] == (long)ip ) return true; // . 1.2.3.0 ips mean the whole block // . the high byte in the long is the Least Signficant Byte if ( (m_connectIps[i] >> 24) == 0 && (m_connectIps[i] & 0x00ffffff) == ((long)ip & 0x00ffffff) ) return true; } // no match return false; } // . set all member vars to their default values void Conf::reset ( ) { g_parms.setToDefault ( (char *)this ); m_save = true; } bool Conf::init ( char *dir ) { // , long hostId ) { g_parms.setToDefault ( (char *)this ); m_save = true; char fname[1024]; if ( dir ) sprintf ( fname , "%slocalgb.conf", dir ); else sprintf ( fname , "./localgb.conf" ); File f; f.set ( fname ); m_isLocal = true; if ( ! f.doesExist() ) { m_isLocal = false; if ( dir ) sprintf ( fname , "%sgb.conf", dir ); else sprintf ( fname , "./gb.conf" ); } // make sure g_mem.maxMem is big enough temporarily if ( g_mem.m_maxMem < 10000000 ) g_mem.m_maxMem = 10000000; bool status = g_parms.setFromFile ( this , fname , NULL ); // ignore if yippy if ( g_isYippy ) { //g_conf.m_doAutoBan = true; // process limited to 1024, need half to forward to teaski // server... close least used will deal with the loris attack //g_conf.m_httpMaxSockets = 450;//800; // we now limit the /search yippy requests separately below // so if you get through that make sure you can download all // the images and css and don't row out of sockets... g_conf.m_httpMaxSockets = 475; // rich wants 8 and 30 g_conf.m_numFreeQueriesPerMinute = 7;//20;//8;//5; g_conf.m_numFreeQueriesPerDay = 30;//500;//30;//20;//30;//70; g_conf.m_logAutobannedQueries = false; status = true; } // update g_mem //g_mem.m_maxMem = g_conf.m_maxMem; if ( ! g_mem.init ( g_conf.m_maxMem ) ) return false; // always turn this off g_conf.m_testMem = false; // and this, in case you forgot to turn it off if ( g_conf.m_isLive ) g_conf.m_doConsistencyTesting = false; // and this on g_conf.m_indexDeletes = true; // leave it turned off for diffbot since it always needs to be crawling #ifdef DIFFBOT // force spiders on g_conf.m_spideringEnabled = true; #else // always force off on startup if not diffbot //g_conf.m_spideringEnabled = false; #endif // this off g_conf.m_repairingEnabled = false; // make this 1 day for now (in seconds) g_conf.m_maxQualityCacheAge = 3600*24; // hack this off until the overrun bug is fixed g_conf.m_datedbMaxCacheMem = 0; // force on for now g_conf.m_useStatsdb = true; // hard-code disable this -- could be dangerous g_conf.m_bypassValidation = true;//false; // this could too! (need this) g_conf.m_allowScale = true;//false; // . until we fix spell checker // . the hosts splitting count isn't right and it just sends to like // host #0 or something... g_conf.m_doSpellChecking = false; // always turn on threads if live if ( g_conf.m_isLive ) g_conf.m_useThreads = true; // disable this at startup always... no since might have crashed // in the middle of a test. and we just turn on spiders again when // already in test mode otherwise hostid #0 will erase all the files. //g_conf.m_testParserEnabled = false; //g_conf.m_testSpiderEnabled = false; //g_conf.m_testSearchEnabled = false; /* // // are we running in Matt Wells's data center? // if so, we want to be able to use the seo tools that are not part // of the open source. we also want to be able to control the // data center fans for optimal cooling. // // get hostname from /etc/host SafeBuf sb; sb.fillFromFile("/etc/hostname"); g_errno = 0; bool priv = false; char *hh = sb.getBufStart(); // cut off tail sb.removeLastChar('\n'); sb.removeLastChar('\n'); if ( hh && strcmp(hh,"galileo") == 0) priv = true; if ( hh && strcmp(hh,"sputnik") == 0) priv = true; if ( hh && strcmp(hh,"titan") == 0) priv = true; if ( hh && hh[0]=='g' && hh[1]=='k' && is_digit(hh[2]) ) priv = true; //if(hh[0]=='s' && hh[1]=='p' && is_digit(hh[2])) ) priv = true; if ( priv ) g_conf.m_isMattWells = true; else g_conf.m_isMattWells = false; */ g_conf.m_isMattWells = false; #ifdef MATTWELLS g_conf.m_isMattWells = true; #endif // this is not possible /* if ( g_hostdb.getNumGroups() != g_hostdb.m_indexSplits ) { log("db: Cannot do full split where indexdb split " "is not %li.",(long)g_hostdb.getNumGroups()); g_conf.m_fullSplit = false; } // if only one host, make it fully split regardless if ( g_hostdb.getNumGroups() == 1 ) g_conf.m_fullSplit = true; // note it in the log if ( g_conf.m_fullSplit ) log(LOG_INFO,"db: Split is FULL"); */ // sanity check if ( g_hostdb.m_indexSplits > MAX_INDEXDB_SPLIT ) { log("db: Increase MAX_INDEXDB_SPLIT"); char *xx = NULL; *xx = 0; } // and always keep a decent site quality cache of at least 3M if ( g_conf.m_siteQualityMaxCacheMem < 3000000 ) g_conf.m_siteQualityMaxCacheMem = 3000000; //m_useDiffbot = false; //#ifdef DIFFBOT // make sure all collections index into a single unified collection //m_useDiffbot = true; //#endif // HACK: set this now setRootIps(); return status; } void Conf::setRootIps ( ) { //m_numDns = 16; //for ( long i = 0; i < m_numDns; i++ ) // m_dnsPorts[i] = 53; //m_numDns = 0; // set m_numDns based on Conf::m_dnsIps[] array long i; for ( i = 0; i < 16 ; i++ ) { m_dnsPorts[i] = 53; if ( ! g_conf.m_dnsIps[i] ) break; } m_numDns = i; // hardcode google for now... //m_dnsIps[0] = atoip("8.8.8.8",7); //m_dnsIps[1] = atoip("8.8.4.4",7); //m_numDns = 2; Host *h = g_hostdb.getMyHost(); //char *ipStr = "10.5.0.3"; //char *ipStr = "10.5.56.78"; // gk268 now on roadrunner //char *ipStr = "10.5.56.77"; // gk267 now cnsp-routed bind9 server // now sp1 for speed (quad processor) //char *ipStr = "10.5.66.11"; // fail back to google public dns char *ipStr = "8.8.8.8"; // try google first dibs. NO! they are unresponsive after a while //char *ipStr = "8.8.4.4"; // for some reason scproxy2 local bind9 not responding to us!!! fix! //if ( h->m_type & HT_SCPROXY ) ipStr = "127.0.0.1"; //if ( h->m_type & HT_PROXY ) ipStr = "127.0.0.1"; if ( h->m_type & HT_SCPROXY ) ipStr = "8.8.8.8"; if ( h->m_type & HT_PROXY ) ipStr = "8.8.8.8"; // if we are a proxy, notably a spider compression proxy... //if ( g_proxy.isProxy() ) ipStr = "127.0.0.1"; if ( m_numDns == 0 ) { m_dnsIps[0] = atoip( ipStr , gbstrlen(ipStr) ); m_dnsPorts[0] = 53; m_numDns = 1; } // default this to off on startup for now until it works better m_askRootNameservers = false; // and return as well return; char *rootIps[] = { "192.228.79.201", "192.33.4.12", "128.8.10.90", //"192.203.230.10", ping timedout "192.5.5.241", //"192.112.36.4", ping timedout //"128.63.2.53", ping timedout //"192.36.148.17", "192.58.128.30", "193.0.14.129", //"198.32.64.12", "199.7.83.42", // new guy "202.12.27.33", "198.41.0.4" }; long n = sizeof(rootIps)/sizeof(char *); if ( n > MAX_RNSIPS ) { log("admin: Too many root nameserver ips. Truncating."); n = MAX_RNSIPS; } m_numRns = n; for ( long i = 0 ; i < n ; i++ ) { m_rnsIps [i] = atoip(rootIps[i],gbstrlen(rootIps[i])); m_rnsPorts[i] = 53; log(LOG_INIT,"dns: Using root nameserver #%li %s.", i,iptoa(m_rnsIps[i])); } } // . parameters can be changed on the fly so we must save Conf bool Conf::save ( ) { if ( ! m_save ) return true; // always reset this before saving bool keep = g_conf.m_testMem ; g_conf.m_testMem = false; char fname[1024]; sprintf ( fname , "%sgb.conf.saving", g_hostdb.m_dir ); bool status = g_parms.saveToXml ( (char *)this , fname ); if ( status ) { char fname2[1024]; char *local = ""; if ( m_isLocal ) local = "local"; sprintf( fname2 , "%s%sgb.conf" , g_hostdb.m_dir , local ); if(access(fname2, F_OK) == 0) unlink(fname2); if(link(fname, fname2) == 0) { unlink(fname); log(LOG_INFO,"admin: Saved %s.",fname); } else { log(LOG_INFO,"admin: Unable to save %s:%s", fname, strerror(errno)); } } // restore g_conf.m_testMem = keep; return status; } // . get the default collection based on hostname // will look for the hostname in each collection for a match // no match defaults to default collection char *Conf::getDefaultColl ( char *hostname, long hostnameLen ) { if ( ! m_defaultColl || ! m_defaultColl[0] ) return "main"; // just use default coll for now to keep things simple return m_defaultColl; /* // return defaultColl for empty hostname if (!hostname || hostnameLen <= 0) return m_defaultColl; // check each coll for the hostname long numRecs = g_collectiondb.getNumRecs(); collnum_t currCollnum = g_collectiondb.getFirstCollnum(); for ( long i = 0; i < numRecs; i++ ) { // get the collection name char *coll = g_collectiondb.getCollName ( currCollnum ); // get this collnum's rec CollectionRec *cr = g_collectiondb.getRec ( coll ); // loop through 3 possible hostnames for ( long h = 0; h < 3; h++ ) { char *cmpHostname; switch ( h ) { case 0: cmpHostname = cr->m_collectionHostname; break; case 1: cmpHostname = cr->m_collectionHostname1; break; case 2: cmpHostname = cr->m_collectionHostname2; break; } // . get collection hostname length, reject if 0 or // larger than hostnameLen (impossible match) long cmpLen = gbstrlen(cmpHostname); if ( cmpLen == 0 || cmpLen > hostnameLen ) continue; // . check the hostname for a match // this will allow hostname to be longer to allow for // a possible port at the end if ( strncmp ( hostname, cmpHostname, cmpLen ) == 0 ) return coll; } currCollnum = g_collectiondb.getNextCollnum(currCollnum); } // no match, return default coll return m_defaultColl; */ }