open-source-search-engine/Conf.h
mwells c216f7b2a7 use 48-bit url hash for lock keys again.
query reindex recs can just use their probable docids as fake
uh48s. we need this so we can avoid the fakedb record and just
use the spider reply to trigger a 5-second lock expiration. a
little simpler. added a "log debug spider wait" switch
(m_logDebugSpiderWait) for waiting-tree debugging. fixed per-ip
spider limiting. fixed losing spiders down a black hole from
updateCrawlInfo(). check UrlLock::m_confirmed when counting
outstanding spiders on one ip, since we may hold the lock on one
host without being granted it on all of them;
confirmLockAcquisition() is called once the lock is fully granted
so it can set UrlLock::m_confirmed.
2013-09-29 00:09:46 -06:00
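
A hedged sketch of that last point (UrlLock::m_firstIp, the lock array and
the helper name are stand-ins for illustration, not the actual SpiderLoop
code; only UrlLock::m_confirmed and confirmLockAcquisition() come from the
change itself):

	// count outstanding spiders on one ip, skipping locks that were
	// never confirmed on every host by confirmLockAcquisition()
	static long countConfirmedLocksOnIp ( UrlLock *locks ,
					      long     numLocks ,
					      long     firstIp ) {
		long count = 0;
		for ( long i = 0 ; i < numLocks ; i++ ) {
			if ( locks[i].m_firstIp != firstIp ) continue;
			// a lock granted on one host but not on all of
			// them does not count as an outstanding spider
			if ( ! locks[i].m_confirmed ) continue;
			count++;
		}
		return count;
	}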


// Copyright Matt Wells, Apr 2001
// . every host has a config record
// . like tagdb, record in 100% xml
// . allows remote configuration of hosts through Msg4 class
// . remote user sends some xml, we set our member vars using that xml
// . when we save to disk we convert our mem vars to xml
// . is global so everybody can see it
// . conf record can be changed by director OR with the host's priv key
// . use Conf remotely to get setup info about a specific host
// . get your local ip/port/groupMask/etc. from this class not HostMap
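// . a minimal usage sketch (hedged; "workingDir" is illustrative and the
//   calls simply mirror the declarations below, not the code in main.cpp):
//     if ( ! g_conf.init ( workingDir ) ) return false; // loads {dir}/gb.conf
//     if ( ! g_conf.verify()            ) return false; // sanity check parms
//     g_conf.m_spideringEnabled = true;                 // tweak a parm in memory
//     g_conf.save();                                    // write it back out as xml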
#ifndef _CONF_H_
#define _CONF_H_
//#include "../../rsa/rsa.h" // for private_key and public_key types
#include "Xml.h" // Xml class
#include "File.h" // File class
#include "ip.h" // atoip()
#include "Hostdb.h" // g_hostdb.makeGroupId(),makeGroupMask()
#include "HttpRequest.h"
#include "TcpSocket.h"
#include "Url.h" // MAX_COLL_LEN
#include "Collectiondb.h"
#define MAX_MASTER_IPS 15
#define MAX_MASTER_PASSWORDS 10
#define USERAGENTMAXSIZE 128
#define PASSWORD_MAX_LEN 12
#define MAX_CONNECT_IPS 128
#define AUTOBAN_TEXT_SIZE (32*8192)
#define MAX_DNSIPS 16
#define MAX_RNSIPS 13
#define MAX_MX_LEN 128
#define MAX_EMAIL_LEN 64
#define USERS_TEXT_SIZE 500000
#define MAX_GEOCODERS 4
class Conf {
public:
Conf();
bool isMasterAdmin ( class TcpSocket *s , class HttpRequest *r );
bool isSpamAssassin ( class TcpSocket *s , class HttpRequest *r );
bool isAdminIp ( unsigned long ip );
bool isConnectIp ( unsigned long ip );
// loads conf parms from this file "{dir}/gb.conf"
bool init ( char *dir );
void setRootIps();
// set from a buffer of null-terminated xml
bool add ( char *xml );
// saves any changes to the conf file
bool save ( );
// reset all values to their defaults
void reset();
// verify that some values are ok
bool verify();
// . get the default collection based on hostname
// . looks for the hostname in each collection for a match
// . if there is no match, the default collection is used
char *getDefaultColl ( char *hostname, long hostnameLen );
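// . a hedged usage sketch ("hostname"/"hostnameLen" would come from the
//   Host: header of the incoming HttpRequest; that accessor is not shown):
//     char *coll = g_conf.getDefaultColl ( hostname , hostnameLen );
//     // falls back to the default collection (m_defaultColl) when
//     // nothing matches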
// hold the filename of this conf file
char m_confFilename[256];
// general info
//bool m_isTrustedNet;
//char m_dir[256]; // our mattster root working dir
//long m_ip; // now in hostdb conf file
//bool m_isTrusted; // is the whole network trusted?
//private_key m_privKey; // our private key for this host
// max amount of memory we can use
long long m_maxMem;
// if this is false, we do not save, used by dump routines
// in main.cpp so they can change parms here and not worry about
// a core dump saving them
char m_save;
//director info (optional) (used iff m_isTrustedNet is false)
//public_key m_dirPubKey; // everyone should know director's pub key
//private_key m_dirPrivKey; // this is 0 if we don't know it
// . external ip of our firewall/router/...
// . regular users use this to connect
// . Host::m_externalIp/Port is used by admin
// . Host::m_ip/port is for machine to machine communication or
// if admin is coming from a local machine
//unsigned long m_mainExternalIp;
//unsigned short m_mainExternalPort;
// . our group info
//long m_hostId; // our hostId
//long m_numGroups;
//unsigned long m_groupId; // hi bits are set before low bits
//unsigned long m_groupMask; // hi bits are set before low bits
// the main directory
//char m_dir[256];
// an additional strip directory on a different drive
char m_stripeDir[256];
char m_defaultColl [ MAX_COLL_LEN + 1 ];
char m_dirColl [ MAX_COLL_LEN + 1];
char m_dirHost [ MAX_URL_LEN ];
char m_clusterName[32];
// . dns parameters
// . dnsDir should hold our saved cache (TODO: save the dns cache)
//short m_dnsClientPort;
long m_numDns ;
long m_dnsIps[MAX_DNSIPS];
short m_dnsPorts[MAX_DNSIPS];
long m_dnsMaxCacheMem;
bool m_dnsSaveCache;
long m_geocoderIps[MAX_GEOCODERS];
long m_wikiProxyIp;
long m_wikiProxyPort;
// built-in dns parameters using name servers
char m_askRootNameservers;
long m_numRns;
long m_rnsIps[MAX_RNSIPS];
short m_rnsPorts[MAX_RNSIPS];
// log absolute filename
//char m_logFilename[256];
// hostdb absolute conf filename
//char m_hostdbFilename[256];
// used to limit all rdb's to one merge per machine at a time
long m_mergeBufSize;
// tagdb parameters
long m_tagdbMaxTreeMem;
long m_tagdbMaxDiskPageCacheMem;
//long m_tagdbMaxCacheMem;
//bool m_tagdbUseSeals;
//long m_tagdbMinFilesToMerge;
//bool m_tagdbSaveCache;
// catdb parameters
long m_catdbMaxTreeMem;
long m_catdbMaxDiskPageCacheMem;
long m_catdbMaxCacheMem;
long m_catdbMinFilesToMerge;
long m_revdbMaxTreeMem;
long m_timedbMaxTreeMem;
// titledb parameters
//long m_titledbMaxTreeMem; // why isn't this used?
//long m_titledbMaxCacheMem;
//long m_titledbMinFilesToMerge;
//long m_titledbMaxCacheAge;
//bool m_titledbSaveCache;
// clusterdb for site clustering, each rec is 16 bytes
long m_clusterdbMaxTreeMem;
//long m_clusterdbMaxCacheMem;
//long m_clusterdbMaxDiskPageCacheMem;
long m_clusterdbMinFilesToMerge;
bool m_clusterdbSaveCache;
// if this is true, all collections index into the "main" collection
// but keep their own spiderdb in their collection.
//bool m_useDiffbot;
//bool m_indexEventsOnly;
// linkdb for storing linking relations
long m_linkdbMaxTreeMem;
// long m_linkdbMaxCacheMem;
long m_linkdbMaxDiskPageCacheMem;
long m_linkdbMinFilesToMerge;
// bool m_linkdbSaveCache;
// dup vector cache max mem
long m_maxVectorCacheMem;
// checksumdb for doc deduping, each rec is 12-16 bytes
//long m_checksumdbMaxTreeMem;
//long m_checksumdbMaxCacheMem;
//long m_checksumdbMaxDiskPageCacheMem;
//long m_checksumdbMinFilesToMerge;
// size of Checksumdb keys for this host
//long m_checksumdbKeySize;
//bool m_checksumdbSaveCache;
// for holding urls that have been entered into the spider queue
//long m_tfndbMaxTreeMem ;
long m_tfndbMaxDiskPageCacheMem ; // for the DiskPageCache class only
//long m_tfndbMinFilesToMerge;
//bool m_tfndbSaveCache;
//long long m_tfndbMaxUrls;
long m_maxCpuThreads;
long m_deadHostTimeout;
long m_sendEmailTimeout;
long m_pingSpacer;
// the spiderdb holds url records for spidering, when to spider, etc..
long m_maxWriteThreads ;
//long m_spiderdbMaxTreeMem ;
//long m_spiderdbMaxCacheMem ;
//long m_spiderdbMaxDiskPageCacheMem ;
//long m_spiderdbMinFilesToMerge;
long m_spiderMaxDiskThreads ;
long m_spiderMaxBigDiskThreads ; // > 1M read
long m_spiderMaxMedDiskThreads ; // 100k - 1M read
long m_spiderMaxSmaDiskThreads ; // < 100k read
long m_queryMaxDiskThreads ;
long m_queryMaxBigDiskThreads ; // > 1M read
long m_queryMaxMedDiskThreads ; // 100k - 1M read
long m_queryMaxSmaDiskThreads ; // < 100k per read
// thresholds used to categorize disk read sizes as big/med/small
long m_bigReadSize;
long m_medReadSize;
long m_smaReadSize;
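// . one plausible reading of the thresholds above (a hedged sketch; how the
//   three sizes actually pair with the thread limits is an assumption):
//     long maxThreads;
//     if      ( readSize > g_conf.m_bigReadSize ) maxThreads = g_conf.m_spiderMaxBigDiskThreads;
//     else if ( readSize > g_conf.m_medReadSize ) maxThreads = g_conf.m_spiderMaxMedDiskThreads;
//     else                                        maxThreads = g_conf.m_spiderMaxSmaDiskThreads;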
long m_statsdbMaxTreeMem;
long m_statsdbMaxCacheMem;
long m_statsdbMaxDiskPageCacheMem;
//long m_statsdbMinFilesToMerge;
bool m_useStatsdb;
//bool m_statsdbSnapshots;
//bool m_statsdbPageEnabled;
//long m_spiderdbRootUrlPriority; // 0-7
//long m_spiderdbAddUrlPriority ;
//long m_minSpiderPriority ; // min spiderRec priority to spider
//long m_maxSpidersPerDomain ; // per foreign domain
//long m_maxRespiderWait ; // in seconds to re-spider a page
//long m_minRespiderWait ; // in seconds to re-spider a page
// this is now in the root collection record
//long m_maxNumSpiders ; // per local spider host
bool m_spideringEnabled ;
bool m_turkingEnabled ;
//bool m_webSpideringEnabled;
//bool m_facebookSpideringEnabled;
//bool m_stubHubSpideringEnabled;
//bool m_eventBriteSpideringEnabled;
//bool m_refreshFacebookUsersEnabled;
//bool m_injectionEnabled ;
// qa testing loop going on? uses "test" subdir
bool m_testParserEnabled ;
bool m_testSpiderEnabled ;
//bool m_doDocIdRangeSplitting ;
bool m_testSearchEnabled ;
//bool m_spiderLoggingEnabled ;
//bool m_logWarnings ; // generally small problems
//bool m_logCongestion ; // ENOSLOTS
bool m_addUrlEnabled ; // TODO: use at http interface level
bool m_adFeedEnabled ;
//bool m_timingDebugEnabled ;
//bool m_threadDebugEnabled ;
//bool m_httpServerEnabled ;// don't allow seo bots on all machines
bool m_doStripeBalancing ;
// . true if the server is on the production cluster
// . we enforce the 'elvtune -w 32 /dev/sd?' cmd on all drives because
// that yields higher performance when dumping/merging on disk
bool m_isLive;
// is this a buzzlogic cluster?
//bool m_isBuzzLogic;
// is this a wikipedia cluster?
bool m_isWikipedia;
//bool m_spiderLinks ;
//bool m_dedupingEnabled ; // dedup content on same mid domain
//long m_retryNum ; // how many times to retry url b4 nuke
//bool m_useIfModifiedSince ;
//bool m_doUrlSpamCheck ; // disallow urls w/ naughty hostnames
//bool m_timeBetweenUrls ; // for urls from same domain only
// for holding robots.txt files for various hostnames
long m_robotdbMaxCacheMem ;
bool m_robotdbSaveCache;
// indexdb has a max cached age for getting IndexLists (10 mins deflt)
long m_indexdbMaxTreeMem ;
long m_indexdbMaxCacheMem;
long m_indexdbMaxDiskPageCacheMem; // for DiskPageCache class only
long m_indexdbMaxIndexListAge;
long m_indexdbTruncationLimit;
long m_indexdbMinFilesToMerge;
bool m_indexdbSaveCache;
long m_datedbMaxTreeMem ;
long m_datedbMaxCacheMem;
long m_datedbMaxDiskPageCacheMem; // for DiskPageCache class only
long m_datedbMaxIndexListAge;
long m_datedbTruncationLimit;
long m_datedbMinFilesToMerge;
bool m_datedbSaveCache;
// for caching exact quotas in Msg36.cpp
//long m_quotaTableMaxMem;
//bool m_useBuckets;
// port of the main udp server
short m_udpPort;
// TODO: parse these out!!!!
//char m_httpRootDir[256] ;
//short m_httpPort ; now in hosts.conf only
long m_httpMaxSockets ;
long m_httpsMaxSockets ;
//long m_httpMaxReadBufSize ;
long m_httpMaxSendBufSize ;
//long m_httpMaxDownloadSockets ;
// a search results cache (for Msg40)
long m_searchResultsMaxCacheMem ;
long m_searchResultsMaxCacheAge ; // in seconds
bool m_searchResultsSaveCache;
// a sitelinkinfo cache (for Msg25)
long m_siteLinkInfoMaxCacheMem;
long m_siteLinkInfoMaxCacheAge;
bool m_siteLinkInfoSaveCache;
// a sitelinkinfo cache (for MsgD)
long m_siteQualityMaxCacheMem;
long m_siteQualityMaxCacheAge;
bool m_siteQualitySaveCache;
// a sitelinkinfo cache (for Msg25)
// for downloading an rdb
//long m_downloadBufSize; // how big should hosts read buf be?
// . how many incoming links should we sample?
// . used for linkText and quality weighting from number of links
// and their total base quality
long m_maxIncomingLinksToSample;
// phrase weighting
float m_queryPhraseWeight;
// for Weights.cpp
long m_sliderParm;
//long m_indexTableIntersectionAlgo;
// . maximum relative weight of a query term (1.0 to inf)
// . default about 8?
//float m_queryMaxMultiplier;
// send emails when a host goes down?
bool m_sendEmailAlerts;
// should we delay email alerts until 9:30 am when only 1 host out of a pair of twins goes down?
bool m_delayNonCriticalEmailAlerts;
//delay emails after
char m_delayEmailsAfter[6];
//delay emails before
char m_delayEmailsBefore[6];
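// . a hedged illustration (the "HH:MM" storage format is only an assumption
//   based on the char[6] size and the 9:30 am note above):
//     strcpy ( m_delayEmailsAfter  , "22:00" ); // quiet window starts
//     strcpy ( m_delayEmailsBefore , "09:30" ); // quiet window ends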
//bool m_sendEmailAlertsToMattTmobile;
//bool m_sendEmailAlertsToMattAlltell;
//bool m_sendEmailAlertsToJavier;
//bool m_sendEmailAlertsToMelissa;
//bool m_sendEmailAlertsToPartap;
//bool m_sendEmailAlertsToCinco;
bool m_sendEmailAlertsToSysadmin;
//bool m_sendEmailAlertsToZak;
bool m_sendEmailAlertsToEmail1;
char m_email1MX[MAX_MX_LEN];
char m_email1Addr[MAX_EMAIL_LEN];
char m_email1From[MAX_EMAIL_LEN];
bool m_sendEmailAlertsToEmail2;
char m_email2MX[MAX_MX_LEN];
char m_email2Addr[MAX_EMAIL_LEN];
char m_email2From[MAX_EMAIL_LEN];
bool m_sendEmailAlertsToEmail3;
char m_email3MX[MAX_MX_LEN];
char m_email3Addr[MAX_EMAIL_LEN];
char m_email3From[MAX_EMAIL_LEN];
bool m_sendEmailAlertsToEmail4;
char m_email4MX[MAX_MX_LEN];
char m_email4Addr[MAX_EMAIL_LEN];
char m_email4From[MAX_EMAIL_LEN];
//bool m_sendEmailAlertsToSabino;
char m_errstr1[MAX_URL_LEN];
char m_errstr2[MAX_URL_LEN];
char m_errstr3[MAX_URL_LEN];
char m_sendParmChangeAlertsToEmail1;
char m_sendParmChangeAlertsToEmail2;
char m_sendParmChangeAlertsToEmail3;
char m_sendParmChangeAlertsToEmail4;
float m_avgQueryTimeThreshold;
//float m_maxQueryTime;
float m_querySuccessThreshold;
long m_numQueryTimes;
long m_maxCorruptLists;
// limit to how big a serialized query can be before just storing
// the raw string instead, keeps network traffic down at the expense
// of processing time, used by Msg serialization
long m_maxSerializedQuerySize;
// the spider won't go if this bandwidth rate is currently exceeded
float m_maxIncomingKbps;
// max pgs/sec to index and delete from index. guards resources.
float m_maxPagesPerSecond;
float m_maxLoadAvg;
// redhat 9's NPTL doesn't like our async signals
bool m_allowAsyncSignals;
// if in read-only mode we do no spidering and load no saved trees
// so we can use all mem for caching index lists
bool m_readOnlyMode;
// if this is true we use /etc/hosts for hostname lookup before dns
bool m_useEtcHosts;
bool m_useMergeToken;
// . should we always read data from local machine if available?
// . if your network is not gigabit, this may be a good idea
bool m_preferLocalReads;
// should we bypass load balancing and always send titledb record
// lookup requests to a host to maximize tfndb page cache hits?
//bool m_useBiasedTfndb;
// calls fsync(fd) if true after each write
bool m_flushWrites ;
bool m_verifyWrites;
long m_corruptRetries;
// log unfreed memory on exit
bool m_detectMemLeaks;
// . if false we will not keep spelling information in memory
// . we will keep the popularity info from dict though, since related
// topics requires that
bool m_doSpellChecking;
// . give suggestions to narrow the search
bool m_doNarrowSearch;
// are we running in Matt Wells's private data center? if so we
// use seo tools and control datacenter fans, etc.
bool m_isMattWells;
// maximum number of synonyms/stems to expand a word into
//long m_maxSynonyms;
// default affinity for spelling suggestions/numbers
//float m_defaultAffinity;
// threshold for synonym usage
//float m_frequencyThreshold;
// thesaurus configuration
//long m_maxAffinityRequests;
//long m_maxAffinityErrors;
//long m_maxAffinityAge;
//long m_affinityTimeout;
//char m_affinityServer[MAX_URL_LEN];
//char m_affinityParms[MAX_URL_LEN];
// new syncing information
bool m_syncEnabled;
bool m_syncIndexdb;
bool m_syncTitledb;
bool m_syncSpiderdb;
//bool m_syncChecksumdb;
bool m_syncSitedb;
bool m_syncLogging;
bool m_syncDoUnion;
bool m_syncDryRun;
char m_syncHostIds [ 256 ]; // restrict syncing to these host ids
//long m_syncReadBufSize; // limit disk activity for syncing
//long m_syncSeeksPerSecond; // limit disk activity for syncing
long m_syncBytesPerSecond; // limit disk activity for syncing
// if this is true we do not add indexdb keys that *should* already
// be in indexdb. but if you recently upped the m_truncationLimit
// then you can set this to false to add all indexdb keys.
//bool m_onlyAddUnchangedTermIds;
bool m_doIncrementalUpdating;
// always true unless entire indexdb was deleted and we are rebuilding
bool m_indexDeletes;
bool m_splitTwins;
bool m_useThreads;
bool m_useSHM;
bool m_useQuickpoll;
bool m_useDiskPageCacheIndexdb;
bool m_useDiskPageCachePosdb;
bool m_useDiskPageCacheDatedb;
bool m_useDiskPageCacheTitledb;
bool m_useDiskPageCacheSpiderdb;
bool m_useDiskPageCacheTfndb;
bool m_useDiskPageCacheTagdb;
bool m_useDiskPageCacheChecksumdb;
bool m_useDiskPageCacheClusterdb;
bool m_useDiskPageCacheCatdb;
bool m_useDiskPageCacheLinkdb;
//bool m_quickpollCoreOnError;
bool m_useShotgun;
bool m_testMem;
bool m_doConsistencyTesting;
// temporary hack for fixing docid collision resolution bug
bool m_hackFixWords;
bool m_hackFixPhrases;
// flags for excluding docs with only linktext or meta text matches
// for one or more query terms
//bool m_excludeLinkText;
//bool m_excludeMetaText;
// deny robots access to the search results
//bool m_robotCheck;
// scan all titledb files if we can't find the rec where it should be
bool m_scanAllIfNotFound;
// defaults to "Gigabot/1.0"
char m_spiderUserAgent [ USERAGENTMAXSIZE ];
long m_autoSaveFrequency;
long m_docCountAdjustment;
bool m_profilingEnabled;
bool m_dynamicPerfGraph;
long m_minProfThreshold;
bool m_sequentialProfiling;
long m_realTimeProfilerMinQuickPollDelta;
//long m_summaryMode; // JAB: moved to CollectionRec
// . for query-dependent summary/title generation
//long m_titleMaxLen;
//long m_summaryMaxLen;
//long m_summaryMaxNumLines;
//long m_summaryMaxNumCharsPerLine;
//long m_summaryDefaultNumLines;
//char m_summaryFrontHighlightTag[128];
//char m_summaryBackHighlightTag [128];
//
// See Log.h for an explanation of the switches below
//
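// . a hedged example of how one of these switches typically gates a log
//   line (the exact log() signature lives in Log.h and is assumed here):
//     if ( g_conf.m_logDebugSpider )
//         log ( LOG_DEBUG , "spider: got lock for uh48=%llu" , uh48 );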
// GET and POST requests.
bool m_logHttpRequests;
bool m_logAutobannedQueries;
//bool m_logQueryTimes;
// if a query took this many milliseconds or more, log its time
long m_logQueryTimeThreshold;
bool m_logQueryReply;
bool m_logQueryDebug;
// log what gets into the index
bool m_logSpideredUrls;
// log informational messages, they are not indicative of any error.
bool m_logInfo;
// when out of udp slots
bool m_logNetCongestion;
// doc quota limits, url truncation limits
bool m_logLimits;
// log debug switches
bool m_logDebugAddurl ;
bool m_logDebugAdmin ;
bool m_logDebugBuild ;
bool m_logDebugBuildTime ;
bool m_logDebugDb ;
bool m_logDebugDirty ;
bool m_logDebugDisk ;
bool m_logDebugDns ;
bool m_logDebugDownloads;
bool m_logDebugFacebook;
bool m_logDebugHttp ;
bool m_logDebugLoop ;
bool m_logDebugLang ;
bool m_logDebugLinkInfo ;
bool m_logDebugMem ;
bool m_logDebugMemUsage;
bool m_logDebugMerge ;
bool m_logDebugNet ;
bool m_logDebugPQR ; // post query rerank
bool m_logDebugQuery ;
bool m_logDebugQuota ;
bool m_logDebugRobots ;
bool m_logDebugSpcache ; // SpiderCache.cpp debug
bool m_logDebugSpiderFlow;
bool m_logDebugSpiderWait;
bool m_logDebugSpeller ;
bool m_logDebugTagdb ;
bool m_logDebugSections;
bool m_logDebugSEO;
bool m_logDebugSEOInserts;
bool m_logDebugStats ;
bool m_logDebugSummary ;
bool m_logDebugSpider ;
bool m_logDebugUrlAttempts ;
bool m_logDebugTcp ;
bool m_logDebugThread ;
bool m_logDebugTimedb ;
bool m_logDebugTitle ;
bool m_logDebugTopics ;
bool m_logDebugTopDocs ;
bool m_logDebugUdp ;
bool m_logDebugUnicode ;
bool m_logDebugRepair ;
bool m_logDebugDate ;
// expensive timing messages
bool m_logTimingAddurl ;
bool m_logTimingAdmin ;
bool m_logTimingBuild;
bool m_logTimingDb;
bool m_logTimingNet;
bool m_logTimingQuery;
bool m_logTimingSpcache;
bool m_logTimingTopics;
// programmer reminders.
bool m_logReminders;
long m_numMasterPwds;
char m_masterPwds[MAX_MASTER_PASSWORDS][PASSWORD_MAX_LEN];
long m_numMasterIps;
long m_masterIps[MAX_MASTER_IPS];
long m_numConnectIps;
long m_connectIps [ MAX_CONNECT_IPS ];
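// . a hedged sketch of the kind of scan isConnectIp()/isAdminIp() perform
//   over these arrays (the real implementations live in Conf.cpp):
//     bool found = false;
//     for ( long i = 0 ; i < m_numConnectIps ; i++ )
//         if ( m_connectIps[i] == (long)ip ) { found = true; break; }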
// should we generate a similarity/content vector for titleRecs lacking
// one? this takes ~100+ ms per rec, very expensive, so it is meant for
// testing only.
bool m_generateVectorAtQueryTime;
//Users
char m_users [ USERS_TEXT_SIZE ];
long m_usersLen;
char m_superTurks [ USERS_TEXT_SIZE ];
long m_superTurksLen;
long m_maxYippyOut;
char m_doAutoBan;
long m_banIpsLen;
char m_banIps [ AUTOBAN_TEXT_SIZE ];
long m_allowIpsLen;
char m_allowIps [ AUTOBAN_TEXT_SIZE ];
long m_validCodesLen;
char m_validCodes[ AUTOBAN_TEXT_SIZE ];
long m_banRegexLen;
char m_banRegex [ AUTOBAN_TEXT_SIZE ];
long m_extraParmsLen;
char m_extraParms [ AUTOBAN_TEXT_SIZE ];
unsigned char m_numFreeQueriesPerMinute;
unsigned long m_numFreeQueriesPerDay;
char m_redirect[MAX_URL_LEN];
char m_useCompressionProxy;
char m_gzipDownloads;
// used by proxy to make proxy point to the temp cluster while
// the original cluster is updated
char m_useTmpCluster;
char m_timeSyncProxy;
// For remote datafeed verification
//char m_useDFAcctServer;
//long m_dfAcctIp;
//long m_dfAcctPort;
//char m_dfAcctColl[MAX_COLL_LEN];
Xml m_xml;
char m_buf[10*1024];
long m_bufSize;
// . for specifying if this is an interface machine
// messages are rerouted from this machine to the main
// cluster set in the hosts.conf.
bool m_interfaceMachine;
// after we take the natural log of each query term's DF (doc freq.)
// we
float m_queryExp;
//char m_useDynamicPhraseWeighting;
float m_minPopForSpeller; // 0% to 100%
// catdb min site rec size for LARGE but latent domains
long m_catdbMinRecSizes;
// allow scaling up of hosts by removing recs not in the correct
// group. otherwise a sanity check will be triggered.
char m_allowScale;
// . timeout on dead hosts, only set when we know a host is dead and
//   will not come back online. Messages will time out on the dead
//   host, but not error, allowing outstanding spidering to be
//   finished by the twin
char m_giveupOnDeadHosts;
char m_bypassValidation;
long m_maxHardDriveTemp;
long m_maxHeartbeatDelay;
long m_maxCallbackDelay;
// balance value for Msg6, each host can have this many ready domains
// per global host
//long m_distributedSpiderBalance;
//long m_distributedIpWait;
// parameters for indexdb splitting and tfndb extension bits
//long m_indexdbSplit;
//char m_fullSplit;
//char m_legacyIndexdbSplit;
//long m_tfndbExtBits;
// used by Repair.cpp
char m_repairingEnabled ;
long m_maxRepairSpiders ;
long m_repairMem;
char m_collsToRepair[1024];
char m_fullRebuild ;
char m_fullRebuildKeepNewSpiderRecs;
char m_rebuildRecycleLinkInfo ;
//char m_rebuildRecycleLinkInfo2 ;
//char m_removeBadPages ;
char m_rebuildTitledb ;
//char m_rebuildTfndb ;
//char m_rebuildIndexdb ;
char m_rebuildPosdb ;
//char m_rebuildNoSplits ;
//char m_rebuildDatedb ;
//char m_rebuildChecksumdb ;
char m_rebuildClusterdb ;
char m_rebuildSpiderdb ;
//char m_rebuildSitedb ;
char m_rebuildLinkdb ;
//char m_rebuildTagdb ;
//char m_rebuildPlacedb ;
char m_rebuildTimedb ;
char m_rebuildSectiondb ;
//char m_rebuildRevdb ;
char m_rebuildRoots ;
char m_rebuildNonRoots ;
char m_rebuildSkipSitedbLookup ;
// for caching the qualities of urls (see Msg20.cpp)
long m_maxQualityCacheAge ;
};
extern class Conf g_conf;
#endif
// old stuff:
// key is the hostId. hostId of -1 is the default conf record.
// here's the recognized fields:
// <dirPubKey> // default rec only
// <groupMask> // default rec only
// <rootDir> // default rec only
// <numPolice> // default rec only
// <isTrustedNet> // default rec only
// <hostId> -- stored in hostmap
// <ip> -- stored in hostmap
// <port> -- stored in hostmap
// <networkName> // also in default rec
// <maxDiskSpace>
// <maxMem>
// <maxCpu>
// <maxBps>
// <pubKey>
// <isTrustedHost> // director sealed
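// a hedged illustration of what one such old-style record may have looked
// like (the values are made up; only the tag names above are from the
// original format):
// <hostId>3</hostId>
// <ip>10.5.0.4</ip>
// <port>8000</port>
// <maxMem>4000000000</maxMem>
// <isTrustedHost>1</isTrustedHost>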