open-source-search-engine/gb.conf

# All <, >, " and # characters that are values for a field contained herein
# must be represented as &lt;, &gt;, &#34; and &#035; respectively.

# Mem available to this process. May be exceeded due to fragmentation.
<maxMem>4000000000</>

# Below the various Gigablast databases are configured.
# <*dbMaxTreeMem>          - mem used for holding new recs
# <*dbMaxDiskPageCacheMem> - disk page cache mem for this db
# <*dbMaxCacheMem>         - cache mem for holding single recs
# <*dbSaveCache>           - save the rec cache on exit?
# <*dbMaxCacheAge>         - max age (seconds) for recs in rec cache
# See the Stats page for record counts and stats.

# How many bytes should be used for caching DNS replies?
<dnsMaxCacheMem>128000</>

# A tagdb record assigns a url or site to a ruleset. Each tagdb record is
# about 100 bytes or so.
<tagdbMaxTreeMem>1028000</>
<tagdbMaxPageCacheMem>200000</>

# A catdb record assigns a url or site to DMOZ categories. Each catdb record
# is about 100 bytes.
<catdbMaxTreeMem>1000000</>
<catdbMaxPageCacheMem>25000000</>
<catdbMaxCacheMem>0</>

# Clusterdb caches small records for site clustering and deduping.
<clusterdbMaxTreeMem>1000000</>
<clusterdbSaveCache>0</>

# Max memory for dup vector cache.
<maxVectorCacheMem>10000000</>

# Robotdb caches robots.txt files.
<robotdbMaxCacheMem>128000</>
<robotdbSaveCache>0</>
<linkdbMaxPageCacheMem>0</>
<statsdbMaxTreeMem>5000000</>
<statsdbMaxCacheMem>0</>
<statsdbMaxDiskPageCacheMem>1000000</>

# Maximum bytes of a doc that can be sent before having to read more from disk
<httpMaxSendBufSize>128000</>

# Bytes to use for caching search result pages.
<searchResultsMaxCacheMem>100000</>

# Read only mode does not allow spidering.
<readOnlyMode>0</>

# Controls all spidering for all collections
<spideringEnabled>0</>

# What is the maximum number of web pages the spider is allowed to download
# simultaneously for ALL collections PER HOST?
<maxTotalSpiders>100</>

# Can people use the add url interface to add urls to the index?
<addUrlEnabled>1</>

# Save data in memory to disk after this many minutes have passed without the
# data having been dumped or saved to disk. Use 0 to disable.
<autoSaveFrequency>5</>

# Maximum sockets available to serve incoming HTTP requests. Too many
# outstanding requests will increase query latency. Excess requests will
# simply have their sockets closed.
<maxHttpSockets>100</>

# Maximum sockets available to serve incoming HTTPS requests. Like max http
# sockets, but for secure sockets.
<maxHttpsSockets>100</>

# Identification seen by web servers when the Gigablast spider downloads their
# web pages. It is polite to insert a contact email address here so webmasters
# that experience problems from the Gigablast spider have somewhere to vent.
<spiderUserAgent><![CDATA[GigablastOpenSource/1.0]]></>

# If this is true, gb will send Accept-Encoding: gzip to web servers when
# doing http downloads.
<askForGzippedDocsWhenDownloading>0</>

# How many seconds should we cache a search results page for?
<searchResultsCacheMaxAge>10800</>

# Keep track of ips which do queries, disallow non-customers from hitting us
# too hard.
<autobanIPsWhichViolateTheQueriesPerDayQuotas>0</>

# If a call to a message callback or message handler in the udp server takes
# more than this many milliseconds, then log it. Logs 'udp: Took %lli ms to
# call callback for msgType=0x%hhx niceness=%li'. Use -1 or less to disable
# the logging.
<maxDelayBeforeLoggingACallbackOrHandler>-1</>

# Sends emails to admin if a host goes down.
<sendEmailAlerts>0</>

# Do not send email alerts about dead hosts to anyone except
# sysadmin@gigablast.com between the times given below unless all the twins of
# the dead host are also dead. Instead, wait until that period has passed and
# send the alert then if the host is still dead.
<delayNonCriticalEmailAlerts>0</>

# Email alerts will include the cluster name
<clusterName><![CDATA[unspecified]]></>

# Send an email after a host has not responded to successive pings for this
# many milliseconds.
<sendEmailTimeout>62000</>

# Send email alerts when query success rate goes below this threshold.
# (fraction between 0.0 and 1.0, e.g. 0.85 means 85%)
<querySuccessRateThreshold>0.850000</>

# Send email alerts when average query latency goes above this threshold. (in
# seconds)
<averageQueryLatencyThreshold>2.000000</>

# Record this number of query times before calculating average query latency.
<numberOfQueryTimesInAverage>300</>

# At what temperature in Celsius should we send an email alert if a hard drive
# reaches it?
<maxHardDriveTemperature>45</>

# Look for this string in the kernel buffer for sending email alert. Useful
# for detecting some strange hard drive failures that really slow performance.
<errorString1><![CDATA[]]></>

# Look for this string in the kernel buffer for sending email alert. Useful
# for detecting some strange hard drive failures that really slow performance.
<errorString2><![CDATA[]]></>

# Look for this string in the kernel buffer for sending email alert. Useful
# for detecting some strange hard drive failures that really slow performance.
<errorString3><![CDATA[]]></>

# Sends to email address 1 through email server 1.
<sendEmailAlertsToEmail1>0</>

# Sends to email address 1 through email server 1 if any parm is changed.
<sendParmChangeEmailAlertsToEmail1>0</>

# Connects to this server directly when sending email 1 
<emailServer1><![CDATA[10.5.54.47]]></>

# Sends to this address when sending email 1 
<emailAddress1><![CDATA[4081234567@vtext.com]]></>

# The from field when sending email 1 
<fromEmailAddress1><![CDATA[sysadmin@mydomain.com]]></>

# Sends to email address 2 through email server 2.
<sendEmailAlertsToEmail2>0</>

# Sends to email address 2 through email server 2 if any parm is changed.
<sendParmChangeEmailAlertsToEmail2>0</>

# Connects to this server directly when sending email 2 
<emailServer2><![CDATA[mail.mydomain.com]]></>

# Sends to this address when sending email 2 
<emailAddress2><![CDATA[]]></>

# The from field when sending email 2 
<fromEmailAddress2><![CDATA[sysadmin@mydomain.com]]></>

# Sends to email address 3 through email server 3.
<sendEmailAlertsToEmail3>0</>

# Sends to email address 3 through email server 3 if any parm is changed.
<sendParmChangeEmailAlertsToEmail3>0</>

# Connects to this server directly when sending email 3 
<emailServer3><![CDATA[mail.mydomain.com]]></>

# Sends to this address when sending email 3 
<emailAddress3><![CDATA[]]></>

# The from field when sending email 3 
<fromEmailAddress3><![CDATA[sysadmin@mydomain.com]]></>

# IP address of the primary DNS server. Assumes UDP port 53. REQUIRED FOR
# SPIDERING! Use Google's public DNS 8.8.8.8 as default.
<dns0>8.8.8.8</>

# IP address of the secondary DNS server. Assumes UDP port 53. Will be
# accessed in conjunction with the primary dns, so make sure this is always
# up. An ip of 0 means disabled. Google's secondary public DNS is 8.8.4.4.
<dns1>8.8.4.4</>

# All hosts send to these DNSes based on hash of the subdomain to try to split
# DNS load evenly.
<dns2>0.0.0.0</>
<dns3>0.0.0.0</>
<dns4>0.0.0.0</>
<dns5>0.0.0.0</>
<dns6>0.0.0.0</>
<dns7>0.0.0.0</>
<dns8>0.0.0.0</>
<dns9>0.0.0.0</>
<dns10>0.0.0.0</>
<dns11>0.0.0.0</>
<dns12>0.0.0.0</>
<dns13>0.0.0.0</>
<dns14>0.0.0.0</>
<dns15>0.0.0.0</>

# Add IPs here to bar them from accessing this gigablast server.
<banIps><![CDATA[]]></>

# Add IPs here to give them an infinite query quota.
<allowIps><![CDATA[]]></>

# Don't try to autoban queries that have one of these codes. Also, the code
# must be valid for us to use &uip=IPADDRESS as the IP address of the
# submitter for purposes of autoban AND purposes of addurl daily quotas.
<validCodes><![CDATA[]]></>

# Append extra default parms to queries that match certain substrings. 
# Format: text to match in url, followed by a space, then the list of extra
# parms as they would appear appended to the url.  One match per line.
<extraParms><![CDATA[]]></>

# Ban any query that matches this list of substrings.  Must match all
# comma-separated strings on the same line.  ('\n' = OR, ',' = AND)
<banRegex><![CDATA[]]></>

# Any matching password will have administrative access to Gigablast and all
# collections.
# Use <masterPassword> tag.

# Any IPs in this list will have administrative access to Gigablast and all
# collections.
# Use <masterIp> tag.

# Log GET and POST requests received from the http server?
<logHttpRequests>1</>

# Should we log queries that are autobanned? They can really fill up the log.
<logAutobannedQueries>1</>

# If query took this many milliseconds or longer, then log the query and the
# time it took to process.
<logQueryTimeThreshold>5000</>

# Log query reply in proxy, but only for those queries above the time
# threshold above.
<logQueryReply>0</>

# Log status of spidered or injected urls?
<logSpideredUrls>1</>

# Log messages if Gigablast runs out of udp sockets?
<logNetworkCongestion>0</>

# Log messages not related to an error condition, but meant more to give an
# idea of the state of the gigablast process. These can be useful when
# diagnosing problems.
<logInformationalMessages>1</>

# Log it when document not added due to quota breach. Log it when url is too
# long and it gets truncated.
<logLimitBreeches>0</>

# Log various debug messages.
<logDebugAdminMessages>0</>
<logDebugBuildMessages>0</>
<logDebugBuildTimeMessages>0</>
<logDebugDatabaseMessages>0</>
<logDebugDirtyMessages>0</>
<logDebugDiskMessages>0</>
<logDebugDnsMessages>0</>
<logDebugHttpMessages>0</>
<logDebugLoopMessages>0</>
<logDebugLanguageDetectionMessages>0</>
<logDebugLinkInfo>0</>
<logDebugMemMessages>0</>
<logDebugMemUsageMessages>0</>
<logDebugNetMessages>0</>
<logDebugQueryMessages>0</>
<logDebugQuotaMessages>0</>
<logDebugRobotsMessages>0</>
<logDebugSpiderCacheMessages>0</>
<logDebugSpellerMessages>0</>
<logDebugSectionsMessages>0</>
<logDebugSeoInsertMessages>0</>
<logDebugSeoMessages>0</>
<logDebugStatsMessages>0</>
<logDebugSummaryMessages>0</>
<logDebugSpiderMessages>0</>
<logDebugUrlAttempts>0</>
<logDebugSpiderDownloads>0</>
<logDebugFacebook>0</>
<logDebugTagdbMessages>0</>
<logDebugTcpMessages>0</>
<logDebugThreadMessages>0</>
<logDebugTitleMessages>0</>
<logDebugTimedbMessages>0</>
<logDebugTopicMessages>0</>
<logDebugTopDocMessages>0</>
<logDebugUdpMessages>0</>
<logDebugUnicodeMessages>0</>
<logDebugRepairMessages>0</>
<logDebugPubDateExtractionMessages>0</>

# Log various timing related messages.
<logTimingMessagesForBuild>0</>

# Log various timing related messages.
<logTimingMessagesForAdmin>0</>
<logTimingMessagesForDatabase>0</>
<logTimingMessagesForNetworkLayer>0</>
<logTimingMessagesForQuery>0</>

# Log various timing related messages.
<logTimingMessagesForSpcache>0</>
<logTimingMessagesForRelatedTopics>0</>

# Log reminders to the programmer. You do not need this.
<logReminderMessages>0</>

# If enabled, gigablast will repair the rdbs as specified by the parameters
# below. When a particular collection is in repair mode, it can not spider or
# merge titledb files.
<repairModeEnabled>0</>

# Comma or space separated list of the collections to repair or rebuild.
<collectionsToRepairOrRebuild><![CDATA[main]]></>

# In bytes.
<memoryToUseForRepair>300000000</>

# Maximum number of outstanding inject spiders for repair.
<maxRepairSpiders>32</>

# If enabled, gigablast will reinject the content of all title recs into a
# secondary rdb system. That will replace the primary rdb system when complete.
<fullRebuild>0</>

# If enabled, gigablast will keep the new spiderdb records when doing the full
# rebuild or the spiderdb rebuild.
<keepNewSpiderdbRecs>1</>

# If enabled, gigablast will recycle the link info when rebuilding titledb.
<recycleLinkInfo>0</>

# If enabled, gigablast will rebuild this rdb
<rebuildTitledb>1</>

# If enabled, gigablast will rebuild this rdb
<rebuildPosdb>0</>

# If enabled, gigablast will rebuild this rdb
<rebuildClusterdb>0</>

# If enabled, gigablast will rebuild this rdb
<rebuildSpiderdb>0</>

# If enabled, gigablast will rebuild this rdb
<rebuildLinkdb>0</>

# If disabled, gigablast will skip root urls.
<rebuildRootUrls>1</>

# If disabled, gigablast will skip non-root urls.
<rebuildNonrootUrls>1</>

# When rebuilding spiderdb and scanning it for new spiderdb records, should a
# tagdb lookup be performed? Runs much much faster without it. Will also keep
# the original doc quality and spider priority intact.
<skipTagdbLookup>0</>
Initial file population. 2013-08-03 00:12:24 +04:00			`# All <, >, " and # characters that are values for a field contained herein`
			`# must be represented as &lt;, &gt;, &#34; and &#035; respectively.`

			`# Mem available to this process. May be exceeded due to fragmentation.`
			`<maxMem>4000000000</>`

			`# Below the various Gigablast databases are configured.`
			`# <*dbMaxTreeMem> - mem used for holding new recs`
			`# <*dbMaxDiskPageCacheMem> - disk page cache mem for this db`
			`# <*dbMaxCacheMem> - cache mem for holding single recs`
			`# <*dbSaveCache> - save the rec cache on exit?`
			`# <*dbMaxCacheAge> - max age (seconds) for recs in rec cache`
			`# See the Stats page for record counts and stats.`

			`# How many bytes should be used for caching DNS replies?`
			`<dnsMaxCacheMem>128000</>`

			`# A tagdb record assigns a url or site to a ruleset. Each tagdb record is`
			`# about 100 bytes or so.`
			`<tagdbMaxTreeMem>1028000</>`
			`<tagdbMaxPageCacheMem>200000</>`

			`# A catdb record assigns a url or site to DMOZ categories. Each catdb record`
			`# is about 100 bytes.`
			`<catdbMaxTreeMem>1000000</>`
			`<catdbMaxPageCacheMem>25000000</>`
			`<catdbMaxCacheMem>0</>`

			`# Clusterdb caches small records for site clustering and deduping.`
			`<clusterdbMaxTreeMem>1000000</>`
			`<clusterdbSaveCache>0</>`

			`# Max memory for dup vector cache.`
			`<maxVectorCacheMem>10000000</>`

			`# Robotdb caches robots.txt files.`
			`<robotdbMaxCacheMem>128000</>`
			`<robotdbSaveCache>0</>`
			`<linkdbMaxPageCacheMem>0</>`
			`<statsdbMaxTreeMem>5000000</>`
			`<statsdbMaxCacheMem>0</>`
			`<statsdbMaxDiskPageCacheMem>1000000</>`

			`# Maximum bytes of a doc that can be sent before having to read more from disk`
			`<httpMaxSendBufSize>128000</>`

			`# Bytes to use for caching search result pages.`
			`<searchResultsMaxCacheMem>100000</>`

			`# Read only mode does not allow spidering.`
			`<readOnlyMode>0</>`

default gb.conf update 2014-01-10 07:59:02 +04:00			`# Controls all spidering for all collections`
thread fixes. if pthread_create fails then keep thread queue and just return. will try to relaunch later. do not count delete keys towards shard rebalance count. 2014-03-16 07:07:02 +04:00			`<spideringEnabled>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
added diffbot retry rules. added maxTotalSpiders parm for all colls to follow. tried to fix msg 0x00 socket jam up. 2014-01-23 07:57:38 +04:00			`# What is the maximum number of web pages the spider is allowed to download`
			`# simultaneously for ALL collections PER HOST?`
take out confusing function no longer used 2014-01-28 23:10:59 +04:00			`<maxTotalSpiders>100</>`
added diffbot retry rules. added maxTotalSpiders parm for all colls to follow. tried to fix msg 0x00 socket jam up. 2014-01-23 07:57:38 +04:00
default gb.conf update 2014-01-10 07:59:02 +04:00			`# Can people use the add url interface to add urls to the index?`
			`<addUrlEnabled>1</>`

list collections in sidebar. 2014-01-10 09:13:41 +04:00			`# Save data in memory to disk after this many minutes have passed without the`
default gb.conf update 2014-01-10 07:59:02 +04:00			`# data having been dumped or saved to disk. Use 0 to disable.`
default parm updates 2014-01-10 08:07:19 +04:00			`<autoSaveFrequency>5</>`
default gb.conf update 2014-01-10 07:59:02 +04:00
			`# Maximum sockets available to serve incoming HTTP requests. Too many`
			`# outstanding requests will increase query latency. Excess requests will`
			`# simply have their sockets closed.`
			`<maxHttpSockets>100</>`

			`# Maximum sockets available to serve incoming HTTPS requests. Like max http`
			`# sockets, but for secure sockets.`
			`<maxHttpsSockets>100</>`

			`# Identification seen by web servers when the Gigablast spider downloads their`
list collections in sidebar. 2014-01-10 09:13:41 +04:00			`# web pages. It is polite to insert a contact email address here so webmasters`
default gb.conf update 2014-01-10 07:59:02 +04:00			`# that experience problems from the Gigablast spider have somewhere to vent.`
			`<spiderUserAgent><![CDATA[GigablastOpenSource/1.0]]></>`

list collections in sidebar. 2014-01-10 09:13:41 +04:00			`# If this is true, gb will send Accept-Encoding: gzip to web servers when`
			`# doing http downloads.`
default gb.conf update 2014-01-10 07:59:02 +04:00			`<askForGzippedDocsWhenDownloading>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
default gb.conf update 2014-01-10 07:59:02 +04:00			`# How many seconds should we cache a search results page for?`
			`<searchResultsCacheMaxAge>10800</>`

			`# Keep track of ips which do queries, disallow non-customers from hitting us`
			`# too hard.`
			`<autobanIPsWhichViolateTheQueriesPerDayQuotas>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# If a call to a message callback or message handler in the udp server takes`
			`# more than this many milliseconds, then log it. Logs 'udp: Took %lli ms to`
			`# call callback for msgType=0x%hhx niceness=%li'. Use -1 or less to disable`
			`# the logging.`
			`<maxDelayBeforeLoggingACallbackOrHandler>-1</>`

			`# Sends emails to admin if a host goes down.`
use cookies to display url filters table. 2013-09-19 00:50:55 +04:00			`<sendEmailAlerts>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
default gb.conf update 2014-01-10 07:59:02 +04:00			`# Do not send email alerts about dead hosts to anyone except`
			`# sysadmin@gigablast.com between the times given below unless all the twins of`
			`# the dead host are also dead. Instead, wait till after if the host is still`
			`# dead.`
			`<delayNonCriticalEmailAlerts>0</>`

			`# Email alerts will include the cluster name`
			`<clusterName><![CDATA[unspecified]]></>`

			`# Send an email after a host has not responded to successive pings for this`
			`# many milliseconds.`
			`<sendEmailTimeout>62000</>`

			`# Send email alerts when query success rate goes below this threshold.`
			`# (percent rate between 0.0 and 1.0)`
			`<querySuccessRateThreshold>0.850000</>`

			`# Send email alerts when average query latency goes above this threshold. (in`
			`# seconds)`
			`<averageQueryLatencyThreshold>2.000000</>`

			`# Record this number of query times before calculating average query latency.`
default parm updates 2014-01-10 08:07:19 +04:00			`<numberOfQueryTimesInAverage>300</>`
default gb.conf update 2014-01-10 07:59:02 +04:00
			`# At what temperature in Celsius should we send an email alert if a hard drive`
			`# reaches it?`
			`<maxHardDriveTemperature>45</>`

			`# Look for this string in the kernel buffer for sending email alert. Useful`
			`# for detecting some strange hard drive failures that really slow performance.`
			`<errorString1><![CDATA[]]></>`

			`# Look for this string in the kernel buffer for sending email alert. Useful`
			`# for detecting some strange hard drive failures that really slow performance.`
			`<errorString2><![CDATA[]]></>`

			`# Look for this string in the kernel buffer for sending email alert. Useful`
			`# for detecting some strange hard drive failures that really slow performance.`
			`<errorString3><![CDATA[]]></>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# Sends to email address 1 through email server 1.`
default parm updates 2014-01-10 08:07:19 +04:00			`<sendEmailAlertsToEmail1>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# Sends to email address 1 through email server 1 if any parm is changed.`
default parm updates 2014-01-10 08:07:19 +04:00			`<sendParmChangeEmailAlertsToEmail1>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# Connects to this server directly when sending email 1`
			`<emailServer1><![CDATA[10.5.54.47]]></>`

			`# Sends to this address when sending email 1`
default parm updates 2014-01-10 08:07:19 +04:00			`<emailAddress1><![CDATA[4081234567@vtext.com]]></>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# The from field when sending email 1`
			`<fromEmailAddress1><![CDATA[sysadmin@mydomain.com]]></>`

			`# Sends to email address 2 through email server 2.`
			`<sendEmailAlertsToEmail2>0</>`

			`# Sends to email address 2 through email server 2 if any parm is changed.`
default gb.conf update 2014-01-10 07:59:02 +04:00			`<sendParmChangeEmailAlertsToEmail2>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# Connects to this server directly when sending email 2`
			`<emailServer2><![CDATA[mail.mydomain.com]]></>`

			`# Sends to this address when sending email 2`
			`<emailAddress2><![CDATA[]]></>`

			`# The from field when sending email 2`
			`<fromEmailAddress2><![CDATA[sysadmin@mydomain.com]]></>`

			`# Sends to email address 3 through email server 3.`
			`<sendEmailAlertsToEmail3>0</>`

			`# Sends to email address 3 through email server 3 if any parm is changed.`
default parm updates 2014-01-10 08:07:19 +04:00			`<sendParmChangeEmailAlertsToEmail3>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00
			`# Connects to this server directly when sending email 3`
			`<emailServer3><![CDATA[mail.mydomain.com]]></>`

			`# Sends to this address when sending email 3`
			`<emailAddress3><![CDATA[]]></>`

			`# The from field when sending email 3`
			`<fromEmailAddress3><![CDATA[sysadmin@mydomain.com]]></>`

default gb.conf update 2014-01-10 07:59:02 +04:00			`# IP address of the primary DNS server. Assumes UDP port 53. REQUIRED FOR`
			`# SPIDERING! Use Google's public DNS 8.8.8.8 as default.`
Initial file population. 2013-08-03 00:12:24 +04:00			`<dns0>8.8.8.8</>`

			`# IP address of the secondary DNS server. Assumes UDP port 53. Will be`
			`# accessed in conjunction with the primary dns, so make sure this is always`
default gb.conf update 2014-01-10 07:59:02 +04:00			`# up. An ip of 0 means disabled. Google's secondary public DNS is 8.8.4.4.`
dns ip fix in gb.conf. 2013-08-20 01:25:37 +04:00			`<dns1>8.8.4.4</>`
default gb.conf update 2014-01-10 07:59:02 +04:00
			`# All hosts send to these DNSes based on hash of the subdomain to try to split`
			`# DNS load evenly.`
Initial file population. 2013-08-03 00:12:24 +04:00			`<dns2>0.0.0.0</>`
			`<dns3>0.0.0.0</>`
			`<dns4>0.0.0.0</>`
			`<dns5>0.0.0.0</>`
			`<dns6>0.0.0.0</>`
			`<dns7>0.0.0.0</>`
			`<dns8>0.0.0.0</>`
			`<dns9>0.0.0.0</>`
			`<dns10>0.0.0.0</>`
			`<dns11>0.0.0.0</>`
			`<dns12>0.0.0.0</>`
			`<dns13>0.0.0.0</>`
			`<dns14>0.0.0.0</>`
			`<dns15>0.0.0.0</>`

			`# add Ips here to bar them from accessing this gigablast server.`
			`<banIps><![CDATA[]]></>`

			`# add Ips here to give them an infinite query quota.`
			`<allowIps><![CDATA[]]></>`

			`# Don't try to autoban queries that have one of these codes. Also, the code`
			`# must be valid for us to use &uip=IPADDRESS as the IP address of the`
			`# submitter for purposes of autoban AND purposes of addurl daily quotas.`
			`<validCodes><![CDATA[]]></>`

			`# Append extra default parms to queries that match certain substrings.`
			`# Format: text to match in url, followed by a space, then the list of extra`
			`# parms as they would appear appended to the url. One match per line.`
			`<extraParms><![CDATA[]]></>`

			`# ban any query that matches this list of substrings. Must match all`
			`# comma-separated strings on the same line. ('\n' = OR, ',' = AND)`
			`<banRegex><![CDATA[]]></>`

security admin fixes 2014-02-12 11:36:09 +04:00			`# Any matching password will have administrative access to Gigablast and all`
			`# collections.`
code checkpoint 2014-02-10 03:41:43 +04:00			`# Use <masterPassword> tag.`

security admin fixes 2014-02-12 11:36:09 +04:00			`# Any IPs in this list will have administrative access to Gigablast and all`
			`# collections.`
			`# Use <masterIp> tag.`
basic admin controls page structure 2014-02-08 11:34:45 +04:00
Initial file population. 2013-08-03 00:12:24 +04:00			`# Log GET and POST requests received from the http server?`
			`<logHttpRequests>1</>`

			`# Should we log queries that are autobanned? They can really fill up the log.`
			`<logAutobannedQueries>1</>`

			`# If query took this many milliseconds or longer, then log the query and the`
			`# time it took to process.`
			`<logQueryTimeThreshold>5000</>`

			`# Log query reply in proxy, but only for those queries above the time`
			`# threshold above.`
			`<logQueryReply>0</>`

			`# Log status of spidered or injected urls?`
			`<logSpideredUrls>1</>`

			`# Log messages if Gigablast runs out of udp sockets?`
			`<logNetworkCongestion>0</>`

			`# Log messages not related to an error condition, but meant more to give an`
			`# idea of the state of the gigablast process. These can be useful when`
			`# diagnosing problems.`
			`<logInformationalMessages>1</>`

			`# Log it when document not added due to quota breach. Log it when url is too`
			`# long and it gets truncated.`
			`<logLimitBreeches>0</>`

			`# Log various debug messages.`
			`<logDebugAdminMessages>0</>`
			`<logDebugBuildMessages>0</>`
			`<logDebugBuildTimeMessages>0</>`
			`<logDebugDatabaseMessages>0</>`
fix url filters bugs. 2013-09-18 22:02:09 +04:00			`<logDebugDirtyMessages>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00			`<logDebugDiskMessages>0</>`
			`<logDebugDnsMessages>0</>`
			`<logDebugHttpMessages>0</>`
			`<logDebugLoopMessages>0</>`
			`<logDebugLanguageDetectionMessages>0</>`
			`<logDebugLinkInfo>0</>`
			`<logDebugMemMessages>0</>`
			`<logDebugMemUsageMessages>0</>`
			`<logDebugNetMessages>0</>`
			`<logDebugQueryMessages>0</>`
			`<logDebugQuotaMessages>0</>`
			`<logDebugRobotsMessages>0</>`
			`<logDebugSpiderCacheMessages>0</>`
			`<logDebugSpellerMessages>0</>`
			`<logDebugSectionsMessages>0</>`
more formatting 2014-01-19 13:09:38 +04:00			`<logDebugSeoInsertMessages>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00			`<logDebugSeoMessages>0</>`
			`<logDebugStatsMessages>0</>`
			`<logDebugSummaryMessages>0</>`
more bug fixes with notification system. 2013-10-10 02:28:15 +04:00			`<logDebugSpiderMessages>0</>`
Initial file population. 2013-08-03 00:12:24 +04:00			`<logDebugUrlAttempts>0</>`
			`<logDebugSpiderDownloads>0</>`
			`<logDebugFacebook>0</>`
			`<logDebugTagdbMessages>0</>`
			`<logDebugTcpMessages>0</>`
			`<logDebugThreadMessages>0</>`
			`<logDebugTitleMessages>0</>`
			`<logDebugTimedbMessages>0</>`
			`<logDebugTopicMessages>0</>`
			`<logDebugTopDocMessages>0</>`
			`<logDebugUdpMessages>0</>`
			`<logDebugUnicodeMessages>0</>`
			`<logDebugRepairMessages>0</>`
			`<logDebugPubDateExtractionMessages>0</>`

			`# Log various timing related messages.`
			`<logTimingMessagesForBuild>0</>`

			`# Log various timing related messages.`
			`<logTimingMessagesForAdmin>0</>`
			`<logTimingMessagesForDatabase>0</>`
			`<logTimingMessagesForNetworkLayer>0</>`
			`<logTimingMessagesForQuery>0</>`

			`# Log various timing related messages.`
			`<logTimingMessagesForSpcache>0</>`
			`<logTimingMessagesForRelatedTopics>0</>`

			`# Log reminders to the programmer. You do not need this.`
			`<logReminderMessages>0</>`

			`# If enabled, gigablast will repair the rdbs as specified by the parameters`
			`# below. When a particular collection is in repair mode, it can not spider or`
			`# merge titledb files.`
			`<repairModeEnabled>0</>`

			`# Comma or space separated list of the collections to repair or rebuild.`
			`<collectionsToRepairOrRebuild><![CDATA[main]]></>`

			`# In bytes.`
			`<memoryToUseForRepair>300000000</>`

			`# Maximum number of outstanding inject spiders for repair.`
			`<maxRepairSpiders>32</>`

			`# If enabled, gigablast will reinject the content of all title recs into a`
			`# secondary rdb system. That will replace the primary rdb system when complete.`
			`<fullRebuild>0</>`

			`# If enabled, gigablast will keep the new spiderdb records when doing the full`
			`# rebuild or the spiderdb rebuild.`
			`<keepNewSpiderdbRecs>1</>`

			`# If enabled, gigablast will recycle the link info when rebuilding titledb.`
			`<recycleLinkInfo>0</>`

			`# If enabled, gigablast will rebuild this rdb`
			`<rebuildTitledb>1</>`

			`# If enabled, gigablast will rebuild this rdb`
			`<rebuildPosdb>0</>`

			`# If enabled, gigablast will rebuild this rdb`
			`<rebuildClusterdb>0</>`

			`# If enabled, gigablast will rebuild this rdb`
			`<rebuildSpiderdb>0</>`

			`# If enabled, gigablast will rebuild this rdb`
			`<rebuildLinkdb>0</>`

			`# If disabled, gigablast will skip root urls.`
			`<rebuildRootUrls>1</>`

			`# If disabled, gigablast will skip non-root urls.`
			`<rebuildNonrootUrls>1</>`

			`# When rebuilding spiderdb and scanning it for new spiderdb records, should a`
			`# tagdb lookup be performed? Runs much much faster without it. Will also keep`
			`# the original doc quality and spider priority intact.`
			`<skipTagdbLookup>0</>`