mirror of
https://github.com/plausible/analytics.git
synced 2024-11-22 10:43:38 +03:00
4498 lines
106 KiB
YAML
4498 lines
106 KiB
YAML
###############
|
||
# Device Detector - The Universal Device Detection library for parsing User Agents
|
||
#
|
||
# @link https://matomo.org
|
||
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
||
###############
|
||
|
||
- regex: 'WireReaderBot(?:/([\d+.]+))?'
|
||
name: 'WireReaderBot'
|
||
category: 'Feed Fetcher'
|
||
url: 'https://wirereader.app/'
|
||
|
||
- regex: 'monitoring360bot'
|
||
name: '360 Monitoring'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.360monitoring.io'
|
||
producer:
|
||
name: 'Plesk International GmbH'
|
||
url: 'https://www.plesk.com'
|
||
|
||
- regex: 'Cloudflare-Healthchecks'
|
||
name: 'Cloudflare Health Checks'
|
||
category: 'Service Agent'
|
||
url: 'https://developers.cloudflare.com/health-checks/'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: '360Spider'
|
||
name: '360Spider'
|
||
category: 'Search bot'
|
||
url: 'https://www.so.com/help/help_3_2.html'
|
||
producer:
|
||
name: 'Online Media Group, Inc.'
|
||
url: ''
|
||
|
||
- regex: 'Aboundex'
|
||
name: 'Aboundexbot'
|
||
category: 'Search bot'
|
||
url: 'http://www.aboundex.com/crawler/'
|
||
producer:
|
||
name: 'Aboundex.com'
|
||
url: 'http://www.aboundex.com'
|
||
|
||
- regex: 'AcoonBot'
|
||
name: 'Acoon'
|
||
category: 'Search bot'
|
||
url: 'http://www.acoon.de/robot.asp'
|
||
producer:
|
||
name: 'Acoon GmbH'
|
||
url: 'http://www.acoon.de'
|
||
|
||
- regex: 'AddThis\.com'
|
||
name: 'AddThis.com'
|
||
category: 'Social Media Agent'
|
||
url: ''
|
||
producer:
|
||
name: 'Clearspring Technologies, Inc.'
|
||
url: 'http://www.clearspring.com'
|
||
|
||
- regex: 'AhrefsBot'
|
||
name: 'aHrefs Bot'
|
||
category: 'Crawler'
|
||
url: 'https://ahrefs.com/robot'
|
||
producer:
|
||
name: 'Ahrefs Pte Ltd'
|
||
url: 'https://ahrefs.com/robot'
|
||
|
||
- regex: 'AhrefsSiteAudit/[\d.]+'
|
||
name: 'AhrefsSiteAudit'
|
||
category: 'Site Monitor'
|
||
url: 'https://ahrefs.com/robot/site-audit'
|
||
producer:
|
||
name: 'Ahrefs Pte Ltd'
|
||
url: 'https://ahrefs.com/'
|
||
|
||
- regex: 'ia_archiver|alexabot|verifybot'
|
||
name: 'Alexa Crawler'
|
||
category: 'Search bot'
|
||
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
||
producer:
|
||
name: 'Alexa Internet'
|
||
url: 'https://www.alexa.com'
|
||
|
||
- regex: 'alexa site audit'
|
||
name: 'Alexa Site Audit'
|
||
category: 'Site Monitor'
|
||
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
||
producer:
|
||
name: 'Alexa Internet'
|
||
url: 'https://www.alexa.com'
|
||
|
||
- regex: 'Amazonbot/[\d.]+'
|
||
name: 'Amazon Bot'
|
||
category: 'Crawler'
|
||
url: 'https://developer.amazon.com/support/amazonbot'
|
||
producer:
|
||
name: 'Amazon.com, Inc.'
|
||
url: 'https://www.amazon.com/'
|
||
|
||
- regex: 'AmazonAdBot/[\d.]+'
|
||
name: 'Amazon AdBot'
|
||
category: 'Crawler'
|
||
url: 'https://adbot.amazon.com/'
|
||
producer:
|
||
name: 'Amazon.com, Inc.'
|
||
url: 'https://www.amazon.com/'
|
||
|
||
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
||
name: 'Amazon Route53 Health Check'
|
||
category: 'Service Agent'
|
||
producer:
|
||
name: 'Amazon Web Services'
|
||
url: 'https://aws.amazon.com/'
|
||
|
||
- regex: 'AmorankSpider'
|
||
name: 'Amorank Spider'
|
||
category: 'Crawler'
|
||
url: 'http://amorank.com/webcrawler.html'
|
||
producer:
|
||
name: 'Amorank'
|
||
url: 'http://www.amorank.com'
|
||
|
||
- regex: 'ApacheBench'
|
||
name: 'ApacheBench'
|
||
category: 'Benchmark'
|
||
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
||
producer:
|
||
name: 'The Apache Software Foundation'
|
||
url: 'https://www.apache.org/foundation/'
|
||
|
||
- regex: 'Applebot'
|
||
name: 'Applebot'
|
||
category: 'Crawler'
|
||
url: 'https://support.apple.com/en-us/HT204683'
|
||
producer:
|
||
name: 'Apple Inc'
|
||
url: 'https://www.apple.com'
|
||
|
||
- regex: 'AppSignalBot'
|
||
name: 'AppSignalBot'
|
||
category: 'Site Monitor'
|
||
url: 'https://docs.appsignal.com/uptime-monitoring/'
|
||
producer:
|
||
name: 'AppSignal'
|
||
url: 'https://appsignal.com/'
|
||
|
||
- regex: 'Arachni'
|
||
name: 'Arachni'
|
||
category: 'Security Checker'
|
||
url: 'https://www.arachni-scanner.com/'
|
||
producer:
|
||
name: 'Sarosys LLC'
|
||
url: 'https://www.sarosys.com/'
|
||
|
||
- regex: 'AspiegelBot'
|
||
name: 'AspiegelBot'
|
||
category: 'Crawler'
|
||
url: 'https://aspiegel.com/'
|
||
producer:
|
||
name: 'Huawei'
|
||
url: 'https://www.huawei.com/'
|
||
|
||
- regex: 'Castro 2, Episode Duration Lookup'
|
||
name: 'Castro 2'
|
||
category: 'Service Agent'
|
||
url: 'http://supertop.co/castro/'
|
||
producer:
|
||
name: 'Supertop'
|
||
url: 'http://supertop.co'
|
||
|
||
- regex: 'Curious George'
|
||
name: 'Analytics SEO Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://www.analyticsseo.com/crawler'
|
||
producer:
|
||
name: 'Analytics SEO'
|
||
url: 'http://www.analyticsseo.com'
|
||
|
||
- regex: 'archive\.org_bot|special_archiver'
|
||
name: 'archive.org bot'
|
||
category: 'Crawler'
|
||
url: 'https://archive.org/details/archive.org_bot'
|
||
producer:
|
||
name: 'The Internet Archive'
|
||
url: 'https://archive.org'
|
||
|
||
- regex: 'Ask Jeeves/Teoma'
|
||
name: 'Ask Jeeves'
|
||
category: 'Search bot'
|
||
url: ''
|
||
producer:
|
||
name: 'Ask Jeeves Inc.'
|
||
url: 'http://www.ask.com'
|
||
|
||
- regex: 'Backlink-Check\.de'
|
||
name: 'Backlink-Check.de'
|
||
category: 'Crawler'
|
||
url: 'http://www.backlink-check.de/bot.html'
|
||
producer:
|
||
name: 'Mediagreen Medienservice'
|
||
url: 'http://www.backlink-check.de'
|
||
|
||
- regex: 'BacklinkCrawler'
|
||
name: 'BacklinkCrawler'
|
||
category: 'Crawler'
|
||
url: 'http://www.backlinktest.com/crawler.html'
|
||
producer:
|
||
name: '2.0Promotion GbR'
|
||
url: 'http://www.backlinktest.com'
|
||
|
||
- regex: 'Baidu.*spider|baidu Transcoder'
|
||
name: 'Baidu Spider'
|
||
category: 'Search bot'
|
||
url: 'http://www.baidu.com/search/spider.htm'
|
||
producer:
|
||
name: 'Baidu'
|
||
url: 'http://www.baidu.com'
|
||
|
||
- regex: 'BazQux'
|
||
name: 'BazQux Reader'
|
||
url: 'https://bazqux.com/fetcher'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Better Uptime Bot'
|
||
name: 'Better Uptime Bot'
|
||
category: 'Site Monitor'
|
||
url: 'https://betteruptime.com/faq'
|
||
producer:
|
||
name: 'Better Uptime'
|
||
url: 'https://betteruptime.com/'
|
||
|
||
- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
||
name: 'BingBot'
|
||
category: 'Search bot'
|
||
url: 'http://search.msn.com/msnbot.htm'
|
||
producer:
|
||
name: 'Microsoft Corporation'
|
||
url: 'http://www.microsoft.com'
|
||
|
||
- regex: 'Blekkobot'
|
||
name: 'Blekkobot'
|
||
category: 'Search bot'
|
||
url: 'http://blekko.com/about/blekkobot'
|
||
producer:
|
||
name: 'Blekko'
|
||
url: 'http://blekko.com'
|
||
|
||
- regex: 'BLEXBot'
|
||
name: 'BLEXBot Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://webmeup-crawler.com'
|
||
producer:
|
||
name: 'WebMeUp'
|
||
url: 'http://webmeup.com'
|
||
|
||
- regex: 'Bloglovin'
|
||
name: 'Bloglovin'
|
||
url: 'http://www.bloglovin.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Blogtrottr'
|
||
name: 'Blogtrottr'
|
||
url: ''
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: 'Blogtrottr Ltd'
|
||
url: 'https://blogtrottr.com/'
|
||
|
||
- regex: 'BoardReader Blog Indexer'
|
||
name: 'BoardReader Blog Indexer'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'BoardReader'
|
||
url: 'https://boardreader.com/'
|
||
|
||
- regex: 'BountiiBot'
|
||
name: 'Bountii Bot'
|
||
category: 'Search bot'
|
||
url: 'http://bountii.com/contact.php'
|
||
producer:
|
||
name: 'Bountii Inc.'
|
||
url: 'http://bountii.com'
|
||
|
||
- regex: 'Browsershots'
|
||
name: 'Browsershots'
|
||
category: 'Service Agent'
|
||
url: 'http://browsershots.org/faq'
|
||
producer:
|
||
name: 'Browsershots.org'
|
||
url: 'http://browsershots.org'
|
||
|
||
- regex: 'BUbiNG'
|
||
name: 'BUbiNG'
|
||
category: 'Crawler'
|
||
url: 'http://law.di.unimi.it/BUbiNG.html'
|
||
producer:
|
||
name: 'The Laboratory for Web Algorithmics (LAW)'
|
||
url: 'http://law.di.unimi.it/software.php#bubing'
|
||
|
||
- regex: '(?<!HTC)[ _]Butterfly/'
|
||
name: 'Butterfly Robot'
|
||
category: 'Search bot'
|
||
url: 'http://labs.topsy.com/butterfly'
|
||
producer:
|
||
name: 'Topsy Labs'
|
||
url: 'http://labs.topsy.com'
|
||
|
||
- regex: 'CareerBot'
|
||
name: 'CareerBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.career-x.de/bot.html'
|
||
producer:
|
||
name: 'career-x GmbH'
|
||
url: 'http://www.career-x.de'
|
||
|
||
- regex: 'CCBot'
|
||
name: 'ccBot crawler'
|
||
category: 'Crawler'
|
||
url: 'http://commoncrawl.org/faq/'
|
||
producer:
|
||
name: 'reddit inc.'
|
||
url: 'http://www.reddit.com'
|
||
|
||
- regex: 'Cliqzbot'
|
||
name: 'Cliqzbot'
|
||
category: 'Crawler'
|
||
url: 'http://cliqz.com/company/cliqzbot'
|
||
producer:
|
||
name: '10betterpages GmbH'
|
||
url: 'http://cliqz.com'
|
||
|
||
- regex: 'Cloudflare-AMP'
|
||
name: 'CloudFlare AMP Fetcher'
|
||
category: 'Crawler'
|
||
url: 'https://amp.cloudflare.com/doc/fetcher.html'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'http://www.cloudflare.com'
|
||
|
||
- regex: 'Cloudflare-?Diagnostics'
|
||
name: 'Cloudflare Diagnostics'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.cloudflare.com/'
|
||
producer:
|
||
name: 'Cloudflare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'CloudFlare-AlwaysOnline'
|
||
name: 'CloudFlare Always Online'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.cloudflare.com/always-online'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'Cloudflare-SSLDetector'
|
||
name: 'Cloudflare SSL Detector'
|
||
category: 'Site Monitor'
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'Cloudflare Custom Hostname Verification'
|
||
name: 'Cloudflare Custom Hostname Verification'
|
||
category: 'Service Agent'
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'Cloudflare-Traffic-Manager'
|
||
name: 'Cloudflare Traffic Manager'
|
||
category: 'Site Monitor'
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'https://developers\.cloudflare\.com/security-center/'
|
||
name: 'Cloudflare Security Insights'
|
||
category: 'Site Monitor'
|
||
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
||
producer:
|
||
name: 'CloudFlare'
|
||
url: 'https://www.cloudflare.com/'
|
||
|
||
- regex: 'coccoc\.com'
|
||
name: 'Cốc Cốc Bot'
|
||
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
||
category: 'Search bot'
|
||
producer:
|
||
name: 'Cốc Cốc'
|
||
url: 'https://coccoc.com/'
|
||
|
||
- regex: 'collectd'
|
||
name: 'Collectd'
|
||
url: 'https://collectd.org/'
|
||
category: 'Site Monitor'
|
||
producer:
|
||
name: 'Collectd'
|
||
url: 'https://collectd.org/'
|
||
|
||
- regex: 'CommaFeed'
|
||
name: 'CommaFeed'
|
||
url: 'http://www.commafeed.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'CSS Certificate Spider'
|
||
name: 'CSS Certificate Spider'
|
||
category: 'Crawler'
|
||
url: 'http://www.css-security.com/certificatespider/'
|
||
producer:
|
||
name: 'Certified Security Solutions'
|
||
url: 'https://www.css-security.com/company/about-us/'
|
||
|
||
- regex: 'Datadog Agent|Datadog/?Synthetics'
|
||
name: 'Datadog Agent'
|
||
url: 'https://github.com/DataDog/dd-agent'
|
||
category: 'Site Monitor'
|
||
producer:
|
||
name: 'Datadog'
|
||
url: 'https://www.datadoghq.com/'
|
||
|
||
- regex: 'Datanyze'
|
||
name: 'Datanyze'
|
||
url: ''
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Datanyze'
|
||
url: 'https://www.datanyze.com'
|
||
|
||
- regex: 'Dataprovider'
|
||
name: 'Dataprovider'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Dataprovider B.V.'
|
||
url: 'https://www.dataprovider.com/'
|
||
|
||
- regex: 'Daum(?!(?:Apps|Device))'
|
||
name: 'Daum'
|
||
category: 'Search bot'
|
||
url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
|
||
producer:
|
||
name: 'Daum Communications Corp.'
|
||
url: 'http://www.kakaocorp.com/main'
|
||
|
||
- regex: 'Dazoobot'
|
||
name: 'Dazoobot'
|
||
category: 'Search bot'
|
||
url: ''
|
||
producer:
|
||
name: 'DAZOO.FR'
|
||
url: 'http://dazoo.fr'
|
||
|
||
- regex: 'discobot'
|
||
name: 'Discobot'
|
||
category: 'Search bot'
|
||
url: 'http://discoveryengine.com/discobot.html'
|
||
producer:
|
||
name: 'Discovery Engine'
|
||
url: 'http://discoveryengine.com'
|
||
|
||
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
|
||
name: 'Domain Re-Animator Bot'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Domain Re-Animator, LLC'
|
||
url: 'http://domainreanimator.com'
|
||
|
||
- regex: 'DotBot'
|
||
name: 'DotBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.opensiteexplorer.org/dotbot'
|
||
producer:
|
||
name: 'SEOmoz, Inc.'
|
||
url: 'http://moz.com/'
|
||
|
||
- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
|
||
name: 'DuckDuckGo Bot'
|
||
category: 'Search bot'
|
||
url: 'https://duckduckgo.com/duckduckbot'
|
||
producer:
|
||
name: 'DuckDuckGo'
|
||
url: 'https://duckduckgo.com/'
|
||
|
||
- regex: 'EasouSpider'
|
||
name: 'Easou Spider'
|
||
category: 'Search bot'
|
||
url: 'http://www.easou.com/search/spider.html'
|
||
producer:
|
||
name: 'easou ICP'
|
||
url: 'http://www.easou.com'
|
||
|
||
- regex: 'eCairn-Grabber'
|
||
name: 'eCairn-Grabber'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'eCairn'
|
||
url: 'https://ecairn.com'
|
||
|
||
- regex: 'EMail Exractor'
|
||
name: 'EMail Exractor'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'evc-batch'
|
||
name: 'evc-batch'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'eVenture Capital Partners II, LLC'
|
||
url: 'http://www.eventures.vc/'
|
||
|
||
- regex: 'Exabot|ExaleadCloudview'
|
||
name: 'ExaBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.exabot.com/go/robot'
|
||
producer:
|
||
name: 'Dassault Systèmes'
|
||
url: 'http://www.3ds.com'
|
||
|
||
- regex: 'ExactSeek Crawler'
|
||
name: 'ExactSeek Crawler'
|
||
category: 'Search bot'
|
||
url: 'http://www.exactseek.com'
|
||
producer:
|
||
name: 'Jayde Online, Inc.'
|
||
url: 'http://www.jaydeonlineinc.com'
|
||
|
||
- regex: 'Ezooms'
|
||
name: 'Ezooms'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'SEOmoz, Inc.'
|
||
url: 'http://moz.com/'
|
||
|
||
- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
|
||
name: 'Facebook Crawler'
|
||
category: 'Social Media Agent'
|
||
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
|
||
producer:
|
||
name: 'Meta Platforms, Inc.'
|
||
url: 'https://www.meta.com/'
|
||
|
||
- regex: 'FacebookBot/[\d.]+'
|
||
name: 'FacebookBot'
|
||
category: 'Crawler'
|
||
url: 'https://developers.facebook.com/docs/sharing/bot'
|
||
producer:
|
||
name: 'Meta Platforms, Inc.'
|
||
url: 'https://www.meta.com/'
|
||
|
||
- regex: 'Feedbin'
|
||
name: 'Feedbin'
|
||
url: 'http://feedbin.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'FeedBurner'
|
||
name: 'FeedBurner'
|
||
url: 'http://www.feedburner.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Feed Wrangler'
|
||
name: 'Feed Wrangler'
|
||
url: 'https://feedwrangler.net/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: 'David Smith & Developing Perspective, LLC'
|
||
url: 'https://david-smith.org'
|
||
|
||
- regex: 'Feedly'
|
||
name: 'Feedly'
|
||
url: 'http://www.feedly.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Feedspot'
|
||
name: 'Feedspot'
|
||
url: 'http://www.feedspot.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Fever/[0-9]'
|
||
name: 'Fever'
|
||
url: 'http://feedafever.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'FlipboardProxy|FlipboardRSS'
|
||
name: 'Flipboard'
|
||
url: 'http://flipboard.com/browserproxy'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: 'Flipboard'
|
||
url: 'http://flipboard.com/'
|
||
|
||
- regex: 'Findxbot'
|
||
name: 'Findxbot'
|
||
category: 'Crawler'
|
||
url: 'http://www.findxbot.com'
|
||
|
||
- regex: 'FreshRSS'
|
||
name: 'FreshRSS'
|
||
category: 'Feed Fetcher'
|
||
url: 'https://freshrss.org/'
|
||
|
||
- regex: 'Genieo'
|
||
name: 'Genieo Web filter'
|
||
category: ''
|
||
url: 'http://www.genieo.com/webfilter.html'
|
||
producer:
|
||
name: 'Genieo'
|
||
url: 'http://www.genieo.com'
|
||
|
||
- regex: 'GigablastOpenSource'
|
||
name: 'Gigablast'
|
||
category: 'Search bot'
|
||
url: 'https://github.com/gigablast/open-source-search-engine'
|
||
producer:
|
||
name: 'Matt Wells'
|
||
url: 'http://www.gigablast.com/faq.html'
|
||
|
||
- regex: 'Gluten Free Crawler'
|
||
name: 'Gluten Free Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://glutenfreepleasure.com/'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'gobuster'
|
||
name: 'Gobuster'
|
||
url: 'https://github.com/OJ/gobuster'
|
||
|
||
- regex: 'ichiro/mobile goo'
|
||
name: 'Goo'
|
||
category: 'Search bot'
|
||
url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
|
||
producer:
|
||
name: 'NTT Resonant'
|
||
url: 'http://goo.ne.jp'
|
||
|
||
- regex: 'Storebot-Google'
|
||
name: 'Google StoreBot'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'Google Favicon'
|
||
name: 'Google Favicon'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'Google Search Console'
|
||
name: 'Google Search Console'
|
||
category: 'Crawler'
|
||
url: 'https://search.google.com/search-console/about'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'Google Page Speed Insights'
|
||
name: 'Google PageSpeed Insights'
|
||
category: 'Site Monitor'
|
||
url: 'http://developers.google.com/speed/pagespeed/insights/'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'google_partner_monitoring'
|
||
name: 'Google Partner Monitoring'
|
||
category: 'Site Monitor'
|
||
url: ''
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'Google-Cloud-Scheduler'
|
||
name: 'Google Cloud Scheduler'
|
||
category: 'Crawler'
|
||
url: 'https://cloud.google.com/scheduler'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com'
|
||
|
||
- regex: 'Google-Structured-Data-Testing-Tool'
|
||
name: 'Google Structured Data Testing Tool'
|
||
category: 'Validator'
|
||
url: 'https://search.google.com/structured-data/testing-tool'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'GoogleStackdriverMonitoring'
|
||
name: 'Google Stackdriver Monitoring'
|
||
category: 'Site Monitor'
|
||
url: 'https://cloud.google.com/monitoring'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com'
|
||
|
||
- regex: 'Google-Transparency-Report'
|
||
name: 'Google Transparency Report'
|
||
category: 'Site Monitor'
|
||
url: 'https://transparencyreport.google.com/'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'via ggpht\.com GoogleImageProxy'
|
||
name: 'Gmail Image Proxy'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'SeznamEmailProxy'
|
||
name: 'Seznam Email Proxy'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Seznam.cz, a.s.'
|
||
url: 'http://www.seznam.cz/'
|
||
|
||
- regex: 'Seznam-Zbozi-robot'
|
||
name: 'Seznam Zbozi.cz'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Seznam.cz, a.s.'
|
||
url: 'https://www.zbozi.cz/'
|
||
|
||
- regex: 'Heurekabot-Feed'
|
||
name: 'Heureka Feed'
|
||
category: 'Crawler'
|
||
url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
|
||
producer:
|
||
name: 'Heureka.cz, a.s.'
|
||
url: 'https://www.heureka.cz/'
|
||
|
||
- regex: 'ShopAlike'
|
||
name: 'ShopAlike'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Visual Meta'
|
||
url: 'https://www.shopalike.cz/'
|
||
|
||
- regex: 'Googlebot-News'
|
||
name: 'Googlebot News'
|
||
category: 'Search bot'
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
|
||
name: 'Googlebot'
|
||
category: 'Search bot'
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: '^Google$'
|
||
name: 'Googlebot'
|
||
category: 'Search bot'
|
||
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'Google-Area120-PrivacyPolicyFetcher'
|
||
name: 'Google Area 120 Privacy Policy Fetcher'
|
||
category: 'Crawler'
|
||
url: 'https://area120.google.com/'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'heritrix'
|
||
name: 'Heritrix'
|
||
category: 'Crawler'
|
||
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
||
producer:
|
||
name: 'The Internet Archive'
|
||
url: 'https://archive.org'
|
||
|
||
- regex: 'HubSpot '
|
||
name: 'HubSpot'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'HubSpot Inc.'
|
||
url: 'https://www.hubspot.com'
|
||
|
||
- regex: 'vuhuvBot'
|
||
name: 'Vuhuv Bot'
|
||
category: 'Crawler'
|
||
url: 'http://vuhuv.com/bot.html'
|
||
|
||
- regex: 'HTTPMon/[\d.]+'
|
||
name: 'HTTPMon'
|
||
category: 'Site Monitor'
|
||
url: 'http://www.httpmon.com'
|
||
producer:
|
||
name: 'towards GmbH'
|
||
url: 'http://www.towards.ch/'
|
||
|
||
- regex: 'ICC-Crawler'
|
||
name: 'ICC-Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'inoreader\.com'
|
||
name: 'inoreader'
|
||
category: 'Feed Reader'
|
||
url: 'https://www.inoreader.com'
|
||
|
||
- regex: 'iisbot'
|
||
name: 'IIS Site Analysis'
|
||
category: 'Crawler'
|
||
url: 'http://www.iis.net/iisbot.html'
|
||
producer:
|
||
name: 'Microsoft Corporation'
|
||
url: 'http://www.microsoft.com'
|
||
|
||
- regex: 'ips-agent'
|
||
name: 'IPS Agent'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'VeriSign, Inc'
|
||
url: 'http://www.verisign.com/'
|
||
|
||
- regex: 'IP-Guide\.com'
|
||
name: 'IP-Guide Crawler'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: ''
|
||
url: 'https://ip-guide.com'
|
||
|
||
- regex: 'k6/[0-9\.]+'
|
||
name: 'K6'
|
||
url: 'https://k6.io/'
|
||
|
||
- regex: 'kouio'
|
||
name: 'Kouio'
|
||
url: 'http://kouio.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'larbin'
|
||
name: 'Larbin web crawler'
|
||
category: 'Crawler'
|
||
url: 'http://larbin.sourceforge.net'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: '[A-z0-9]*-Lighthouse'
|
||
name: 'Lighthouse'
|
||
category: 'Site Monitor'
|
||
url: 'https://developers.google.com/web/tools/lighthouse'
|
||
producer:
|
||
name: 'Lighthouse'
|
||
url: 'https://developers.google.com/web/tools/lighthouse'
|
||
|
||
- regex: 'last-modified\.com'
|
||
name: 'LastMod Bot'
|
||
category: 'Site Monitor'
|
||
url: 'https://last-modified.com/en/about'
|
||
producer:
|
||
name: ''
|
||
url: 'https://last-modified.com/en'
|
||
|
||
- regex: 'linkdexbot|linkdex\.com'
|
||
name: 'Linkdex Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.linkdex.com/bots'
|
||
producer:
|
||
name: 'Mojeek Ltd.'
|
||
url: 'http://www.mojeek.com'
|
||
|
||
- regex: 'LinkedInBot'
|
||
name: 'LinkedIn Bot'
|
||
category: 'Social Media Agent'
|
||
url: 'http://www.linkedin.com'
|
||
producer:
|
||
name: 'LinkedIn'
|
||
url: 'http://www.linkedin.com'
|
||
|
||
- regex: 'ltx71'
|
||
name: 'LTX71'
|
||
category: 'Security Checker'
|
||
url: 'https://ltx71.com/'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Mail\.RU'
|
||
name: 'Mail.Ru Bot'
|
||
category: 'Search bot'
|
||
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
||
producer:
|
||
name: 'Mail.Ru Group'
|
||
url: 'http://corp.mail.ru'
|
||
|
||
- regex: 'magpie-crawler'
|
||
name: 'Magpie-Crawler'
|
||
category: 'Social Media Agent'
|
||
url: 'http://www.brandwatch.com/magpie-crawler/'
|
||
producer:
|
||
name: 'Brandwatch'
|
||
url: 'http://www.brandwatch.com'
|
||
|
||
- regex: 'MagpieRSS'
|
||
name: 'MagpieRSS'
|
||
url: 'http://magpierss.sourceforge.net/'
|
||
category: 'Feed Parser'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'masscan-ng/[\d.]+'
|
||
name: 'masscan-ng'
|
||
url: 'https://github.com/bi-zone/masscan-ng'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'BIZON, OOO'
|
||
url: 'https://bi.zone/'
|
||
|
||
- regex: '.*masscan'
|
||
name: 'masscan'
|
||
url: 'https://github.com/robertdavidgraham/masscan'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Robert Graham'
|
||
url: 'https://github.com/robertdavidgraham'
|
||
|
||
- regex: 'Mastodon/'
|
||
name: 'Mastodon Bot'
|
||
category: 'Social Media Agent'
|
||
|
||
- regex: 'meanpathbot'
|
||
name: 'Meanpath Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.meanpath.com/meanpathbot.html'
|
||
producer:
|
||
name: 'Meanpath'
|
||
url: 'http://www.meanpath.com'
|
||
|
||
- regex: 'MetaJobBot'
|
||
name: 'MetaJobBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.metajob.at/the/crawler'
|
||
producer:
|
||
name: 'MetaJob'
|
||
url: 'http://www.metajob.at'
|
||
|
||
- regex: 'MetaInspector'
|
||
name: 'MetaInspector'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/jaimeiniesta/metainspector'
|
||
|
||
- regex: 'MixrankBot'
|
||
name: 'Mixrank Bot'
|
||
category: 'Crawler'
|
||
url: 'http://mixrank.com'
|
||
producer:
|
||
name: 'Online Media Group, Inc.'
|
||
url: ''
|
||
|
||
- regex: 'MJ12bot'
|
||
name: 'MJ12 Bot'
|
||
category: 'Search bot'
|
||
url: 'http://majestic12.co.uk/bot.php'
|
||
producer:
|
||
name: 'Majestic-12'
|
||
url: 'http://majestic12.co.uk'
|
||
|
||
- regex: 'Mnogosearch'
|
||
name: 'Mnogosearch'
|
||
category: 'Search bot'
|
||
url: 'http://www.mnogosearch.org/'
|
||
producer:
|
||
name: 'Lavtech.Com Corp.'
|
||
url: ''
|
||
- regex: 'MojeekBot'
|
||
name: 'MojeekBot'
|
||
category: 'Search bot'
|
||
url: 'http://www.mojeek.com/bot.html'
|
||
producer:
|
||
name: 'Mojeek Ltd.'
|
||
url: 'http://www.mojeek.com'
|
||
|
||
- regex: 'munin'
|
||
name: 'Munin'
|
||
category: 'Site Monitor'
|
||
url: 'http://munin-monitoring.org/'
|
||
producer:
|
||
name: 'Munin'
|
||
url: 'http://munin-monitoring.org/'
|
||
|
||
- regex: 'NalezenCzBot'
|
||
name: 'NalezenCzBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.nalezen.cz/about-crawler'
|
||
producer:
|
||
name: 'Jaroslav Kuboš'
|
||
url: ''
|
||
|
||
- regex: 'check_http/v'
|
||
name: 'Nagios check_http'
|
||
category: 'Site Monitor'
|
||
url: 'https://nagios.org'
|
||
producer:
|
||
name: 'Nagios Plugins Development Team'
|
||
url: 'https://nagios.org'
|
||
|
||
- regex: 'nbertaupete95\(at\)gmail\.com'
|
||
name: 'nbertaupete95'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
|
||
name: 'Netcraft Survey Bot'
|
||
category: 'Search bot'
|
||
url: ''
|
||
producer:
|
||
name: 'Netcraft'
|
||
url: 'http://www.netcraft.com'
|
||
|
||
- regex: 'netEstate NE Crawler'
|
||
name: 'netEstate'
|
||
category: 'Crawler'
|
||
url: 'http://www.website-datenbank.de/Impressum'
|
||
producer:
|
||
name: 'netEstate GmbH'
|
||
url: 'https://www.netestate.de/en/'
|
||
|
||
- regex: 'Netvibes'
|
||
name: 'Netvibes'
|
||
url: 'http://www.netvibes.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'NewsBlur .*(?:Fetcher|Finder)'
|
||
name: 'NewsBlur'
|
||
url: 'http://www.newsblur.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'NewsGatorOnline'
|
||
name: 'NewsGator'
|
||
url: 'http://www.newsgator.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'nlcrawler'
|
||
name: 'NLCrawler'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Northern Light'
|
||
url: 'http://northernlight.com'
|
||
|
||
- regex: 'Nmap Scripting Engine'
|
||
name: 'Nmap'
|
||
category: 'Security Checker'
|
||
url: 'https://nmap.org/book/nse.html'
|
||
producer:
|
||
name: 'Nmap'
|
||
url: 'https://nmap.org/'
|
||
|
||
- regex: 'Nuzzel'
|
||
name: 'Nuzzel'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Nuzzel'
|
||
url: 'https://www.nuzzel.com/'
|
||
|
||
- regex: 'Octopus [0-9]'
|
||
name: 'Octopus'
|
||
|
||
- regex: 'OnlineOrNot\.com_bot'
|
||
name: 'OnlineOrNot Bot'
|
||
category: 'Site Monitor'
|
||
url: 'https://onlineornot.com/website-monitoring'
|
||
producer:
|
||
name: 'OnlineOrNot'
|
||
url: 'https://onlineornot.com'
|
||
|
||
- regex: 'omgili'
|
||
name: 'Omgili bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.omgili.com/Crawler.html'
|
||
producer:
|
||
name: 'Omgili'
|
||
url: 'http://www.omgili.com'
|
||
|
||
- regex: 'OpenindexSpider'
|
||
name: 'Openindex Spider'
|
||
category: 'Search bot'
|
||
url: 'http://www.openindex.io/en/webmasters/spider.html'
|
||
producer:
|
||
name: 'Openindex B.V.'
|
||
url: 'http://www.openindex.io'
|
||
|
||
- regex: 'spbot'
|
||
name: 'OpenLinkProfiler'
|
||
category: 'Crawler'
|
||
url: 'http://openlinkprofiler.org/bot'
|
||
producer:
|
||
name: 'Axandra GmbH'
|
||
url: 'http://www.axandra.com'
|
||
|
||
- regex: 'OpenWebSpider'
|
||
name: 'OpenWebSpider'
|
||
category: 'Crawler'
|
||
url: 'http://www.openwebspider.org'
|
||
producer:
|
||
name: 'OpenWebSpider Lab'
|
||
url: 'http://lab.openwebspider.org'
|
||
|
||
- regex: 'OrangeBot|VoilaBot'
|
||
name: 'Orange Bot'
|
||
category: 'Search bot'
|
||
url: 'http://lemoteur.orange.fr'
|
||
producer:
|
||
name: 'Orange'
|
||
url: 'http://www.orange.fr'
|
||
|
||
- regex: 'PaperLiBot'
|
||
name: 'PaperLiBot'
|
||
category: 'Search bot'
|
||
url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
|
||
producer:
|
||
name: 'Smallrivers SA'
|
||
url: 'http://www.paper.li'
|
||
|
||
- regex: 'phantomas/'
|
||
name: 'Phantomas'
|
||
category: 'Site Monitor'
|
||
url: 'https://github.com/macbre/phantomas'
|
||
|
||
- regex: 'phpservermon'
|
||
name: 'PHP Server Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://github.com/phpservermon/phpservermon'
|
||
producer:
|
||
name: 'PHP Server Monitor'
|
||
url: 'http://www.phpservermonitor.org/'
|
||
|
||
- regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
|
||
name: 'Pocket'
|
||
category: 'Read-it-later Service'
|
||
url: 'https://getpocket.com/pocketparser_ua'
|
||
producer:
|
||
name: 'Read It Later, Inc.'
|
||
url: 'https://getpocket.com/'
|
||
|
||
- regex: 'PritTorrent'
|
||
name: 'PritTorrent'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/astro/prittorrent'
|
||
producer:
|
||
name: 'Bitlove'
|
||
url: 'http://bitlove.org/'
|
||
|
||
- regex: 'PRTG Network Monitor'
|
||
name: 'PRTG Network Monitor'
|
||
category: 'Network Monitor'
|
||
url: 'https://www.paessler.com/prtg'
|
||
producer:
|
||
name: 'Paessler AG'
|
||
url: 'https://www.paessler.com'
|
||
|
||
- regex: 'psbot'
|
||
name: 'Picsearch bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.picsearch.com/bot.html'
|
||
producer:
|
||
name: 'Picsearch'
|
||
url: 'http://www.picsearch.com'
|
||
|
||
- regex: 'Pingdom(?:\.com|TMS)'
|
||
name: 'Pingdom Bot'
|
||
category: 'Site Monitor'
|
||
url: ''
|
||
producer:
|
||
name: 'Pingdom AB'
|
||
url: 'https://www.pingdom.com'
|
||
|
||
- regex: 'Quora Link Preview'
|
||
name: 'Quora Link Preview'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Quora'
|
||
url: 'http://www.quora.com'
|
||
|
||
- regex: 'Quora-Bot'
|
||
name: 'Quora Bot'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Quora'
|
||
url: 'https://www.quora.com/'
|
||
|
||
- regex: 'RamblerMail'
|
||
name: 'RamblerMail Image Proxy'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Rambler&Co'
|
||
url: 'https://rambler-co.ru/'
|
||
|
||
- regex: 'QuerySeekerSpider'
|
||
name: 'QuerySeekerSpider'
|
||
category: 'Crawler'
|
||
url: 'http://queryseeker.com/bot.html'
|
||
producer:
|
||
name: 'QueryEye Inc.'
|
||
url: 'http://queryeye.com'
|
||
|
||
- regex: 'Qwantify'
|
||
name: 'Qwantify'
|
||
category: 'Crawler'
|
||
url: 'https://www.qwant.com/'
|
||
producer:
|
||
name: 'Qwant Corporation'
|
||
url: 'https://www.qwant.com/'
|
||
|
||
- regex: 'Rainmeter'
|
||
name: 'Rainmeter'
|
||
category: 'Crawler'
|
||
url: 'https://www.rainmeter.net'
|
||
|
||
- regex: 'redditbot'
|
||
name: 'Reddit Bot'
|
||
category: 'Social Media Agent'
|
||
url: 'http://www.reddit.com/feedback'
|
||
producer:
|
||
name: 'reddit inc.'
|
||
url: 'http://www.reddit.com'
|
||
|
||
- regex: 'Riddler'
|
||
name: 'Riddler'
|
||
category: 'Security search bot'
|
||
url: 'https://riddler.io/about'
|
||
producer:
|
||
name: 'F-Secure'
|
||
url: 'https://www.f-secure.com'
|
||
|
||
- regex: 'rogerbot'
|
||
name: 'Rogerbot'
|
||
category: 'Crawler'
|
||
url: 'http://moz.com/help/pro/what-is-rogerbot-'
|
||
producer:
|
||
name: 'SEOmoz, Inc.'
|
||
url: 'http://moz.com/'
|
||
|
||
- regex: 'ROI Hunter'
|
||
name: 'ROI Hunter'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Roihunter a.s.'
|
||
url: 'http://roihunter.com/'
|
||
|
||
- regex: 'SafeDNSBot'
|
||
name: 'SafeDNSBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.safedns.com/searchbot'
|
||
producer:
|
||
name: 'SafeDNS, Inc.'
|
||
url: 'https://www.safedns.com/'
|
||
|
||
- regex: 'Scrapy'
|
||
name: 'Scrapy'
|
||
category: 'Crawler'
|
||
url: 'http://scrapy.org'
|
||
|
||
- regex: 'Screaming Frog SEO Spider'
|
||
name: 'Screaming Frog SEO Spider'
|
||
category: 'Crawler'
|
||
url: 'http://www.screamingfrog.co.uk/seo-spider'
|
||
producer:
|
||
name: 'Screaming Frog Ltd'
|
||
url: 'http://www.screamingfrog.co.uk'
|
||
|
||
- regex: 'ScreenerBot'
|
||
name: 'ScreenerBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.screenerbot.com'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'SemrushBot'
|
||
name: 'SemrushBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.semrush.com/bot/'
|
||
producer:
|
||
name: 'Semrush Inc.'
|
||
url: 'https://www.semrush.com/'
|
||
|
||
- regex: 'SerpReputationManagementAgent/[\d.]+'
|
||
name: 'Semrush Reputation Management'
|
||
category: 'Service Agent'
|
||
url: 'https://www.semrush.com/bot/'
|
||
producer:
|
||
name: 'Semrush Inc.'
|
||
url: 'https://www.semrush.com/'
|
||
|
||
- regex: 'SplitSignalBot'
|
||
name: 'SplitSignalBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.semrush.com/bot/'
|
||
producer:
|
||
name: 'Semrush Inc.'
|
||
url: 'https://www.semrush.com/'
|
||
|
||
- regex: 'SiteAuditBot/[\d.]+'
|
||
name: 'SiteAuditBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.semrush.com/bot/'
|
||
producer:
|
||
name: 'Semrush Inc.'
|
||
url: 'https://www.semrush.com/'
|
||
|
||
- regex: 'SensikaBot'
|
||
name: 'Sensika Bot'
|
||
category: ''
|
||
url: ''
|
||
producer:
|
||
name: 'Sensika'
|
||
url: 'http://sensika.com'
|
||
|
||
- regex: 'SEOENG(?:World)?Bot'
|
||
name: 'SEOENGBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.seoengine.com/seoengbot.htm'
|
||
producer:
|
||
name: 'SEO Engine'
|
||
url: 'http://www.seoengine.com'
|
||
|
||
- regex: 'SEOkicks-Robot'
|
||
name: 'SEOkicks-Robot'
|
||
category: 'Crawler'
|
||
url: 'http://www.seokicks.de/robot.html'
|
||
producer:
|
||
name: 'SEOkicks'
|
||
url: 'https://www.seokicks.de/'
|
||
|
||
- regex: 'seoscanners\.net'
|
||
name: 'Seoscanners.net'
|
||
category: 'Crawler'
|
||
url: ''
|
||
|
||
- regex: 'SkypeUriPreview'
|
||
name: 'Skype URI Preview'
|
||
category: 'Service Agent'
|
||
url: ''
|
||
producer:
|
||
name: 'Skype Communications S.à.r.l.'
|
||
url: 'https://www.skype.com'
|
||
|
||
- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
|
||
name: 'Seznam Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.mapy.cz/cz/seznambot.html'
|
||
producer:
|
||
name: 'Seznam.cz, a.s.'
|
||
url: 'http://www.seznam.cz/'
|
||
|
||
- regex: 'shopify-partner-homepage-scraper'
|
||
name: 'Shopify Partner'
|
||
category: 'Crawler'
|
||
url: 'https://www.shopify.com/partners'
|
||
producer:
|
||
name: 'Shopify'
|
||
url: 'https://www.shopify.com/'
|
||
|
||
- regex: 'ShopWiki'
|
||
name: 'ShopWiki'
|
||
category: 'Search tools'
|
||
url: 'http://www.shopwiki.com/wiki/Help:Bot'
|
||
producer:
|
||
name: 'ShopWiki Corp.'
|
||
url: 'http://www.shopwiki.com'
|
||
|
||
- regex: 'SilverReader'
|
||
name: 'SilverReader'
|
||
url: 'http://silverreader.com'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'SimplePie'
|
||
name: 'SimplePie'
|
||
url: 'http://www.simplepie.org'
|
||
category: 'Feed Parser'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'SISTRIX Crawler'
|
||
name: 'SISTRIX Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://crawler.sistrix.net'
|
||
producer:
|
||
name: 'SISTRIX GmbH'
|
||
url: 'http://www.sistrix.de'
|
||
|
||
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
||
name: 'SISTRIX Optimizer'
|
||
category: 'Crawler'
|
||
url: 'https://optimizer.sistrix.com'
|
||
producer:
|
||
name: 'SISTRIX GmbH'
|
||
url: 'http://www.sistrix.de'
|
||
|
||
- regex: 'SiteSucker'
|
||
name: 'SiteSucker'
|
||
category: 'Crawler'
|
||
url: 'http://ricks-apps.com/osx/sitesucker/'
|
||
|
||
- regex: 'sixy\.ch'
|
||
name: 'Sixy.ch'
|
||
category: 'Site Monitor'
|
||
url: 'http://sixy.ch'
|
||
producer:
|
||
name: 'Manuel Kasper'
|
||
url: 'https://neon1.net/'
|
||
|
||
- regex: 'Slackbot|Slack-ImgProxy'
|
||
name: 'Slackbot'
|
||
category: 'Crawler'
|
||
url: 'https://api.slack.com/robots'
|
||
producer:
|
||
name: 'Slack Technologies'
|
||
url: 'http://slack.com'
|
||
|
||
- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
|
||
name: 'Sogou Spider'
|
||
category: 'Search bot'
|
||
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
||
producer:
|
||
name: 'Sohu, Inc.'
|
||
url: 'http://www.sogou.com'
|
||
|
||
- regex: 'Sosospider|Sosoimagespider'
|
||
name: 'Soso Spider'
|
||
category: 'Search bot'
|
||
url: 'http://help.soso.com/webspider.htm'
|
||
producer:
|
||
name: 'Tencent Holdings'
|
||
url: 'http://www.soso.com'
|
||
|
||
- regex: 'Sprinklr'
|
||
name: 'Sprinklr'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Sprinklr, Inc.'
|
||
url: 'https://www.sprinklr.com/'
|
||
|
||
- regex: 'sqlmap/'
|
||
name: 'sqlmap'
|
||
category: 'Security Checker'
|
||
url: 'http://sqlmap.org/'
|
||
producer:
|
||
name: 'sqlmap'
|
||
url: 'http://sqlmap.org/'
|
||
|
||
- regex: 'SSL Labs'
|
||
name: 'SSL Labs'
|
||
category: 'Validator'
|
||
url: 'https://www.ssllabs.com/about/assessment.html'
|
||
producer:
|
||
name: 'SSL Labs'
|
||
url: 'https://www.ssllabs.com/about/assessment.html'
|
||
|
||
- regex: 'StatusCake'
|
||
name: 'StatusCake'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.statuscake.com'
|
||
producer:
|
||
name: 'StatusCake'
|
||
url: 'https://www.statuscake.com'
|
||
|
||
- regex: 'Superfeedr bot'
|
||
name: 'Superfeedr Bot'
|
||
category: 'Feed Fetcher'
|
||
url: ''
|
||
producer:
|
||
name: 'Superfeedr'
|
||
url: 'https://superfeedr.com/'
|
||
|
||
- regex: 'Sparkler/[0-9]'
|
||
name: 'Sparkler'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/USCDataScience/sparkler'
|
||
|
||
- regex: 'Spinn3r'
|
||
name: 'Spinn3r'
|
||
category: 'Crawler'
|
||
url: 'http://spinn3r.com/robot'
|
||
producer:
|
||
name: 'Tailrank Inc'
|
||
url: 'http://spinn3r.com'
|
||
|
||
- regex: 'SputnikBot'
|
||
name: 'Sputnik Bot'
|
||
category: 'Crawler'
|
||
url: ''
|
||
|
||
- regex: 'SputnikFaviconBot'
|
||
name: 'Sputnik Favicon Bot'
|
||
category: 'Crawler'
|
||
url: ''
|
||
|
||
- regex: 'SputnikImageBot'
|
||
name: 'Sputnik Image Bot'
|
||
category: 'Crawler'
|
||
url: ''
|
||
|
||
- regex: 'SurveyBot'
|
||
name: 'Survey Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.domaintools.com/webmasters/surveybot.php'
|
||
producer:
|
||
name: 'Domain Tools'
|
||
url: 'http://www.domaintools.com'
|
||
|
||
- regex: 'TarmotGezgin'
|
||
name: 'Tarmot Gezgin'
|
||
url: 'http://www.tarmot.com/gezgin/'
|
||
category: 'Search bot'
|
||
|
||
- regex: 'TelegramBot'
|
||
name: 'TelegramBot'
|
||
url: 'https://telegram.org/blog/bot-revolution'
|
||
|
||
- regex: 'TLSProbe'
|
||
name: 'TLSProbe'
|
||
url: 'https://scan.trustnet.venafi.com/'
|
||
category: 'Security search bot'
|
||
producer:
|
||
name: 'Venafi TrustNet'
|
||
url: 'https://www.venafi.com'
|
||
|
||
- regex: 'TinEye-bot'
|
||
name: 'TinEye Crawler'
|
||
category: 'Search bot'
|
||
url: 'http://www.tineye.com/crawler.html'
|
||
producer:
|
||
name: 'Idée Inc.'
|
||
url: 'http://ideeinc.com'
|
||
|
||
- regex: 'Tiny Tiny RSS'
|
||
name: 'Tiny Tiny RSS'
|
||
url: 'http://tt-rss.org'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'theoldreader\.com'
|
||
name: 'theoldreader'
|
||
category: 'Feed Reader'
|
||
url: 'https://theoldreader.com'
|
||
|
||
- regex: 'Trackable/0\.1'
|
||
name: 'Chartable'
|
||
category: 'Site Monitor'
|
||
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
|
||
producer:
|
||
name: 'Chartable'
|
||
url: 'https://chartable.com'
|
||
|
||
- regex: 'trendictionbot'
|
||
name: 'Trendiction Bot'
|
||
category: 'Crawler'
|
||
url: 'http://www.trendiction.de/bot'
|
||
producer:
|
||
name: 'Talkwalker Inc.'
|
||
url: 'http://www.talkwalker.com'
|
||
|
||
- regex: 'TurnitinBot'
|
||
name: 'TurnitinBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.turnitin.com/robot/crawlerinfo.html'
|
||
producer:
|
||
name: 'iParadigms, LLC.'
|
||
url: 'http://www.turnitin.com'
|
||
|
||
- regex: 'TweetedTimes'
|
||
name: 'TweetedTimes Bot'
|
||
category: 'Crawler'
|
||
url: 'https://tweetedtimes.com/'
|
||
producer:
|
||
name: 'TweetedTimes'
|
||
url: 'https://tweetedtimes.com/'
|
||
|
||
- regex: 'TweetmemeBot'
|
||
name: 'Tweetmeme Bot'
|
||
category: 'Crawler'
|
||
url: 'http://tweetmeme.com/'
|
||
producer:
|
||
name: 'Mediasift'
|
||
url: ''
|
||
|
||
- regex: 'Twingly Recon'
|
||
name: 'Twingly Recon'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Twingly'
|
||
url: 'https://www.twingly.com'
|
||
|
||
- regex: 'Twitterbot'
|
||
name: 'Twitterbot'
|
||
category: 'Social Media Agent'
|
||
url: 'https://dev.twitter.com/docs/cards/getting-started'
|
||
producer:
|
||
name: 'Twitter'
|
||
url: 'http://www.twitter.com'
|
||
|
||
- regex: 'UniversalFeedParser'
|
||
name: 'UniversalFeedParser'
|
||
category: 'Feed Fetcher'
|
||
url: 'https://github.com/kurtmckee/feedparser'
|
||
producer:
|
||
name: 'Kurt McKee'
|
||
url: 'https://github.com/kurtmckee'
|
||
|
||
- regex: 'via secureurl\.fwdcdn\.com'
|
||
name: 'UkrNet Mail Proxy'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'UkrNet Ltd'
|
||
url: 'https://www.ukr.net/'
|
||
|
||
- regex: 'Uptime(?:bot)?/[\d.]+'
|
||
name: 'Uptimebot'
|
||
category: 'Site Monitor'
|
||
url: 'https://uptime.com/uptime-bot'
|
||
producer:
|
||
name: 'Uptime'
|
||
url: 'https://uptime.com/'
|
||
|
||
- regex: 'UptimeRobot'
|
||
name: 'UptimeRobot'
|
||
category: 'Site Monitor'
|
||
url: 'https://uptimerobot.com/'
|
||
producer:
|
||
name: 'Uptime Robot'
|
||
url: 'https://uptimerobot.com/'
|
||
|
||
- regex: 'URLAppendBot'
|
||
name: 'URLAppendBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.profound.net/urlappendbot.html'
|
||
producer:
|
||
name: 'Profound Networks'
|
||
url: 'http://www.profound.net'
|
||
|
||
- regex: 'Vagabondo'
|
||
name: 'Vagabondo'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'WiseGuys'
|
||
url: 'http://www.wise-guys.nl/'
|
||
|
||
- regex: 'vkShare; '
|
||
name: 'VK Share Button'
|
||
category: 'Crawler'
|
||
url: 'https://dev.vk.com/en/widgets/share'
|
||
producer:
|
||
name: 'VK'
|
||
url: 'https://vk.com/'
|
||
|
||
- regex: 'VKRobot'
|
||
name: 'VK Robot'
|
||
category: 'Crawler'
|
||
url: 'https://dev.vk.com/en/'
|
||
producer:
|
||
name: 'VK'
|
||
url: 'https://vk.com/'
|
||
|
||
- regex: 'VSMCrawler'
|
||
name: 'Visual Site Mapper Crawler'
|
||
category: 'Crawler'
|
||
url: 'http://www.visualsitemapper.com/crawler'
|
||
producer:
|
||
name: 'Alentum Software Ltd.'
|
||
url: 'http://www.alentum.com'
|
||
|
||
- regex: 'Jigsaw'
|
||
name: 'W3C CSS Validator'
|
||
category: 'Validator'
|
||
url: 'http://jigsaw.w3.org/css-validator'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'W3C_I18n-Checker'
|
||
name: 'W3C I18N Checker'
|
||
category: 'Validator'
|
||
url: 'http://validator.w3.org/i18n-checker'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'W3C-checklink'
|
||
name: 'W3C Link Checker'
|
||
category: 'Validator'
|
||
url: 'http://validator.w3.org/checklink'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'W3C_Validator|Validator\.nu'
|
||
name: 'W3C Markup Validation Service'
|
||
category: 'Validator'
|
||
url: 'http://validator.w3.org/services'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'W3C-mobileOK'
|
||
name: 'W3C MobileOK Checker'
|
||
category: 'Validator'
|
||
url: 'http://validator.w3.org/mobile'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'W3C_Unicorn'
|
||
name: 'W3C Unified Validator'
|
||
category: 'Validator'
|
||
url: 'http://validator.w3.org/unicorn'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'http://www.w3.org'
|
||
|
||
- regex: 'P3P Validator'
|
||
name: 'W3C P3P Validator'
|
||
category: 'Validator'
|
||
url: 'https://www.w3.org/P3P/validator.html'
|
||
producer:
|
||
name: 'W3C'
|
||
url: 'https://www.w3.org'
|
||
|
||
- regex: 'Wappalyzer'
|
||
name: 'Wappalyzer'
|
||
url: 'https://github.com/AliasIO/Wappalyzer'
|
||
producer:
|
||
name: 'AliasIO'
|
||
url: 'https://github.com/AliasIO'
|
||
|
||
- regex: 'PTST/'
|
||
name: 'WebPageTest'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.webpagetest.org'
|
||
|
||
- regex: 'WeSEE'
|
||
name: 'WeSEE:Search'
|
||
category: 'Search bot'
|
||
url: 'http://www.wesee.com/bot'
|
||
producer:
|
||
name: 'WeSEE Ltd'
|
||
url: 'http://www.wesee.com'
|
||
|
||
- regex: 'WebbCrawler'
|
||
name: 'WebbCrawler'
|
||
category: 'Crawler'
|
||
url: 'http://badcheese.com/crawler.html'
|
||
producer:
|
||
name: 'Steve Webb'
|
||
url: 'http://badcheese.com'
|
||
|
||
- regex: 'websitepulse[+ ]checker'
|
||
name: 'WebSitePulse'
|
||
category: 'Site Monitor'
|
||
url: 'http://www.websitepulse.com/'
|
||
producer:
|
||
name: 'WebSitePulse'
|
||
url: 'http://www.websitepulse.com/'
|
||
|
||
- regex: 'WordPress.+isitwp\.com'
|
||
name: 'IsItWP'
|
||
category: 'Crawler'
|
||
url: 'https://www.isitwp.com/'
|
||
producer:
|
||
name: 'WPBeginner, LLC'
|
||
url: 'https://www.wpbeginner.com/'
|
||
|
||
- regex: 'Automattic Analytics Crawler/[\d.]+'
|
||
name: 'Automattic Analytics'
|
||
category: 'Crawler'
|
||
url: 'https://wordpress.com/crawler/'
|
||
producer:
|
||
name: 'Wordpress.org'
|
||
url: 'https://wordpress.org/'
|
||
|
||
- regex: 'WordPress'
|
||
name: 'WordPress'
|
||
category: 'Service Agent'
|
||
url: 'https://wordpress.org/'
|
||
producer:
|
||
name: 'Wordpress.org'
|
||
url: 'https://wordpress.org/'
|
||
|
||
- regex: 'Wotbox'
|
||
name: 'Wotbox'
|
||
category: 'Search bot'
|
||
url: 'http://www.wotbox.com/bot/'
|
||
producer:
|
||
name: 'Wotbox'
|
||
url: 'http://www.wotbox.com'
|
||
|
||
- regex: 'XenForo'
|
||
name: 'XenForo'
|
||
category: 'Service Agent'
|
||
url: 'https://xenforo.com/'
|
||
producer:
|
||
name: 'XenForo Ltd.'
|
||
url: 'https://xenforo.com/'
|
||
|
||
- regex: 'yacybot'
|
||
name: 'YaCy'
|
||
category: 'Search bot'
|
||
url: 'http://yacy.net/bot.html'
|
||
producer:
|
||
name: 'YaCy'
|
||
url: 'http://yacy.net'
|
||
|
||
- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
|
||
name: 'Yahoo! Slurp'
|
||
category: 'Search bot'
|
||
url: 'http://help.yahoo.com/ysearch/slurp'
|
||
producer:
|
||
name: 'Yahoo! Inc.'
|
||
url: 'http://www.yahoo.com'
|
||
|
||
- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
|
||
name: 'Yahoo! Link Preview'
|
||
category: 'Crawler'
|
||
url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
|
||
producer:
|
||
name: 'Yahoo! Inc.'
|
||
url: 'http://www.yahoo.com'
|
||
|
||
- regex: 'YahooMailProxy'
|
||
name: 'Yahoo! Mail Proxy'
|
||
category: 'Service Agent'
|
||
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
||
producer:
|
||
name: 'Yahoo! Inc.'
|
||
url: 'http://www.yahoo.com'
|
||
|
||
- regex: 'YahooCacheSystem'
|
||
name: 'Yahoo! Cache System'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Yahoo! Inc.'
|
||
url: 'http://www.yahoo.com'
|
||
|
||
- regex: 'Y!J-BRW'
|
||
name: 'Yahoo! Japan BRW'
|
||
category: 'Crawler'
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
||
producer:
|
||
name: 'Yahoo! Japan Corp.'
|
||
url: 'https://www.yahoo.co.jp/'
|
||
|
||
- regex: 'Y!J-WSC'
|
||
name: 'Yahoo! Japan WSC'
|
||
category: 'Crawler'
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
||
producer:
|
||
name: 'Yahoo! Japan Corp.'
|
||
url: 'https://www.yahoo.co.jp/'
|
||
|
||
- regex: 'Y!J-ASR'
|
||
name: 'Yahoo! Japan ASR'
|
||
category: 'Crawler'
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
||
producer:
|
||
name: 'Yahoo! Japan Corp.'
|
||
url: 'https://www.yahoo.co.jp/'
|
||
|
||
- regex: '^Y!J'
|
||
name: 'Yahoo! Japan'
|
||
category: 'Crawler'
|
||
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
||
producer:
|
||
name: 'Yahoo! Japan Corp.'
|
||
url: 'https://www.yahoo.co.jp/'
|
||
|
||
- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
|
||
name: 'Yandex Bot'
|
||
category: 'Search bot'
|
||
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
|
||
producer:
|
||
name: 'Yandex LLC'
|
||
url: 'https://yandex.com/company/'
|
||
|
||
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
||
name: 'Yeti/Naverbot'
|
||
category: 'Search bot'
|
||
url: 'http://help.naver.com/robots/'
|
||
producer:
|
||
name: 'Naver'
|
||
url: 'http://www.naver.com'
|
||
|
||
- regex: 'YoudaoBot'
|
||
name: 'Youdao Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.youdao.com/help/webmaster/spider'
|
||
producer:
|
||
name: 'NetEase, Inc.'
|
||
url: 'http://corp.163.com'
|
||
|
||
- regex: 'YOURLS v[0-9]'
|
||
name: 'Yourls'
|
||
category: 'Crawler'
|
||
url: 'http://yourls.org'
|
||
|
||
- regex: 'YRSpider|YYSpider'
|
||
name: 'Yunyun Bot'
|
||
category: 'Search bot'
|
||
url: 'http://www.yunyun.com/SiteInfo.php?r=about'
|
||
producer:
|
||
name: 'YunYun'
|
||
url: 'http://www.yunyun.com'
|
||
|
||
- regex: 'zgrab'
|
||
name: 'zgrab'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/zmap/zgrab'
|
||
|
||
- regex: 'Zookabot'
|
||
name: 'Zookabot'
|
||
category: 'Crawler'
|
||
url: 'http://zookabot.com'
|
||
producer:
|
||
name: 'Hwacha ApS'
|
||
url: 'http://hwacha.dk'
|
||
|
||
- regex: 'ZumBot'
|
||
name: 'ZumBot'
|
||
category: 'Search bot'
|
||
url: 'http://help.zum.com/inquiry'
|
||
producer:
|
||
name: 'ZUM internet'
|
||
url: 'http://www.zuminternet.com/'
|
||
|
||
- regex: 'YottaaMonitor'
|
||
name: 'Yottaa Site Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'http://www.yottaa.com/products/site-monitor'
|
||
producer:
|
||
name: 'Yottaa'
|
||
url: 'http://www.yottaa.com/'
|
||
|
||
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
|
||
name: 'Yahoo Gemini'
|
||
category: 'Crawler'
|
||
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
|
||
producer:
|
||
name: 'Yahoo! Inc.'
|
||
url: 'http://www.yahoo.com'
|
||
|
||
- regex: '.*Java.*outbrain'
|
||
name: 'Outbrain'
|
||
category: 'Crawler'
|
||
url: ''
|
||
producer:
|
||
name: 'Outbrain'
|
||
url: 'http://www.outbrain.com/'
|
||
|
||
- regex: 'HubPages.*crawlingpolicy'
|
||
name: 'HubPages'
|
||
category: 'Crawler'
|
||
url: 'https://hubpages.com/help/crawlingpolicy'
|
||
producer:
|
||
name: 'HubPages, Inc.'
|
||
url: 'https://discover.hubpages.com/'
|
||
|
||
- regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
|
||
name: 'Pinterest'
|
||
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Pinterest'
|
||
url: 'https://www.pinterest.com/'
|
||
|
||
- regex: '.*Site24x7'
|
||
name: 'Site24x7 Website Monitoring'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.site24x7.com/site24x7-faq.html'
|
||
producer:
|
||
name: 'Site24x7'
|
||
url: 'https://www.site24x7.com'
|
||
|
||
- regex: '.* HLB/[\d.]+'
|
||
name: 'Site24x7 Defacement Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
|
||
producer:
|
||
name: 'Site24x7'
|
||
url: 'https://www.site24x7.com/'
|
||
|
||
- regex: 's~snapchat-proxy'
|
||
name: 'Snapchat Proxy'
|
||
category: 'Crawler'
|
||
url: 'https://www.snapchat.com'
|
||
producer:
|
||
name: 'Snapchat Inc.'
|
||
url: 'https://www.snapchat.com'
|
||
|
||
- regex: 'Snap URL Preview Service'
|
||
name: 'Snap URL Preview Service'
|
||
category: 'Service Agent'
|
||
url: 'https://developers.snap.com/robots'
|
||
producer:
|
||
name: 'Snapchat Inc.'
|
||
url: 'https://www.snapchat.com/'
|
||
|
||
- regex: 'SnapchatAds/[\d.]+'
|
||
name: 'Snapchat Ads'
|
||
category: 'Crawler'
|
||
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
|
||
producer:
|
||
name: 'Snapchat Inc.'
|
||
url: 'https://www.snapchat.com/'
|
||
|
||
- regex: "Let's Encrypt validation server"
|
||
name: "Let's Encrypt Validation"
|
||
category: 'Service Agent'
|
||
url: 'https://letsencrypt.org/how-it-works/'
|
||
producer:
|
||
name: "Let's Encrypt"
|
||
url: 'https://letsencrypt.org'
|
||
|
||
- regex: 'GrapeshotCrawler'
|
||
name: 'Grapeshot'
|
||
category: 'Crawler'
|
||
url: 'https://www.grapeshot.com/crawler'
|
||
producer:
|
||
name: 'Grapeshot'
|
||
url: 'https://www.grapeshot.com'
|
||
|
||
- regex: 'www\.monitor\.us'
|
||
name: 'Monitor.Us'
|
||
category: 'Site Monitor'
|
||
url: 'http://www.monitor.us'
|
||
producer:
|
||
name: 'Monitor.Us'
|
||
url: 'http://www.monitor.us'
|
||
|
||
- regex: 'Catchpoint'
|
||
name: 'Catchpoint'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.catchpoint.com/'
|
||
producer:
|
||
name: 'Catchpoint Systems'
|
||
url: 'https://www.catchpoint.com/'
|
||
|
||
- regex: 'bitlybot'
|
||
name: 'BitlyBot'
|
||
category: 'Crawler'
|
||
url: 'https://bitly.com'
|
||
producer:
|
||
name: 'Bitly, Inc.'
|
||
url: 'https://bitly.com'
|
||
|
||
- regex: 'Zao/'
|
||
name: 'Zao'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'lycos'
|
||
name: 'Lycos'
|
||
|
||
- regex: 'Slurp'
|
||
name: 'Inktomi Slurp'
|
||
|
||
- regex: 'Speedy Spider'
|
||
name: 'Speedy'
|
||
|
||
- regex: 'ScoutJet'
|
||
name: 'ScoutJet'
|
||
|
||
- regex: 'nrsbot|netresearch'
|
||
name: 'NetResearchServer'
|
||
|
||
- regex: 'scooter'
|
||
name: 'Scooter'
|
||
|
||
- regex: 'gigabot'
|
||
name: 'Gigabot'
|
||
|
||
- regex: 'charlotte'
|
||
name: 'Charlotte'
|
||
|
||
- regex: 'Pompos'
|
||
name: 'Pompos'
|
||
|
||
- regex: 'ichiro'
|
||
name: 'ichiro'
|
||
|
||
- regex: 'PagePeeker'
|
||
name: 'PagePeeker'
|
||
|
||
- regex: 'WebThumbnail'
|
||
name: 'WebThumbnail'
|
||
|
||
- regex: 'Willow Internet Crawler'
|
||
name: 'Willow Internet Crawler'
|
||
|
||
- regex: 'EmailWolf'
|
||
name: 'EmailWolf'
|
||
|
||
- regex: 'NetLyzer FastProbe'
|
||
name: 'NetLyzer FastProbe'
|
||
|
||
- regex: 'AdMantX.*admantx\.com'
|
||
name: 'ADMantX'
|
||
|
||
- regex: 'Server Density Service Monitoring'
|
||
name: 'Server Density'
|
||
|
||
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
||
name: 'RSSRadio Bot'
|
||
|
||
- regex: '^sentry'
|
||
name: 'Sentry Bot'
|
||
producer:
|
||
name: 'Sentry'
|
||
url: 'https://sentry.io'
|
||
|
||
- regex: '^Spotify/[\d.]+$'
|
||
name: 'Spotify'
|
||
producer:
|
||
name: 'Spotify'
|
||
url: 'https://www.spotify.com'
|
||
|
||
- regex: 'The Knowledge AI'
|
||
name: 'The Knowledge AI'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'Embedly'
|
||
name: 'Embedly'
|
||
category: 'Crawler'
|
||
url: 'https://support.embed.ly/hc/en-us'
|
||
producer:
|
||
name: 'A Medium, Corp.'
|
||
url: 'https://medium.com/'
|
||
|
||
- regex: 'BrandVerity'
|
||
name: 'BrandVerity'
|
||
category: 'Crawler'
|
||
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
||
producer:
|
||
name: 'BrandVerity, Inc.'
|
||
url: 'https://www.brandverity.com/'
|
||
|
||
- regex: 'Kaspersky Lab CFR link resolver'
|
||
name: 'Kaspersky'
|
||
category: 'Security Checker'
|
||
url: 'https://www.kaspersky.com/'
|
||
producer:
|
||
name: 'AO Kaspersky Lab'
|
||
url: 'https://www.kaspersky.com/'
|
||
|
||
- regex: 'eZ Publish Link Validator'
|
||
name: 'eZ Publish Link Validator'
|
||
category: 'Crawler'
|
||
url: 'https://ez.no/'
|
||
producer:
|
||
name: 'eZ Systems AS'
|
||
url: 'https://ez.no/'
|
||
|
||
- regex: 'woorankreview'
|
||
name: 'WooRank'
|
||
category: 'Search bot'
|
||
url: 'https://www.woorank.com/'
|
||
producer:
|
||
name: 'WooRank sprl'
|
||
url: 'https://www.woorank.com/'
|
||
|
||
- regex: 'by Siteimprove\.com'
|
||
name: 'Siteimprove'
|
||
category: 'Search bot'
|
||
url: 'https://siteimprove.com/'
|
||
producer:
|
||
name: 'Siteimprove GmbH'
|
||
url: 'https://siteimprove.com/'
|
||
|
||
- regex: 'CATExplorador'
|
||
name: 'CATExplorador'
|
||
category: 'Search bot'
|
||
url: 'https://fundacio.cat/ca/domini/'
|
||
producer:
|
||
name: 'Fundació puntCAT'
|
||
url: 'https://fundacio.cat/ca/domini/'
|
||
|
||
- regex: 'Buck'
|
||
name: 'Buck'
|
||
category: 'Search bot'
|
||
url: 'https://hypefactors.com/'
|
||
producer:
|
||
name: 'Hypefactors A/S'
|
||
url: 'https://hypefactors.com/'
|
||
|
||
- regex: 'tracemyfile'
|
||
name: 'TraceMyFile'
|
||
category: 'Search bot'
|
||
url: 'https://www.tracemyfile.com/'
|
||
producer:
|
||
name: 'Idee Inc.'
|
||
url: 'http://ideeinc.com/'
|
||
|
||
- regex: 'zelist\.ro feed parser'
|
||
name: 'Ze List'
|
||
url: 'https://www.zelist.ro/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: 'Treeworks SRL'
|
||
url: 'https://www.tree.ro/'
|
||
|
||
- regex: 'weborama-fetcher'
|
||
name: 'Weborama'
|
||
category: 'Search bot'
|
||
url: 'https://weborama.com/'
|
||
producer:
|
||
name: 'Weborama SA'
|
||
url: 'https://weborama.com/'
|
||
|
||
- regex: 'BoardReader Favicon Fetcher'
|
||
name: 'BoardReader'
|
||
category: 'Search bot'
|
||
url: 'https://boardreader.com/'
|
||
producer:
|
||
name: 'Effyis Inc'
|
||
url: 'https://boardreader.com/'
|
||
|
||
- regex: 'IDG/IT'
|
||
name: 'IDG/IT'
|
||
category: 'Search bot'
|
||
url: 'https://spaziodati.eu/'
|
||
producer:
|
||
name: 'SpazioDati S.r.l.'
|
||
url: 'https://spaziodati.eu/'
|
||
|
||
- regex: 'Bytespider'
|
||
name: 'Bytespider'
|
||
category: 'Search bot'
|
||
url: 'https://bytedance.com/'
|
||
producer:
|
||
name: 'ByteDance Ltd.'
|
||
url: 'https://bytedance.com/'
|
||
|
||
- regex: 'WikiDo'
|
||
name: 'WikiDo'
|
||
category: 'Search bot'
|
||
url: 'https://www.wikido.com/'
|
||
producer:
|
||
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
||
url: 'https://www.wikido.com/'
|
||
|
||
- regex: 'Awario(?:Smart)?Bot'
|
||
name: 'Awario'
|
||
category: 'Search bot'
|
||
url: 'https://awario.com/bots.html'
|
||
producer:
|
||
name: 'TechFusion Ltd.'
|
||
url: 'https://www.techfusion.com.cy/'
|
||
|
||
- regex: 'AwarioRssBot'
|
||
name: 'Awario'
|
||
category: 'Feed Fetcher'
|
||
url: 'https://awario.com/bots.html'
|
||
producer:
|
||
name: 'TechFusion Ltd.'
|
||
url: 'https://www.techfusion.com.cy/'
|
||
|
||
- regex: 'oBot'
|
||
name: 'oBot'
|
||
category: 'Search bot'
|
||
url: 'https://www.xforce-security.com/crawler/'
|
||
producer:
|
||
name: 'IBM Germany Research & Development GmbH'
|
||
url: 'https://exchange.xforce.ibmcloud.com/'
|
||
|
||
- regex: 'SMTBot'
|
||
name: 'SMTBot'
|
||
category: 'Search bot'
|
||
url: 'https://www.similartech.com/smtbot'
|
||
producer:
|
||
name: 'SimilarTech Ltd.'
|
||
url: 'https://www.similartech.com/'
|
||
|
||
- regex: 'LCC'
|
||
name: 'LCC'
|
||
category: 'Search bot'
|
||
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
||
producer:
|
||
name: 'Universität Leipzig'
|
||
url: 'https://www.uni-leipzig.de/'
|
||
|
||
- regex: 'Startpagina-Linkchecker'
|
||
name: 'Startpagina Linkchecker'
|
||
category: 'Search bot'
|
||
url: 'https://www.startpagina.nl/linkchecker'
|
||
producer:
|
||
name: 'Startpagina B.V.'
|
||
url: 'https://www.startpagina.nl/'
|
||
|
||
- regex: 'MoodleBot-Linkchecker'
|
||
name: 'MoodleBot Linkchecker'
|
||
category: 'Search bot'
|
||
url: 'https://docs.moodle.org/en/Usage'
|
||
producer:
|
||
name: 'Moodle Pty Ltd'
|
||
url: 'https://moodle.org/'
|
||
|
||
- regex: 'GTmetrix'
|
||
name: 'GTmetrix'
|
||
category: 'Crawler'
|
||
url: 'https://gtmetrix.com/'
|
||
producer:
|
||
name: 'Carbon60 Operating Co. Ltd.'
|
||
url: 'https://www.carbon60.com/'
|
||
|
||
- regex: 'Nutch'
|
||
name: 'Nutch-based Bot'
|
||
category: 'Crawler'
|
||
url: 'https://nutch.apache.org'
|
||
producer:
|
||
name: 'The Apache Software Foundation'
|
||
url: 'https://www.apache.org/foundation/'
|
||
|
||
- regex: 'Seobility'
|
||
name: 'Seobility'
|
||
category: 'Crawler'
|
||
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
||
|
||
- regex: 'Vercelbot'
|
||
name: 'Vercel Bot'
|
||
category: 'Service bot'
|
||
url: 'https://vercel.com'
|
||
|
||
- regex: 'Grammarly'
|
||
name: 'Grammarly'
|
||
category: 'Service bot'
|
||
url: 'https://www.grammarly.com'
|
||
|
||
- regex: 'Robozilla'
|
||
name: 'Robozilla'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'Domains Project'
|
||
name: 'Domains Project'
|
||
category: 'Crawler'
|
||
url: 'https://domainsproject.org'
|
||
|
||
- regex: 'PetalBot'
|
||
name: 'Petal Bot'
|
||
category: 'Crawler'
|
||
url: 'https://aspiegel.com/petalbot'
|
||
|
||
- regex: 'SerendeputyBot'
|
||
name: 'Serendeputy Bot'
|
||
category: 'Crawler'
|
||
url: 'https://serendeputy.com/about/serendeputy-bot'
|
||
|
||
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
|
||
name: 'ADmantX Service Fetcher'
|
||
category: 'Service bot'
|
||
url: 'https://www.admantx.com/service-fetcher.html'
|
||
|
||
- regex: 'SemanticScholarBot'
|
||
name: 'Semantic Scholar Bot'
|
||
category: 'Crawler'
|
||
url: 'https://www.semanticscholar.org/crawler'
|
||
|
||
- regex: 'VelenPublicWebCrawler'
|
||
name: 'Velen Public Web Crawler'
|
||
category: 'Crawler'
|
||
url: 'https://hunter.io/robot'
|
||
|
||
- regex: 'Barkrowler'
|
||
name: 'Barkrowler'
|
||
category: 'Crawler'
|
||
url: 'http://www.exensa.com/crawl'
|
||
|
||
- regex: 'BDCbot'
|
||
name: 'BDCbot'
|
||
category: 'Crawler'
|
||
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
||
producer:
|
||
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
||
url: 'https://bigdatacorp.com.br/'
|
||
|
||
- regex: 'adbeat'
|
||
name: 'Adbeat'
|
||
category: 'Crawler'
|
||
url: 'https://www.adbeat.com/operation_policy'
|
||
producer:
|
||
name: 'PPC Labs LLC'
|
||
url: 'https://www.adbeat.com/'
|
||
|
||
- regex: '(?:BuiltWith|BW)/[\d.]+'
|
||
name: 'BuiltWith'
|
||
category: 'Crawler'
|
||
url: 'https://builtwith.com/biup'
|
||
producer:
|
||
name: 'BuiltWith Pty Ltd'
|
||
url: 'https://builtwith.com/'
|
||
|
||
- regex: 'https://whatis\.contentkingapp\.com'
|
||
name: 'ContentKing'
|
||
category: 'Site Monitor'
|
||
url: 'https://whatis.contentkingapp.com/'
|
||
producer:
|
||
name: 'ContentKing BV'
|
||
url: 'https://www.contentkingapp.com/'
|
||
|
||
- regex: 'MicroAdBot'
|
||
name: 'MicroAdBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.microad.co.jp/'
|
||
producer:
|
||
name: 'MicroAd, Inc.'
|
||
url: 'https://www.microad.co.jp/'
|
||
|
||
- regex: 'PingAdmin\.Ru'
|
||
name: 'PingAdmin.Ru'
|
||
category: 'Site Monitor'
|
||
url: 'https://ping-admin.ru/'
|
||
|
||
- regex: 'notifyninja.+monitoring'
|
||
name: 'Notify Ninja'
|
||
category: 'Site Monitor'
|
||
url: 'http://notifyninja.com'
|
||
|
||
- regex: 'WebDataStats'
|
||
name: 'WebDataStats'
|
||
category: 'Crawler'
|
||
url: 'https://webdatastats.com/policy.html'
|
||
producer:
|
||
name: 'WebTehRazrabotka LLC'
|
||
url: 'https://webdatastats.com/'
|
||
|
||
- regex: 'parse\.ly scraper'
|
||
name: 'parse.ly'
|
||
category: 'Crawler'
|
||
url: 'https://www.parse.ly/help/integration/crawler'
|
||
producer:
|
||
name: 'Parsely, Inc.'
|
||
url: 'https://www.parse.ly/'
|
||
|
||
- regex: 'Nimbostratus-Bot'
|
||
name: 'Nimbostratus Bot'
|
||
category: 'Site Monitor'
|
||
url: 'http://cloudsystemnetworks.com'
|
||
|
||
- regex: 'HeartRails_Capture/[\d.]+'
|
||
name: 'Heart Rails Capture'
|
||
category: 'Service Agent'
|
||
url: 'http://capture.heartrails.com'
|
||
|
||
- regex: 'Project-Resonance'
|
||
name: 'Project Resonance'
|
||
category: 'Crawler'
|
||
url: 'https://project-resonance.com/'
|
||
producer:
|
||
name: 'RedHunt Labs Limited'
|
||
url: 'https://redhuntlabs.com/'
|
||
|
||
- regex: 'DataXu/[\d.]+'
|
||
name: 'DataXu'
|
||
category: 'Service Agent'
|
||
url: 'https://advertising.roku.com/dataxu'
|
||
producer:
|
||
name: 'Roku, Inc.'
|
||
url: 'https://roku.com'
|
||
|
||
- regex: 'Cocolyzebot'
|
||
name: 'Cocolyzebot'
|
||
category: 'Crawler'
|
||
url: 'https://cocolyze.com/en/cocolyzebot'
|
||
producer:
|
||
name: 'VSI INNOVATION SAS'
|
||
url: 'https://vsi-innovation.com/'
|
||
|
||
- regex: 'veryhip'
|
||
name: 'VeryHip'
|
||
category: 'Crawler'
|
||
url: 'https://veryhip.com/'
|
||
producer:
|
||
name: 'VeryHip'
|
||
url: 'https://veryhip.com/'
|
||
|
||
- regex: 'LinkpadBot'
|
||
name: 'LinkpadBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.linkpad.org/'
|
||
producer:
|
||
name: 'Solomono LLC'
|
||
url: 'https://www.linkpad.org/'
|
||
|
||
- regex: 'MuscatFerret'
|
||
name: 'MuscatFerret'
|
||
category: 'Crawler'
|
||
url: 'http://www.webtop.com/'
|
||
|
||
- regex: 'PageThing\.com'
|
||
name: 'PageThing'
|
||
category: 'Crawler'
|
||
url: 'https://www.pagething.com/'
|
||
producer:
|
||
name: 'SPECIALNOISE LTD'
|
||
url: 'https://www.specialnoise.com/'
|
||
|
||
- regex: 'ArchiveBox'
|
||
name: 'ArchiveBox'
|
||
url: 'https://archivebox.io/'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Choosito'
|
||
name: 'Choosito'
|
||
url: 'https://www.choosito.com/'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Choosito! Inc.'
|
||
url: 'https://www.choosito.com/'
|
||
|
||
- regex: 'datagnionbot'
|
||
name: 'datagnionbot'
|
||
url: 'https://www.datagnion.com/bot.html'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'DATAGNION GMBH'
|
||
url: 'https://www.datagnion.com/'
|
||
|
||
- regex: 'WhatCMS'
|
||
name: 'WhatCMS'
|
||
url: 'https://whatcms.org/'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Nineteen Ten LLC'
|
||
url: 'https://whatcms.org/'
|
||
|
||
- regex: 'httpx'
|
||
name: 'httpx'
|
||
url: 'https://github.com/projectdiscovery/httpx'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'ProjectDiscovery, Inc.'
|
||
url: 'https://projectdiscovery.io/'
|
||
|
||
- regex: '.*\.oast\.'
|
||
name: 'Interactsh'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/projectdiscovery/interactsh'
|
||
producer:
|
||
name: 'ProjectDiscovery, Inc.'
|
||
url: 'https://projectdiscovery.io/'
|
||
|
||
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
|
||
name: 'Expanse'
|
||
category: 'Security Checker'
|
||
url: 'https://expanse.co/'
|
||
producer:
|
||
name: 'Expanse Inc.'
|
||
url: 'https://expanse.co/'
|
||
|
||
- regex: 'HuaweiWebCatBot'
|
||
name: 'HuaweiWebCatBot'
|
||
category: 'Crawler'
|
||
url: 'https://isecurity.huawei.com'
|
||
producer:
|
||
name: 'Huawei Technologies Co., Ltd.'
|
||
url: 'https://huawei.com'
|
||
|
||
- regex: 'Hatena-Favicon'
|
||
name: 'Hatena Favicon'
|
||
category: 'Crawler'
|
||
url: 'https://www.hatena.ne.jp/faq/'
|
||
producer:
|
||
name: 'Hatena Co., Ltd.'
|
||
url: 'https://www.hatena.ne.jp'
|
||
- regex: 'Hatena-?Bookmark'
|
||
name: 'Hatena Bookmark'
|
||
category: 'Crawler'
|
||
url: 'https://www.hatena.ne.jp/faq/'
|
||
producer:
|
||
name: 'Hatena Co., Ltd.'
|
||
url: 'https://www.hatena.ne.jp'
|
||
|
||
- regex: 'RyowlEngine/[\d.]+'
|
||
name: 'Ryowl'
|
||
category: 'Crawler'
|
||
url: 'https://ryowl.org'
|
||
|
||
- regex: 'OdklBot/[\d.]+'
|
||
name: 'Odnoklassniki Bot'
|
||
category: 'Crawler'
|
||
url: 'https://odnoklassniki.ru'
|
||
|
||
- regex: 'Mediatoolkitbot'
|
||
name: 'Mediatoolkit Bot'
|
||
category: 'Crawler'
|
||
url: 'https://mediatoolkit.com'
|
||
|
||
- regex: 'ZoominfoBot'
|
||
name: 'ZoominfoBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.zoominfo.com'
|
||
|
||
- regex: 'WeViKaBot/[\d.]+'
|
||
name: 'WeViKaBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.wevika.de'
|
||
|
||
- regex: 'SEOkicks'
|
||
name: 'SEOkicks'
|
||
category: 'Crawler'
|
||
url: 'https://www.seokicks.de/robot.html'
|
||
|
||
- regex: 'Plukkie/[\d.]+'
|
||
name: 'Plukkie'
|
||
category: 'Crawler'
|
||
url: 'http://www.botje.com/plukkie.htm'
|
||
|
||
- regex: 'proximic;'
|
||
name: 'Comscore'
|
||
category: 'Crawler'
|
||
url: 'https://www.comscore.com/Web-Crawler'
|
||
|
||
- regex: 'SurdotlyBot/[\d.]+'
|
||
name: 'SurdotlyBot'
|
||
category: 'Crawler'
|
||
url: 'http://sur.ly/bot.html'
|
||
|
||
- regex: 'Gowikibot/[\d.]+'
|
||
name: 'Gowikibot'
|
||
category: 'Crawler'
|
||
url: 'http://www.gowikibot.com'
|
||
|
||
- regex: 'SabsimBot/[\d.]+'
|
||
name: 'SabsimBot'
|
||
category: 'Crawler'
|
||
url: 'https://sabsim.com'
|
||
|
||
- regex: 'LumtelBot/[\d.]+'
|
||
name: 'LumtelBot'
|
||
category: 'Crawler'
|
||
url: 'https://umtel.com'
|
||
|
||
- regex: 'PiplBot'
|
||
name: 'PiplBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.pipl.com/bot'
|
||
|
||
- regex: 'woobot/[\d.]+'
|
||
name: 'WooRank'
|
||
category: 'Crawler'
|
||
url: 'https://www.woorank.com/bot'
|
||
|
||
- regex: 'Cookiebot/[\d.]+'
|
||
name: 'Cookiebot'
|
||
category: 'Crawler'
|
||
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
||
producer:
|
||
name: 'Cybot A/S'
|
||
url: 'https://www.cybot.com/'
|
||
|
||
- regex: 'NetSystemsResearch'
|
||
name: 'NetSystemsResearch'
|
||
category: 'Security Checker'
|
||
url: 'https://www.netsystemsresearch.com/'
|
||
producer:
|
||
name: 'NET SYSTEMS RESEARCH LLC'
|
||
url: 'https://www.netsystemsresearch.com/'
|
||
|
||
- regex: 'CensysInspect/[\d.]+'
|
||
name: 'CensysInspect'
|
||
category: 'Security Checker'
|
||
url: 'https://about.censys.io/'
|
||
producer:
|
||
name: 'Censys, Inc.'
|
||
url: 'https://censys.io/'
|
||
|
||
- regex: 'gdnplus\.com'
|
||
name: 'GDNP'
|
||
category: 'Crawler'
|
||
url: 'https://gdnplus.com/'
|
||
producer:
|
||
name: 'Global Digital Network Plus, LLC'
|
||
url: 'https://gdnplus.com/'
|
||
|
||
- regex: 'WellKnownBot/[\d.]+'
|
||
name: 'WellKnownBot'
|
||
category: 'Crawler'
|
||
url: 'https://well-known.dev'
|
||
|
||
- regex: 'Adsbot/[\d.]+'
|
||
name: 'Adsbot'
|
||
category: 'Crawler'
|
||
url: 'https://seostar.co/robot/'
|
||
|
||
- regex: 'MTRobot/[\d.]+'
|
||
name: 'MTRobot'
|
||
category: 'Crawler'
|
||
url: 'https://metrics-tools.de/robot.html'
|
||
producer:
|
||
name: 'Metrics Tools'
|
||
url: 'https://metrics-tools.de/'
|
||
|
||
- regex: 'serpstatbot/[\d.]+'
|
||
name: 'serpstatbot'
|
||
category: 'Crawler'
|
||
url: 'http://serpstatbot.com/'
|
||
producer:
|
||
name: 'Netpeak Ltd'
|
||
url: 'https://netpeak.net/'
|
||
|
||
- regex: 'colly'
|
||
name: 'colly'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/gocolly/colly/'
|
||
|
||
- regex: 'l9tcpid/v[\d.]+'
|
||
name: 'l9tcpid'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/LeakIX/l9tcpid'
|
||
|
||
- regex: 'l9explore/[\d.]+'
|
||
name: 'l9explore'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/LeakIX/l9explore'
|
||
|
||
- regex: 'l9scan/|^Lkx-.*/[\d.]+'
|
||
name: 'LeakIX'
|
||
category: 'Security Checker'
|
||
url: 'https://leakix.net/'
|
||
producer:
|
||
name: 'BaDaaS SRL'
|
||
url: 'https://leakix.net/'
|
||
|
||
- regex: 'MegaIndex\.ru/[\d.]+'
|
||
name: 'MegaIndex'
|
||
category: 'Crawler'
|
||
url: 'https://megaindex.com/crawler'
|
||
|
||
- regex: 'Seekport'
|
||
name: 'Seekport'
|
||
category: 'Crawler'
|
||
url: 'https://bot.seekport.com/'
|
||
producer:
|
||
name: 'SISTRIX GmbH'
|
||
url: 'https://www.sistrix.de/'
|
||
|
||
- regex: 'seolyt/[\d.]+'
|
||
name: 'seolyt'
|
||
category: 'Crawler'
|
||
url: 'https://seolyt.com/'
|
||
|
||
- regex: 'YaK/[\d.]+'
|
||
name: 'YaK'
|
||
category: 'Crawler'
|
||
url: 'https://www.linkfluence.com/'
|
||
producer:
|
||
name: 'Linkfluence SAS'
|
||
url: 'https://www.linkfluence.com/'
|
||
|
||
- regex: 'KomodiaBot/[\d.]+'
|
||
name: 'KomodiaBot'
|
||
category: 'Crawler'
|
||
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
||
producer:
|
||
name: 'Komodia Inc.'
|
||
url: 'https://www.komodia.com/'
|
||
|
||
- regex: 'Neevabot/[\d.]+'
|
||
name: 'Neevabot'
|
||
category: 'Search bot'
|
||
url: 'https://neeva.com/neevabot'
|
||
producer:
|
||
name: 'Neeva Inc.'
|
||
url: 'https://neeva.com/'
|
||
|
||
- regex: 'LinkPreview/[\d.]+'
|
||
name: 'LinkPreview'
|
||
category: 'Service Agent'
|
||
url: 'https://www.linkpreview.net/'
|
||
|
||
- regex: 'JungleKeyThumbnail/[\d.]+'
|
||
name: 'JungleKeyThumbnail'
|
||
category: 'Crawler'
|
||
url: 'https://junglekey.com/'
|
||
|
||
- regex: 'rocketmonitor(?: |bot/)[\d.]+'
|
||
name: 'RocketMonitorBot'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
||
producer:
|
||
name: 'Radio Mast, Inc.'
|
||
url: 'https://www.radiomast.io/'
|
||
|
||
- regex: 'SitemapParser-VIPnytt/[\d.]+'
|
||
name: 'SitemapParser-VIPnytt'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/VIPnytt/SitemapParser/'
|
||
|
||
- regex: '^Turnitin'
|
||
name: 'Turnitin'
|
||
category: 'Crawler'
|
||
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
||
|
||
- regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
|
||
name: 'Dotcom Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.dotcom-monitor.com'
|
||
|
||
- regex: 'ThinkChaos/'
|
||
name: 'ThinkChaos'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'DataForSeoBot'
|
||
name: 'DataForSeoBot'
|
||
category: 'Crawler'
|
||
url: 'https://dataforseo.com/dataforseo-bot'
|
||
|
||
- regex: 'Discordbot/[\d.]+'
|
||
name: 'Discord Bot'
|
||
category: 'Service Agent'
|
||
url: 'https://discordapp.com'
|
||
|
||
- regex: 'Linespider/[\d.]+'
|
||
name: 'Linespider'
|
||
category: 'Crawler'
|
||
url: 'https://lin.ee/4dwXkTH'
|
||
|
||
- regex: 'Cincraw/[\d.]+'
|
||
name: 'Cincraw'
|
||
category: 'Crawler'
|
||
url: 'http://cincrawdata.net/bot/'
|
||
|
||
- regex: 'CISPA Web Analyzer'
|
||
name: 'CISPA Web Analyzer'
|
||
category: 'Crawler'
|
||
url: 'https://notify.cispa.de/'
|
||
producer:
|
||
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
|
||
url: 'https://cispa.de/en'
|
||
|
||
- regex: 'IonCrawl'
|
||
name: 'IONOS Crawler'
|
||
category: 'Crawler'
|
||
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
|
||
producer:
|
||
name: 'IONOS SE'
|
||
url: 'https://www.ionos.de/'
|
||
|
||
- regex: 'Crawldad'
|
||
name: 'Crawldad'
|
||
category: 'Crawler'
|
||
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
||
|
||
- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
|
||
name: 'security.txt scanserver'
|
||
category: 'Security Checker'
|
||
url: 'https://securitytxt-scan.cs.hm.edu/'
|
||
producer:
|
||
name: 'Hochschule für angewandte Wissenschaften München'
|
||
url: 'https://www.hm.edu/'
|
||
|
||
- regex: 'TigerBot/[\d.]+'
|
||
name: 'TigerBot'
|
||
category: 'Crawler'
|
||
url: 'https://tiger.ch/'
|
||
|
||
- regex: 'TestCrawler/[\d.]+'
|
||
name: 'TestCrawler'
|
||
category: 'Crawler'
|
||
url: 'https://www.comcepta.com/'
|
||
|
||
- regex: 'CrowdTanglebot/[\d.]+'
|
||
name: 'CrowdTangle'
|
||
category: 'Crawler'
|
||
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
||
producer:
|
||
name: 'CrowdTangle, Inc.'
|
||
url: 'https://www.crowdtangle.com/'
|
||
|
||
- regex: 'Sellers\.Guide Crawler by Primis'
|
||
name: 'Sellers.Guide'
|
||
category: 'Crawler'
|
||
url: 'https://sellers.guide/'
|
||
producer:
|
||
name: 'McCann Disciplines, Ltd.'
|
||
url: 'https://www.primis.tech/'
|
||
|
||
- regex: 'OnalyticaBot'
|
||
name: 'Onalytica'
|
||
category: 'Crawler'
|
||
url: 'https://www.airslate.com/bot/explore/onalytica-bot'
|
||
producer:
|
||
name: 'airSlate, Inc.'
|
||
url: 'https://www.airslate.com/'
|
||
|
||
- regex: 'deepnoc'
|
||
name: 'deepnoc'
|
||
category: 'Crawler'
|
||
url: 'https://deepnoc.com/bot'
|
||
producer:
|
||
name: 'deepnoc, GmbH'
|
||
url: 'https://deepnoc.com/'
|
||
|
||
- regex: 'Newslitbot/[\d.]+'
|
||
name: 'Newslitbot'
|
||
category: 'Crawler'
|
||
url: 'https://www.newslit.co/'
|
||
producer:
|
||
name: 'Newslit, LLC.'
|
||
url: 'https://www.newslit.co/'
|
||
|
||
- regex: 'um-LN/[\d.]+'
|
||
name: 'uMBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.ubermetrics-technologies.com/'
|
||
producer:
|
||
name: 'Ubermetrics Technologies GmbH'
|
||
url: 'https://www.ubermetrics-technologies.com/'
|
||
|
||
- regex: 'Abonti/[\d.]+'
|
||
name: 'Abonti'
|
||
category: 'Crawler'
|
||
url: 'http://abonti.com/'
|
||
|
||
- regex: 'collection@infegy\.com'
|
||
name: 'Infegy'
|
||
category: 'Crawler'
|
||
url: 'https://infegy.com/'
|
||
producer:
|
||
name: 'Infegy, Inc.'
|
||
url: 'https://infegy.com/'
|
||
|
||
- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
|
||
name: 'IPIP'
|
||
category: 'Security Checker'
|
||
url: 'https://security.ipip.net/'
|
||
producer:
|
||
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
||
url: 'https://en.ipip.net/'
|
||
|
||
- regex: 'ev-crawler/[\d.]+'
|
||
name: 'Headline'
|
||
category: 'Crawler'
|
||
url: 'https://headline.com/legal/crawler'
|
||
producer:
|
||
name: 'e.ventures Managementgesellschaft mbH'
|
||
url: 'https://headline.com/'
|
||
|
||
- regex: 'webprosbot/[\d.]+'
|
||
name: 'WebPros'
|
||
category: 'Crawler'
|
||
url: 'https://webpros.com/'
|
||
producer:
|
||
name: 'WebPros Holdco B.V.'
|
||
url: 'https://webpros.com/'
|
||
|
||
- regex: 'ELB-HealthChecker'
|
||
name: 'Amazon ELB'
|
||
category: 'Site Monitor'
|
||
url: 'https://aws.amazon.com/elasticloadbalancing/'
|
||
producer:
|
||
name: 'Amazon.com, Inc.'
|
||
url: 'https://www.amazon.com/'
|
||
|
||
- regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
|
||
name: 'WhereGoes'
|
||
category: 'Crawler'
|
||
url: 'https://wheregoes.com/'
|
||
|
||
- regex: 'project_patchwatch'
|
||
name: 'Project Patchwatch'
|
||
category: 'Crawler'
|
||
url: 'http://66.240.192.82/'
|
||
|
||
- regex: 'InternetMeasurement/[\d.]+'
|
||
name: 'InternetMeasurement'
|
||
category: 'Crawler'
|
||
url: 'https://internet-measurement.com/'
|
||
|
||
- regex: 'DomainAppender /[\d.]+'
|
||
name: 'DomainAppender'
|
||
category: 'Crawler'
|
||
url: 'https://www.profound.net/product/domain_append/'
|
||
producer:
|
||
name: 'Profound Networks, LLC'
|
||
url: 'https://www.profound.net/'
|
||
|
||
- regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
|
||
name: 'FreeWebMonitoring'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.freewebmonitoring.com/bot.html'
|
||
producer:
|
||
name: 'GreenWave Online, Inc.'
|
||
url: 'http://www.greenwaveonline.com/'
|
||
|
||
- regex: 'Page Modified Pinger'
|
||
name: 'Page Modified Pinger'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.pagemodified.com/'
|
||
producer:
|
||
name: 'Valley Hosting, LLC'
|
||
url: 'https://www.pagemodified.com/'
|
||
|
||
- regex: 'adstxtlab\.com'
|
||
name: 'adstxtlab.com'
|
||
category: 'Crawler'
|
||
url: 'https://adstxtlab.com/validator.php'
|
||
producer:
|
||
name: 'Jaohawi AB'
|
||
url: 'https://adstxtlab.com/'
|
||
|
||
- regex: 'Iframely/[\d.]+'
|
||
name: 'Iframely'
|
||
category: 'Crawler'
|
||
url: 'https://iframely.com/'
|
||
producer:
|
||
name: 'Itteco Software, Corp.'
|
||
url: 'https://iframely.com/'
|
||
|
||
- regex: 'DomainStatsBot/[\d.]+'
|
||
name: 'DomainStatsBot'
|
||
category: 'Crawler'
|
||
url: 'https://domainstats.com/pages/our-bot'
|
||
producer:
|
||
name: 'Domainstats Ltd'
|
||
url: 'https://domainstats.com/'
|
||
|
||
- regex: 'aiHitBot/[\d.]+'
|
||
name: 'aiHitBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.aihitdata.com/about'
|
||
|
||
- regex: 'DomainCrawler/'
|
||
name: 'DomainCrawler'
|
||
category: 'Crawler'
|
||
url: 'https://domaincrawler.com/about-us/'
|
||
|
||
- regex: 'DNSResearchBot'
|
||
name: 'DNSResearchBot'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'GitCrawlerBot'
|
||
name: 'GitCrawlerBot'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'AdAuth/[\d.]+'
|
||
name: 'AdAuth'
|
||
category: 'Crawler'
|
||
url: 'https://www.adauth.com'
|
||
|
||
- regex: 'faveeo\.com'
|
||
name: 'Faveeo'
|
||
category: 'Crawler'
|
||
url: 'http://www.faveeo.com'
|
||
|
||
- regex: 'kozmonavt\.'
|
||
name: 'Kozmonavt'
|
||
category: 'Crawler'
|
||
url: 'https://kozmonavt.ml'
|
||
|
||
- regex: 'CriteoBot/'
|
||
name: 'CriteoBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.criteo.com/criteo-crawler/'
|
||
|
||
- regex: 'PayPal IPN'
|
||
name: 'PayPal IPN'
|
||
category: 'Service Agent'
|
||
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
|
||
producer:
|
||
name: 'PayPal, Inc.'
|
||
url: 'https://www.paypal.com/'
|
||
|
||
- regex: 'MaCoCu'
|
||
name: 'MaCoCu'
|
||
category: 'Crawler'
|
||
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
|
||
producer:
|
||
name: 'Jožef Stefan Institute'
|
||
url: 'https://www.ijs.si/ijsw/JSI'
|
||
|
||
- regex: 'dnt-policy@eff\.org'
|
||
name: 'EFF Do Not Track Verifier'
|
||
category: 'Crawler'
|
||
url: 'https://www.eff.org/issues/do-not-track'
|
||
producer:
|
||
name: 'Electronic Frontier Foundation'
|
||
url: 'https://www.eff.org/'
|
||
|
||
- regex: 'InfoTigerBot'
|
||
name: 'InfoTigerBot'
|
||
category: 'Crawler'
|
||
url: 'https://infotiger.com/bot'
|
||
producer:
|
||
name: 'Infotiger UG'
|
||
url: 'https://infotiger.com/'
|
||
|
||
- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
|
||
name: 'Birdcrawlerbot'
|
||
category: 'Crawler'
|
||
url: 'https://crawla.de/de/index.php'
|
||
producer:
|
||
name: 'Swoppen Systems GmbH'
|
||
url: 'https://www.swoppen.com/de'
|
||
|
||
- regex: 'ScamadviserExternalHit/[\d.]+'
|
||
name: 'Scamadviser External Hit'
|
||
category: 'Crawler'
|
||
url: 'https://www.scamadviser.com/'
|
||
producer:
|
||
name: 'Ecommerce Operations B.V.'
|
||
url: 'https://www.scamadviser.com/'
|
||
|
||
- regex: 'ZaldamoSearchBot'
|
||
name: 'Zaldamo'
|
||
category: 'Crawler'
|
||
url: 'https://www.zaldamo.com/search.html'
|
||
producer:
|
||
name: 'Zaldamo, LLC.'
|
||
url: 'https://www.zaldamo.com/'
|
||
|
||
- regex: 'AFB/[\d.]+'
|
||
name: 'Allloadin Favicon Bot'
|
||
category: 'Crawler'
|
||
url: 'https://allloadin.com/'
|
||
|
||
- regex: 'SeolytBot/[\d.]+'
|
||
name: 'Seolyt Bot'
|
||
category: 'Crawler'
|
||
url: 'https://seolyt.com'
|
||
|
||
- regex: 'LinkWalker/[\d.]+'
|
||
name: 'LinkWalker'
|
||
category: 'Crawler'
|
||
url: 'https://www.phishlabs.com/'
|
||
producer:
|
||
name: 'PhishLabs, Inc.'
|
||
url: 'https://www.phishlabs.com/'
|
||
|
||
- regex: 'RenovateBot/[\d.]+'
|
||
name: 'RenovateBot'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/renovatebot/renovate'
|
||
producer:
|
||
name: 'White Source Ltd.'
|
||
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
||
|
||
- regex: 'INETDEX-BOT/[\d.]+'
|
||
name: 'Inetdex Bot'
|
||
category: 'Crawler'
|
||
url: 'https://www.inetdex.com/'
|
||
|
||
- regex: 'NETZZAPPEN'
|
||
name: 'NETZZAPPEN'
|
||
category: 'Crawler'
|
||
url: 'https://www.netzzappen.com/'
|
||
producer:
|
||
name: 'Marc Huemer'
|
||
url: 'https://www.netzzappen.com/'
|
||
|
||
- regex: 'panscient\.com'
|
||
name: 'Panscient'
|
||
category: 'Crawler'
|
||
url: 'https://www.panscient.com/faq.htm'
|
||
producer:
|
||
name: 'Panscient, Inc.'
|
||
url: 'https://www.panscient.com/'
|
||
|
||
- regex: 'research@pdrlabs\.net'
|
||
name: 'PDR Labs'
|
||
category: 'Security Checker'
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
||
producer:
|
||
name: 'PDR Labs'
|
||
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
||
|
||
- regex: 'Nicecrawler/[\d.]+'
|
||
name: 'NiceCrawler'
|
||
category: 'Crawler'
|
||
url: 'https://www.nicecrawler.com/'
|
||
producer:
|
||
name: 'Intelium Corp.'
|
||
url: 'https://www.intelium.com/'
|
||
|
||
- regex: 't3versionsBot/[\d.]+'
|
||
name: 't3versions'
|
||
category: 'Crawler'
|
||
url: 'https://www.t3versions.com/bot'
|
||
producer:
|
||
name: 'Torben Hansen'
|
||
url: 'https://www.t3versions.com/'
|
||
|
||
- regex: 'Crawlson/[\d.]+'
|
||
name: 'Crawlson'
|
||
category: 'Crawler'
|
||
url: 'https://www.crawlson.com/about'
|
||
producer:
|
||
name: 'Crawlson'
|
||
url: 'https://www.crawlson.com/'
|
||
|
||
- regex: 'tchelebi/[\d.]+'
|
||
name: 'tchelebi'
|
||
category: 'Crawler'
|
||
url: 'https://tchelebi.io/'
|
||
producer:
|
||
name: 'NormShield, Inc.'
|
||
url: 'https://blackkite.com/'
|
||
|
||
- regex: 'JobboerseBot'
|
||
name: 'JobboerseBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.xing.com/jobs'
|
||
producer:
|
||
name: 'New Work SE'
|
||
url: 'https://www.xing.com/'
|
||
|
||
- regex: 'RepoLookoutBot/v?[\d.]+'
|
||
name: 'Repo Lookout'
|
||
category: 'Security Checker'
|
||
url: 'https://www.repo-lookout.org/'
|
||
producer:
|
||
name: 'Crissy Field GmbH'
|
||
url: 'https://www.crissyfield.de/'
|
||
|
||
- regex: 'PATHspider'
|
||
name: 'PATHspider'
|
||
category: 'Security Checker'
|
||
url: 'https://pathspider.net/'
|
||
producer:
|
||
name: 'MAMI Project'
|
||
url: 'https://mami-project.eu/'
|
||
|
||
- regex: 'everyfeed-spider/[\d.]+'
|
||
name: 'Everyfeed'
|
||
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Exchange check'
|
||
name: 'Exchange check'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/GossiTheDog/scanning'
|
||
producer:
|
||
name: 'Kevin Beaumont'
|
||
url: 'https://doublepulsar.com/'
|
||
|
||
- regex: 'Sublinq'
|
||
name: 'Sublinq'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'Gregarius/[\d.]+'
|
||
name: 'Gregarius'
|
||
category: 'Feed Fetcher'
|
||
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
||
producer:
|
||
name: ''
|
||
url: ''
|
||
|
||
- regex: 'COMODO DCV'
|
||
name: 'COMODO DCV'
|
||
category: 'Service Agent'
|
||
url: 'https://www.comodo.com/'
|
||
producer:
|
||
name: 'Comodo Security Solutions, Inc.'
|
||
url: 'https://www.comodo.com/'
|
||
|
||
- regex: 'Sectigo DCV'
|
||
name: 'Sectigo DCV'
|
||
category: 'Service Agent'
|
||
url: 'https://sectigo.com/'
|
||
producer:
|
||
name: 'Sectigo Limited'
|
||
url: 'https://sectigo.com/'
|
||
|
||
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
|
||
name: 'KlarnaBot'
|
||
category: 'Crawler'
|
||
url: 'https://docs.klarna.com/klarna-bot/'
|
||
producer:
|
||
name: 'Klarna Bank AB'
|
||
url: 'https://www.klarna.com/'
|
||
|
||
- regex: 'Taboolabot/[\d.]+'
|
||
name: 'Taboolabot'
|
||
category: 'Crawler'
|
||
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
||
producer:
|
||
name: 'Taboola, Inc.'
|
||
url: 'https://www.taboola.com/'
|
||
|
||
- regex: 'Asana/[\d.]+'
|
||
name: 'Asana'
|
||
category: 'Crawler'
|
||
url: 'https://asana.com/'
|
||
producer:
|
||
name: 'Asana, Inc.'
|
||
url: 'https://asana.com/'
|
||
|
||
- regex: 'Chrome Privacy Preserving Prefetch Proxy'
|
||
name: 'Chrome Privacy Preserving Prefetch Proxy'
|
||
category: 'Service Agent'
|
||
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
|
||
producer:
|
||
name: 'Google Inc.'
|
||
url: 'https://www.google.com/'
|
||
|
||
- regex: 'URLinspectorBot/[\d.]+'
|
||
name: 'URLinspector'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.urlinspector.com/bot/'
|
||
producer:
|
||
name: 'LinkResearchTools GmbH'
|
||
url: 'https://www.linkresearchtools.com/'
|
||
|
||
- regex: 'EntferBot/[\d.]+'
|
||
name: 'Entfer'
|
||
category: 'Crawler'
|
||
url: 'https://entfer.com/'
|
||
producer:
|
||
name: 'Entfer Ltd.'
|
||
url: 'https://entfer.com/'
|
||
|
||
- regex: 'TagInspector/[\d.]+'
|
||
name: 'Tag Inspector'
|
||
category: 'Crawler'
|
||
url: 'https://taginspector.com/'
|
||
producer:
|
||
name: 'InfoTrust, LLC'
|
||
url: 'https://infotrust.com/'
|
||
|
||
- regex: 'pageburst'
|
||
name: 'Pageburst'
|
||
category: 'Crawler'
|
||
url: 'https://pageburstls.elsevier.com/'
|
||
producer:
|
||
name: 'Elsevier Ltd'
|
||
url: 'https://www.elsevier.com/'
|
||
|
||
- regex: '.+diffbot'
|
||
name: 'Diffbot'
|
||
category: 'Crawler'
|
||
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
|
||
producer:
|
||
name: 'Diffbot Technologies Corp.'
|
||
url: 'https://www.diffbot.com/'
|
||
|
||
- regex: 'DisqusAdstxtCrawler/[\d.]+'
|
||
name: 'Disqus'
|
||
category: 'Crawler'
|
||
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
||
producer:
|
||
name: 'Disqus, Inc.'
|
||
url: 'https://disqus.com/'
|
||
|
||
- regex: 'startmebot/[\d.]+'
|
||
name: 'start.me'
|
||
category: 'Crawler'
|
||
url: 'https://about.start.me/'
|
||
producer:
|
||
name: 'start.me BV'
|
||
url: 'https://about.start.me/'
|
||
|
||
- regex: '2ip bot/[\d.]+'
|
||
name: '2ip'
|
||
category: 'Crawler'
|
||
url: 'https://2ip.io/'
|
||
|
||
- regex: 'ReqBin Curl Client/[\d.]+'
|
||
name: 'ReqBin'
|
||
category: 'Crawler'
|
||
url: 'https://reqbin.com/curl'
|
||
|
||
- regex: 'XoviBot/[\d.]+'
|
||
name: 'XoviBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.xovibot.net'
|
||
producer:
|
||
name: 'Xovi GmbH'
|
||
url: 'http://www.xovi.de'
|
||
|
||
- regex: 'Overcast/[\d.]+ Podcast Sync'
|
||
name: 'Overcast Podcast Sync'
|
||
category: 'Service Agent'
|
||
url: 'https://overcast.fm/podcasterinfo'
|
||
|
||
- regex: '^Verity/[\d.]+'
|
||
name: 'GumGum Verity'
|
||
category: 'Service Agent'
|
||
url: 'https://gumgum.com/verity'
|
||
|
||
- regex: 'hackermention'
|
||
name: 'hackermention'
|
||
category: 'Feed Reader'
|
||
url: 'https://github.com/snarfed/hackermention'
|
||
|
||
- regex: 'BitSightBot/[\d.]+'
|
||
name: 'BitSight'
|
||
category: 'Security Checker'
|
||
url: 'https://www.bitsight.com/'
|
||
producer:
|
||
name: 'BitSight Technologies, Inc.'
|
||
url: 'https://www.bitsight.com/'
|
||
|
||
- regex: 'Ezgif/[\d.]+'
|
||
name: 'Ezgif'
|
||
category: 'Service Agent'
|
||
url: 'https://ezgif.com/about'
|
||
|
||
- regex: 'intelx\.io_bot'
|
||
name: 'Intelligence X'
|
||
category: 'Crawler'
|
||
url: 'https://intelx.io/'
|
||
producer:
|
||
name: 'Kleissner Investments s.r.o.'
|
||
url: 'https://intelx.io/'
|
||
|
||
- regex: 'FemtosearchBot/[\d.]+'
|
||
name: 'Femtosearch'
|
||
category: 'Crawler'
|
||
url: 'http://femtosearch.com/'
|
||
producer:
|
||
name: 'Grier Forensics, LLC'
|
||
url: 'https://www.grierforensics.com/'
|
||
|
||
- regex: 'AdsTxtCrawler/[\d.]+'
|
||
name: 'AdsTxtCrawler'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
||
producer:
|
||
name: 'IAB Technology Laboratory, Inc.'
|
||
url: 'https://iabtechlab.com/'
|
||
|
||
- regex: 'Morningscore'
|
||
name: 'Morningscore Bot'
|
||
category: 'Crawler'
|
||
url: 'https://morningscore.io/'
|
||
producer:
|
||
name: 'Morningscore'
|
||
url: 'https://morningscore.io/'
|
||
|
||
- regex: 'Uptime-Kuma/[\d.]+'
|
||
name: 'Uptime-Kuma'
|
||
category: 'Site Monitor'
|
||
url: 'https://github.com/louislam/uptime-kuma'
|
||
|
||
- regex: 'ChatGPT-User'
|
||
name: 'ChatGPT'
|
||
category: 'Crawler'
|
||
url: 'https://platform.openai.com/docs/plugins/bot'
|
||
producer:
|
||
name: 'OpenAI OpCo, LLC'
|
||
url: 'https://openai.com/'
|
||
|
||
- regex: 'BrightEdge Crawler/[\d.]+'
|
||
name: 'BrightEdge'
|
||
category: 'Crawler'
|
||
url: 'https://www.brightedge.com/'
|
||
producer:
|
||
name: 'BrightEdge Technologies, Inc'
|
||
url: 'https://www.brightedge.com/'
|
||
|
||
- regex: 'sfFeedReader/[\d.]+'
|
||
name: 'sfFeedReader'
|
||
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
||
category: 'Feed Fetcher'
|
||
|
||
- regex: 'cyberscan\.io'
|
||
name: 'Cyberscan'
|
||
category: 'Security Checker'
|
||
url: 'https://www.cyberscan.io/'
|
||
producer:
|
||
name: 'DGC Verwaltungs GmbH'
|
||
url: 'https://dgc.org/'
|
||
|
||
- regex: 'deepcrawl\.com'
|
||
name: 'Lumar'
|
||
category: 'Crawler'
|
||
url: 'https://deepcrawl.com/bot'
|
||
producer:
|
||
name: 'Lumar'
|
||
url: 'https://www.lumar.io/'
|
||
|
||
- regex: 'researchscan\.comsys\.rwth-aachen\.de'
|
||
name: 'Research Scan'
|
||
category: 'Crawler'
|
||
url: 'http://researchscan.comsys.rwth-aachen.de/'
|
||
producer:
|
||
name: 'RWTH Aachen University'
|
||
url: 'https://www.comsys.rwth-aachen.de/'
|
||
|
||
- regex: 'newspaper/[\d.]+'
|
||
name: 'Scraping Robot'
|
||
category: 'Crawler'
|
||
url: 'https://scrapingrobot.com/'
|
||
producer:
|
||
name: 'Sprious LLC'
|
||
url: 'https://sprious.com/'
|
||
|
||
- regex: 'GPTBot/[\d.]+'
|
||
name: 'GPTBot'
|
||
category: 'Crawler'
|
||
url: 'https://platform.openai.com/docs/gptbot'
|
||
producer:
|
||
name: 'OpenAI OpCo, LLC'
|
||
url: 'https://openai.com/'
|
||
|
||
- regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
|
||
name: 'Ant'
|
||
category: 'Crawler'
|
||
url: 'https://www.ant.com/'
|
||
producer:
|
||
name: 'Ant.com Ltd.'
|
||
url: 'https://www.ant.com/'
|
||
|
||
- regex: 'WebwikiBot/[\d.]+'
|
||
name: 'Webwiki'
|
||
category: 'Crawler'
|
||
url: 'https://www.webwiki.com/'
|
||
producer:
|
||
name: 'webwiki GmbH'
|
||
url: 'https://www.webwiki.com/'
|
||
|
||
- regex: 'phpMyAdmin'
|
||
name: 'phpMyAdmin'
|
||
category: 'Service Agent'
|
||
url: 'https://www.phpmyadmin.net/'
|
||
|
||
- regex: 'Matomo/[\d.]+'
|
||
name: 'Matomo'
|
||
category: 'Service Agent'
|
||
url: 'https://github.com/matomo-org/matomo'
|
||
producer:
|
||
name: 'InnoCraft Ltd'
|
||
url: 'https://matomo.org/'
|
||
|
||
- regex: 'Prometheus/[\d.]+'
|
||
name: 'Prometheus'
|
||
category: 'Service Agent'
|
||
url: 'https://github.com/prometheus/prometheus'
|
||
producer:
|
||
name: 'The Linux Foundation'
|
||
url: 'https://www.cncf.io/'
|
||
|
||
- regex: 'ArchiveTeam ArchiveBot'
|
||
name: 'ArchiveBot'
|
||
category: 'Crawler'
|
||
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
|
||
producer:
|
||
name: 'ArchiveTeam'
|
||
url: 'https://wiki.archiveteam.org/'
|
||
|
||
- regex: 'MADBbot/[\d.]+'
|
||
name: 'MADBbot'
|
||
category: 'Crawler'
|
||
url: 'https://madb.zapto.org/bot.html'
|
||
|
||
- regex: 'MeltwaterNews'
|
||
name: 'MeltwaterNews'
|
||
category: 'Crawler'
|
||
producer:
|
||
name: 'Meltwater Deutschland GmbH'
|
||
url: 'https://www.meltwater.com/'
|
||
|
||
- regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
|
||
name: 'OWLer'
|
||
category: 'Crawler'
|
||
url: 'https://openwebsearch.eu/owler/'
|
||
producer:
|
||
name: 'Open Search Foundation e.V.'
|
||
url: 'https://openwebsearch.eu/'
|
||
|
||
- regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
|
||
name: 'BBC Page Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
|
||
producer:
|
||
name: 'BBC'
|
||
url: 'https://www.bbc.com/'
|
||
|
||
- regex: 'BBC-Forge-URL-Monitor-Twisted'
|
||
name: 'BBC Forge URL Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.bbc.com/'
|
||
producer:
|
||
name: 'BBC'
|
||
url: 'https://www.bbc.com/'
|
||
|
||
- regex: 'ClaudeBot'
|
||
name: 'ClaudeBot'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/ClaudeBot/ClaudeBot'
|
||
|
||
- regex: 'Imagesift'
|
||
name: 'ImageSift'
|
||
category: 'Crawler'
|
||
url: 'https://imagesift.com/'
|
||
producer:
|
||
name: 'Castle Global, Inc.'
|
||
url: 'https://thehive.ai/'
|
||
|
||
- regex: 'TactiScout'
|
||
name: 'TactiScout'
|
||
category: 'Crawler'
|
||
url: 'https://find-it.world/TempCrawl/Crawltheque.php'
|
||
producer:
|
||
name: 'Tactikast'
|
||
|
||
- regex: 'Brightbot ([\d+.]+)'
|
||
name: 'BrightBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.brightbot.app/'
|
||
producer:
|
||
name: 'Bright Interactive Ltd'
|
||
url: 'https://www.builtbybright.com/'
|
||
|
||
- regex: 'DaspeedBot/([\d+.]+)'
|
||
name: 'DaspeedBot'
|
||
category: 'Crawler'
|
||
url: 'https://daspeed.io/'
|
||
producer:
|
||
name: 'DAWAP SARL'
|
||
url: 'https://dawap.fr/'
|
||
|
||
- regex: 'StractBot(?:/([\d+.]+))?'
|
||
name: 'Stract'
|
||
category: 'Crawler'
|
||
url: 'https://stract.com/webmasters'
|
||
producer:
|
||
name: 'Stract'
|
||
url: 'https://github.com/StractOrg/stract/'
|
||
|
||
- regex: 'GeedoBot(?:/([\d+.]+))?'
|
||
name: 'GeedoBot'
|
||
category: 'Crawler'
|
||
url: 'https://geedo.com/bot/'
|
||
|
||
- regex: 'GeedoProductSearch'
|
||
name: 'GeedoProductSearch'
|
||
category: 'Crawler'
|
||
url: 'https://geedo.com/product-search/'
|
||
|
||
- regex: 'BackupLand(?:/([\d+.]+))?'
|
||
name: 'BackupLand'
|
||
category: 'Crawler'
|
||
url: 'https://go.backupland.com/'
|
||
producer:
|
||
name: 'ООО «КВАРТА»'
|
||
url: 'https://go.backupland.com/'
|
||
|
||
- regex: 'Konturbot(?:/([\d+.]+))?'
|
||
name: 'Konturbot'
|
||
category: 'Crawler'
|
||
url: 'https://kontur.ru/'
|
||
producer:
|
||
name: 'АО «ПФ «СКБ Контур»'
|
||
url: 'https://kontur.ru/'
|
||
|
||
- regex: 'keys-so-bot'
|
||
name: 'Keys.so'
|
||
category: 'Crawler'
|
||
url: 'https://www.keys.so/'
|
||
producer:
|
||
name: 'ООО «МОДЕСКО»'
|
||
url: 'https://www.modesco.ru/'
|
||
|
||
- regex: 'LetsearchBot(?:/([\d+.]+))?'
|
||
name: 'LetSearch'
|
||
category: 'Crawler'
|
||
url: 'https://letsearch.ru/bots'
|
||
|
||
- regex: 'Example3(?:/([\d+.]+))?'
|
||
name: 'Example3'
|
||
category: 'Crawler'
|
||
url: 'https://www.example3.com/'
|
||
|
||
- regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
|
||
name: 'StatOnline.ru'
|
||
category: 'Crawler'
|
||
url: 'https://statonline.ru/'
|
||
producer:
|
||
name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
|
||
url: 'https://statonline.ru/'
|
||
|
||
- regex: 'Spawning-AI'
|
||
name: 'Spawning AI'
|
||
category: 'Crawler'
|
||
url: 'https://spawning.ai/'
|
||
producer:
|
||
name: 'Spawning, Inc'
|
||
url: 'https://spawning.ai/'
|
||
|
||
- regex: 'domain research project'
|
||
name: 'Domain Research Project'
|
||
category: 'Crawler'
|
||
url: 'https://trentwil.es/domains.html'
|
||
producer:
|
||
name: 'Trent Wiles'
|
||
url: 'https://trentwil.es/'
|
||
|
||
- regex: 'getodin\.com'
|
||
name: 'Odin'
|
||
category: 'Security Checker'
|
||
url: 'https://docs.getodin.com/'
|
||
producer:
|
||
name: 'Cyble Inc.'
|
||
url: 'https://cyble.com/'
|
||
|
||
- regex: 'YouBot'
|
||
name: 'YouBot'
|
||
category: 'Crawler'
|
||
url: 'https://about.you.com/youbot/'
|
||
producer:
|
||
name: 'SuSea, Inc.'
|
||
url: 'https://you.com/'
|
||
|
||
- regex: 'SiteScoreBot'
|
||
name: 'SiteScore'
|
||
category: 'Crawler'
|
||
url: 'https://sitescore.ai/'
|
||
|
||
- regex: 'MBCrawler'
|
||
name: 'Monitor Backlinks'
|
||
category: 'Crawler'
|
||
url: 'https://www.seoptimer.com/monitor-backlinks/'
|
||
producer:
|
||
name: 'SEOptimer'
|
||
url: 'https://www.seoptimer.com/'
|
||
|
||
- regex: 'mariadb-mysql-kbs-bot'
|
||
name: 'MariaDB/MySQL Knowledge Base'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/williamdes/mariadb-mysql-kbs'
|
||
producer:
|
||
name: 'WDES SAS'
|
||
url: 'https://wdes.fr/en/'
|
||
|
||
- regex: 'GitHubCopilotChat'
|
||
name: 'GitHubCopilotChat'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/aaamoon/copilot-gpt4-service'
|
||
|
||
- regex: '^pdrl\.fm'
|
||
name: 'Podroll Analyzer'
|
||
category: 'Crawler'
|
||
url: 'https://podroll.fm'
|
||
|
||
- regex: 'PodUptime/'
|
||
name: 'PodUptime'
|
||
category: 'Site Monitor'
|
||
url: 'https://poduptime.com'
|
||
|
||
- regex: 'anthropic-ai'
|
||
name: 'Anthropic AI'
|
||
category: 'Crawler'
|
||
url: 'https://www.anthropic.com/'
|
||
producer:
|
||
name: 'Anthropic, PBC'
|
||
url: 'https://www.anthropic.com/'
|
||
|
||
- regex: 'NetpeakCheckerBot/[\d.]+'
|
||
name: 'Netpeak Checker'
|
||
category: 'Crawler'
|
||
url: 'https://netpeaksoftware.com/checker'
|
||
producer:
|
||
name: 'Netpeak LTD'
|
||
url: 'https://netpeaksoftware.com/'
|
||
|
||
- regex: 'SandobaCrawler/[\d.]+'
|
||
name: 'Sandoba//Crawler'
|
||
category: 'Crawler'
|
||
url: 'https://www.sandoba.com/en/crawler/'
|
||
producer:
|
||
name: 'SANDOBA//EBUSINESS SOLUTIONS'
|
||
url: 'https://www.sandoba.com/'
|
||
|
||
- regex: 'SirdataBot'
|
||
name: 'Sirdata'
|
||
category: 'Crawler'
|
||
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
|
||
producer:
|
||
name: 'Sirdata SAS'
|
||
url: 'https://www.sirdata.com/'
|
||
|
||
- regex: 'CheckMarkNetwork/[\d.]+'
|
||
name: 'CheckMark Network'
|
||
category: 'Crawler'
|
||
url: 'https://www.checkmarknetwork.com/spider.html/'
|
||
producer:
|
||
name: 'Exipert, Inc.'
|
||
url: 'https://www.checkmarknetwork.com/'
|
||
|
||
- regex: 'cohere-ai'
|
||
name: 'Cohere AI'
|
||
category: 'Crawler'
|
||
url: 'https://cohere.com/'
|
||
producer:
|
||
name: 'Cohere, Inc.'
|
||
url: 'https://cohere.com/'
|
||
|
||
- regex: 'PerplexityBot/[\d.]+'
|
||
name: 'PerplexityBot'
|
||
category: 'Crawler'
|
||
url: 'https://docs.perplexity.ai/docs/perplexitybot'
|
||
producer:
|
||
name: 'Perplexity AI, Inc.'
|
||
url: 'https://www.perplexity.ai/'
|
||
|
||
- regex: 'TTD-Content'
|
||
name: 'The Trade Desk Content'
|
||
category: 'Crawler'
|
||
url: 'https://www.thetradedesk.com/us/ttd-content'
|
||
producer:
|
||
name: 'The Trade Desk, Inc.'
|
||
url: 'https://www.thetradedesk.com/'
|
||
|
||
- regex: 'montastic-monitor'
|
||
name: 'Montastic Monitor'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.montastic.com/'
|
||
producer:
|
||
name: 'Metadot, Corp.'
|
||
url: 'https://www.metadot.com/'
|
||
|
||
- regex: 'Ruby, Twurly v[\d.]+'
|
||
name: 'Twurly'
|
||
category: 'Crawler'
|
||
url: 'https://twurly.org/'
|
||
|
||
- regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
|
||
name: 'Mixnode'
|
||
category: 'Crawler'
|
||
url: 'https://www.mixnode.com/'
|
||
producer:
|
||
name: 'Mixnode Technologies, Inc.'
|
||
url: 'https://www.mixnode.com/'
|
||
|
||
- regex: 'CSSCheck/[\d.]+'
|
||
name: 'CSSCheck'
|
||
category: 'Validator'
|
||
|
||
- regex: 'MicrosoftPreview/[\d.]+'
|
||
name: 'Microsoft Preview'
|
||
category: 'Service Agent'
|
||
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
|
||
producer:
|
||
name: 'Microsoft Corporation'
|
||
url: 'https://www.microsoft.com/'
|
||
|
||
- regex: 's~virustotalcloud'
|
||
name: 'VirusTotal Cloud'
|
||
category: 'Crawler'
|
||
url: 'https://www.virustotal.com/'
|
||
producer:
|
||
name: 'Chronicle Security Ireland Limited'
|
||
url: 'https://chronicle.security/'
|
||
|
||
- regex: 'TinEye/[\d.]+'
|
||
name: 'TinEye'
|
||
category: 'Crawler'
|
||
url: 'https://tineye.com/'
|
||
producer:
|
||
name: 'Idée, Inc.'
|
||
url: 'https://tineye.com/'
|
||
|
||
- regex: 'e~arsnova-filter-system'
|
||
name: 'ARSNova Filter System'
|
||
category: 'Crawler'
|
||
url: 'https://particify.de/en/'
|
||
producer:
|
||
name: 'Particify Gerhardt & Weingarten OHG'
|
||
url: 'https://particify.de/en/'
|
||
|
||
- regex: 'botify'
|
||
name: 'Botify'
|
||
category: 'Crawler'
|
||
url: 'https://www.botify.com/'
|
||
producer:
|
||
name: 'BOTIFY SAS'
|
||
url: 'https://www.botify.com/'
|
||
|
||
- regex: 'adscanner'
|
||
name: 'Adscanner'
|
||
category: 'Crawler'
|
||
url: 'https://www.alleyesonscreens.com/'
|
||
producer:
|
||
name: 'AdScanner d.o.o'
|
||
url: 'https://www.alleyesonscreens.com/'
|
||
|
||
- regex: 'online-webceo-bot/[\d.]+'
|
||
name: 'WebCEO'
|
||
category: 'Crawler'
|
||
url: 'https://www.webceo.com/'
|
||
producer:
|
||
name: 'WebCEO, LLC'
|
||
url: 'https://www.webceo.com/'
|
||
|
||
- regex: 'NetTrack'
|
||
name: 'NetTrack'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
|
||
|
||
- regex: 'htmlyse'
|
||
name: 'htmlyse'
|
||
category: 'Crawler'
|
||
url: 'https://www.htmlyse.com/'
|
||
producer:
|
||
name: 'Vistex LTD'
|
||
url: 'https://www.htmlyse.com/'
|
||
|
||
- regex: 'TrendsmapResolver/[\d.]+'
|
||
name: 'Trendsmap'
|
||
category: 'Crawler'
|
||
url: 'https://www.trendsmap.com/'
|
||
producer:
|
||
name: 'Trendsmap Pty Ltd'
|
||
url: 'https://www.trendsmap.com/'
|
||
|
||
- regex: 'Shareaholic(?:bot)?/[\d.]+'
|
||
name: 'Steve Bot'
|
||
category: 'Crawler'
|
||
url: 'https://www.shareaholic.com/steve'
|
||
producer:
|
||
name: 'Shareaholic, Inc.'
|
||
url: 'https://www.shareaholic.com/'
|
||
|
||
- regex: 'keycdn-tools:'
|
||
name: 'KeyCDN Tools'
|
||
category: 'Service Agent'
|
||
url: 'https://tools.keycdn.com/geo'
|
||
|
||
- regex: 'keycdn-tools/'
|
||
name: 'KeyCDN Tools'
|
||
category: 'Service Agent'
|
||
url: 'https://tools.keycdn.com/'
|
||
producer:
|
||
name: 'proinity LLC'
|
||
url: 'https://www.keycdn.com/'
|
||
|
||
- regex: 'Arquivo-web-crawler'
|
||
name: 'Arquivo.pt'
|
||
category: 'Crawler'
|
||
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
|
||
producer:
|
||
name: 'FCT|FCCN'
|
||
url: 'https://www.fct.pt/'
|
||
|
||
- regex: 'WhatsMyIP\.org'
|
||
name: 'WhatsMyIP.org'
|
||
category: 'Service Agent'
|
||
url: 'https://www.whatsmyip.org/ua/'
|
||
|
||
- regex: 'SenutoBot/[\d.]+'
|
||
name: 'Senuto'
|
||
category: 'Crawler'
|
||
url: 'https://www.senuto.com/'
|
||
producer:
|
||
name: 'Senuto Sp. z o.o.'
|
||
url: 'https://www.senuto.com/'
|
||
|
||
- regex: 'spaziodati'
|
||
name: 'SpazioDati'
|
||
category: 'Crawler'
|
||
url: 'https://www.spaziodati.eu/'
|
||
producer:
|
||
name: 'SpazioDati s.r.l.'
|
||
url: 'https://www.spaziodati.eu/'
|
||
|
||
- regex: 'GozleBot'
|
||
name: 'Gozle'
|
||
category: 'Crawler'
|
||
url: 'https://gozle.com.tm/en/blog/post/1'
|
||
producer:
|
||
name: 'Doly Horjun HJ'
|
||
url: 'https://gozle.com.tm/'
|
||
|
||
- regex: 'Quantcastbot/[\d.]+'
|
||
name: 'Quantcast'
|
||
category: 'Crawler'
|
||
url: 'https://www.quantcast.com/bot/'
|
||
producer:
|
||
name: 'Quantcast Corp.'
|
||
url: 'https://www.quantcast.com/'
|
||
|
||
- regex: 'FontRadar'
|
||
name: 'FontRadar'
|
||
category: 'Crawler'
|
||
url: 'https://www.fontradar.com/'
|
||
producer:
|
||
name: 'EMDASH SAS'
|
||
url: 'https://www.fontradar.com/'
|
||
|
||
- regex: 'ViberUrlDownloader'
|
||
name: 'Viber Url Downloader'
|
||
category: 'Service Agent'
|
||
url: 'https://www.viber.com/'
|
||
producer:
|
||
name: 'Viber Media S.à r.l.'
|
||
url: 'https://www.viber.com/'
|
||
|
||
- regex: '^Zeno$'
|
||
name: 'Zeno'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/internetarchive/Zeno'
|
||
producer:
|
||
name: 'The Internet Archive'
|
||
url: 'https://archive.org/'
|
||
|
||
- regex: 'Barracuda Sentinel'
|
||
name: 'Barracuda Sentinel'
|
||
category: 'Service Agent'
|
||
url: 'https://sentinel.barracudanetworks.com/'
|
||
producer:
|
||
name: 'Barracuda Networks, Inc.'
|
||
url: 'https://www.barracudanetworks.com/'
|
||
|
||
- regex: 'RuxitSynthetic/[\d.]+'
|
||
name: 'RuxitSynthetic'
|
||
category: 'Site Monitor'
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
||
producer:
|
||
name: 'Dynatrace LLC'
|
||
url: 'https://www.dynatrace.com/'
|
||
|
||
- regex: 'DynatraceSynthetic/[\d.]+'
|
||
name: 'DynatraceSynthetic'
|
||
category: 'Site Monitor'
|
||
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
||
producer:
|
||
name: 'Dynatrace LLC'
|
||
url: 'https://www.dynatrace.com/'
|
||
|
||
- regex: 'sitebulb'
|
||
name: 'Sitebulb'
|
||
category: 'Crawler'
|
||
url: 'https://sitebulb.com/'
|
||
producer:
|
||
name: 'Sitebulb Limited'
|
||
url: 'https://sitebulb.com/'
|
||
|
||
- regex: 'Monsidobot/[\d.]+'
|
||
name: 'Monsidobot'
|
||
category: 'Crawler'
|
||
url: 'https://monsido.com/bot-html'
|
||
producer:
|
||
name: 'Monsido LLC'
|
||
url: 'https://monsido.com/'
|
||
|
||
- regex: 'AccompanyBot'
|
||
name: 'AccompanyBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.accompany.com/'
|
||
producer:
|
||
name: 'Accompani, Inc'
|
||
url: 'https://www.accompany.com/'
|
||
|
||
- regex: 'Ghost Inspector'
|
||
name: 'Ghost Inspector'
|
||
category: 'Site Monitor'
|
||
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
|
||
producer:
|
||
name: 'Ghost Inspector, Inc.'
|
||
url: 'https://www.ghostinspector.com/'
|
||
|
||
- regex: 'Cypress/[\d.]+'
|
||
name: 'Cypress'
|
||
category: 'Site Monitor'
|
||
url: 'https://github.com/cypress-io/cypress'
|
||
producer:
|
||
name: 'Cypress.io, Inc.'
|
||
url: 'https://www.cypress.io/'
|
||
|
||
- regex: 'Google-Apps-Script'
|
||
name: 'Google Apps Script'
|
||
category: 'Service Agent'
|
||
url: 'https://www.google.com/script/start/'
|
||
|
||
- regex: 'SiteOne-Crawler/[\d.]+'
|
||
name: 'SiteOne Crawler'
|
||
category: 'Crawler'
|
||
url: 'https://crawler.siteone.io/bot/'
|
||
producer:
|
||
name: 'SiteOne s.r.o.'
|
||
url: 'https://www.siteone.io/'
|
||
|
||
- regex: 'Detectify'
|
||
name: 'Detectify'
|
||
category: 'Security Checker'
|
||
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
|
||
producer:
|
||
name: 'Detectify AB'
|
||
url: 'https://detectify.com/'
|
||
|
||
- regex: 'DomCopBot'
|
||
name: 'DomCop Bot'
|
||
category: 'Crawler'
|
||
url: 'https://www.domcop.com/bot'
|
||
producer:
|
||
name: 'Axeman Technology Solutions LLP'
|
||
url: 'https://axemantech.com/'
|
||
|
||
- regex: 'Paqlebot/[\d.]+'
|
||
name: 'Paqlebot'
|
||
category: 'Crawler'
|
||
url: 'https://www.paqle.dk/about/paqlebot'
|
||
producer:
|
||
name: 'Paqle A/S'
|
||
url: 'https://www.paqle.dk/'
|
||
|
||
- regex: 'Wibybot'
|
||
name: 'Wibybot'
|
||
category: 'Crawler'
|
||
url: 'https://www.wiby.me/'
|
||
|
||
- regex: 'Synapse'
|
||
name: 'Synapse'
|
||
category: 'Crawler'
|
||
url: 'https://github.com/matrix-org/synapse'
|
||
|
||
- regex: 'OSZKbot/[\d.]+'
|
||
name: 'OSZKbot'
|
||
category: 'Crawler'
|
||
url: 'http://mekosztaly.oszk.hu/mia/'
|
||
producer:
|
||
name: 'National Szechenyi Library'
|
||
url: 'https://webarchivum.oszk.hu/'
|
||
|
||
- regex: 'ZoomBot'
|
||
name: 'ZoomBot'
|
||
category: 'Crawler'
|
||
url: 'https://suite.seozoom.it/bot.html'
|
||
producer:
|
||
name: 'SEO Cube S.r.l.'
|
||
url: 'https://www.seocube.it/'
|
||
|
||
- regex: 'RavenCrawler/[\d.]+'
|
||
name: 'RavenCrawler'
|
||
category: 'Crawler'
|
||
url: 'https://raventools.com/site-auditor/'
|
||
producer:
|
||
name: 'TapClicks, Inc.'
|
||
url: 'https://www.tapclicks.com/'
|
||
|
||
- regex: 'KadoBot'
|
||
name: 'KadoBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.kadolijst.nl/bot'
|
||
producer:
|
||
name: 'Kadolijst'
|
||
url: 'https://www.kadolijst.nl/'
|
||
|
||
- regex: 'Dubbotbot/[\d.]+'
|
||
name: 'Dubbotbot'
|
||
category: 'Crawler'
|
||
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
|
||
producer:
|
||
name: 'DubBot'
|
||
url: 'https://dubbot.com/'
|
||
|
||
- regex: 'Swiftbot/[\d.]+'
|
||
name: 'Swiftbot'
|
||
category: 'Crawler'
|
||
url: 'https://swiftype.com/swiftbot'
|
||
producer:
|
||
name: 'Elasticsearch, B.V.'
|
||
url: 'https://www.elastic.co/'
|
||
|
||
- regex: 'EyeMonIT'
|
||
name: 'EyeMonit'
|
||
category: 'Site Monitor'
|
||
url: 'https://eyemonit.com/'
|
||
producer:
|
||
name: 'EyeMonit'
|
||
url: 'https://eyemonit.com/'
|
||
|
||
- regex: 'ThousandEyes'
|
||
name: 'ThousandEyes'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.thousandeyes.com/'
|
||
producer:
|
||
name: 'Cisco Systems, Inc.'
|
||
url: 'https://www.cisco.com/'
|
||
|
||
- regex: 'OmtrBot/[\d.]+'
|
||
name: 'OmtrBot'
|
||
category: 'Site Monitor'
|
||
|
||
- regex: 'WebMon/[\d.]+'
|
||
name: 'WebMon'
|
||
category: 'Site Monitor'
|
||
|
||
- regex: 'AdsTxtCrawlerTP/[\d.]+'
|
||
name: 'AdsTxtCrawlerTP'
|
||
category: 'Crawler'
|
||
|
||
- regex: 'fragFINN'
|
||
name: 'fragFINN'
|
||
category: 'Crawler'
|
||
url: 'https://www.fragfinn.de/'
|
||
producer:
|
||
name: 'fragFINN e.V.'
|
||
url: 'https://www.fragfinn.de/'
|
||
|
||
- regex: 'Clickagy'
|
||
name: 'Clickagy'
|
||
category: 'Crawler'
|
||
url: 'https://www.clickagy.com/'
|
||
producer:
|
||
name: 'Clickagy, LLC'
|
||
url: 'https://www.clickagy.com/'
|
||
|
||
- regex: 'kiwitcms-gitops/[\d.]+'
|
||
name: 'Kiwi TCMS GitOps'
|
||
category: 'Service Agent'
|
||
url: 'https://kiwitcms.org'
|
||
producer:
|
||
name: 'Open Technologies Bulgaria, Ltd.'
|
||
url: 'https://kiwitcms.org'
|
||
|
||
- regex: 'webtru_crawler'
|
||
name: 'webtru'
|
||
category: 'Crawler'
|
||
url: 'https://webtru.io/'
|
||
producer:
|
||
name: 'DataSign Inc.'
|
||
url: 'https://datasign.jp/'
|
||
|
||
- regex: 'URLSuMaBot'
|
||
name: 'URLSuMaBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.urlsuma.de/'
|
||
|
||
- regex: '360JK yunjiankong'
|
||
name: '360JK'
|
||
category: 'Site Monitor'
|
||
url: 'http://jk.cloud.360.cn/'
|
||
producer:
|
||
name: '360 Security Technology Inc.'
|
||
url: 'https://www.360.cn/'
|
||
|
||
- regex: 'UCSBNetworkMeasurement'
|
||
name: 'UCSB Network Measurement'
|
||
category: 'Crawler'
|
||
url: 'https://www.it.ucsb.edu/'
|
||
producer:
|
||
name: 'University of California, Santa Barbara'
|
||
url: 'https://www.it.ucsb.edu/'
|
||
|
||
- regex: 'Plesk screenshot bot'
|
||
name: 'Plesk Screenshot Service'
|
||
category: 'Service Agent'
|
||
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
|
||
producer:
|
||
name: 'Plesk International GmbH'
|
||
url: 'https://www.plesk.com/'
|
||
|
||
- regex: 'Who\.is'
|
||
name: 'Who.is Bot'
|
||
category: 'Crawler'
|
||
url: 'https://who.is/'
|
||
|
||
- regex: 'Probely'
|
||
name: 'Probely'
|
||
category: 'Security Checker'
|
||
url: 'https://probely.com/sos/'
|
||
producer:
|
||
name: 'Probely - Soluções de Cibersegurança, S.A.'
|
||
url: 'https://probely.com/'
|
||
|
||
- regex: 'Uptimia(?:/[\d.]+)?'
|
||
name: 'Uptimia'
|
||
category: 'Site Monitor'
|
||
url: 'https://www.uptimia.com/'
|
||
producer:
|
||
name: 'JJ Online GmbH'
|
||
url: 'https://www.uptimia.com/'
|
||
|
||
- regex: '2GDPR/[\d.]+'
|
||
name: '2GDPR'
|
||
category: 'Service Agent'
|
||
url: 'https://2gdpr.com/tos'
|
||
producer:
|
||
name: '2GDPR'
|
||
url: 'https://2gdpr.com/'
|
||
|
||
- regex: 'abuse\.xmco\.fr'
|
||
name: 'Serenety'
|
||
category: 'Security Checker'
|
||
url: 'https://abuse.xmco.fr/'
|
||
producer:
|
||
name: 'XMCO, SASU'
|
||
url: 'https://www.xmco.fr/'
|
||
|
||
- regex: 'CheckHost'
|
||
name: 'CheckHost'
|
||
category: 'Site Monitor'
|
||
url: 'https://check-host.net/'
|
||
producer:
|
||
name: 'CheckHost'
|
||
url: 'https://check-host.net/'
|
||
|
||
- regex: 'LAC_IAHarvester/[\d.]+'
|
||
name: 'LAC IA Harvester'
|
||
category: 'Crawler'
|
||
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
|
||
producer:
|
||
name: 'Library and Archives Canada'
|
||
url: 'https://library-archives.canada.ca/'
|
||
|
||
- regex: 'InsytfulBot/[\d.]+'
|
||
name: 'InsytfulBot'
|
||
category: 'Crawler'
|
||
url: 'https://www.insytful.com/'
|
||
producer:
|
||
name: 'Zengenti Limited'
|
||
url: 'https://www.zengenti.com/'
|
||
|
||
- regex: 'statista\.com'
|
||
name: 'Statista'
|
||
category: 'Crawler'
|
||
url: 'https://www.statista.com/'
|
||
producer:
|
||
name: 'Statista, Inc.'
|
||
url: 'https://www.statista.com/'
|
||
|
||
- regex: 'SubstackContentFetch/[\d.]+'
|
||
name: 'Substack Content Fetch'
|
||
category: 'Crawler'
|
||
url: 'https://substack.com/'
|
||
producer:
|
||
name: 'Substack, Inc.'
|
||
url: 'https://substack.com/'
|
||
|
||
- regex: '^ds9'
|
||
name: 'Deep SEARCH 9'
|
||
category: 'Crawler'
|
||
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
|
||
producer:
|
||
name: 'Copyright Clearance Center, Inc.'
|
||
url: 'https://www.copyright.com/'
|
||
|
||
- regex: 'LiveJournal\.com'
|
||
name: 'LiveJournal'
|
||
url: 'https://www.livejournal.com/'
|
||
category: 'Feed Fetcher'
|
||
producer:
|
||
name: 'ООО "СИМ"'
|
||
url: 'https://www.livejournal.com/'
|
||
|
||
- regex: 'bitdiscovery'
|
||
name: 'Tenable.asm'
|
||
category: 'Security Checker'
|
||
url: 'https://bitdiscovery.com/'
|
||
producer:
|
||
name: 'Tenable, Inc.'
|
||
url: 'https://www.tenable.com/'
|
||
|
||
- regex: 'Castopod/[\d.]+'
|
||
name: 'Castopod'
|
||
category: 'Crawler'
|
||
url: 'https://www.castopod.org/'
|
||
|
||
- regex: 'Elastic/Synthetics'
|
||
name: 'Elastic Synthetics'
|
||
category: 'Site Monitor'
|
||
url: 'https://github.com/elastic/synthetics'
|
||
producer:
|
||
name: 'Elasticsearch B.V.'
|
||
url: 'https://www.elastic.co/'
|
||
|
||
- regex: 'WDG_Validator/[\d.]+'
|
||
name: 'WDG HTML Validator'
|
||
category: 'Validator'
|
||
url: 'http://www.htmlhelp.com/tools/validator/'
|
||
|
||
- regex: 'scan@aegis.network'
|
||
name: 'Aegis'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
|
||
|
||
- regex: 'CrawlyProjectCrawler/[\d.]+'
|
||
name: 'Crawly Project'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
|
||
|
||
- regex: 'BDFetch'
|
||
name: 'BDFetch'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
|
||
|
||
- regex: 'PunkMap'
|
||
name: 'Punk Map'
|
||
category: 'Security Checker'
|
||
url: 'https://github.com/openeasm/punkmap'
|
||
|
||
- regex: 'GenomeCrawlerd/[\d.]+'
|
||
name: 'Deepfield Genome'
|
||
category: 'Crawler'
|
||
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
|
||
producer:
|
||
name: 'Nokia Corporation'
|
||
url: 'https://www.nokia.com/'
|
||
|
||
- regex: 'Gaisbot/[\d.]+'
|
||
name: 'Gaisbot'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
|
||
|
||
- regex: 'FAST-WebCrawler/[\d.]+'
|
||
name: 'AlltheWeb'
|
||
category: 'Crawler'
|
||
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
|
||
|
||
- regex: 'ducks\.party'
|
||
name: 'ducks.party'
|
||
category: 'Security Checker'
|
||
url: 'https://ducks.party/'
|
||
|
||
# Generic bots
|
||
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
|
||
name: 'Generic Bot'
|
||
|
||
# Generic detections
|
||
- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
|
||
name: 'Generic Bot'
|