2019-09-02 14:29:19 +03:00
###############
# Device Detector - The Universal Device Detection library for parsing User Agents
#
2020-10-14 12:25:18 +03:00
# @link https://matomo.org
2019-09-02 14:29:19 +03:00
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############
2022-03-25 12:41:04 +03:00
- regex : '360Spider'
2019-09-02 14:29:19 +03:00
name : '360Spider'
category : 'Search bot'
2022-03-25 12:41:04 +03:00
url : 'https://www.so.com/help/help_3_2.html'
2019-09-02 14:29:19 +03:00
producer :
name : 'Online Media Group, Inc.'
url : ''
- regex : 'Aboundex'
name : 'Aboundexbot'
category : 'Search bot'
url : 'http://www.aboundex.com/crawler/'
producer :
name : 'Aboundex.com'
url : 'http://www.aboundex.com'
- regex : 'AcoonBot'
name : 'Acoon'
category : 'Search bot'
url : 'http://www.acoon.de/robot.asp'
producer :
name : 'Acoon GmbH'
url : 'http://www.acoon.de'
- regex : 'AddThis\.com'
name : 'AddThis.com'
category : 'Social Media Agent'
url : ''
producer :
name : 'Clearspring Technologies, Inc.'
url : 'http://www.clearspring.com'
- regex : 'AhrefsBot'
name : 'aHrefs Bot'
category : 'Crawler'
2022-03-25 12:41:04 +03:00
url : 'https://ahrefs.com/robot'
2019-09-02 14:29:19 +03:00
producer :
name : 'Ahrefs Pte Ltd'
2022-03-25 12:41:04 +03:00
url : 'https://ahrefs.com/robot'
# Ahrefs Site Audit crawler; the capture group holds the dotted version
# number that follows the slash (digits and dots only).
- regex: 'AhrefsSiteAudit/([\d.]+)'
  name: 'AhrefsSiteAudit'
  category: 'Site Monitor'
  url: 'https://ahrefs.com/robot/site-audit'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'https://ahrefs.com/'
2019-09-02 14:29:19 +03:00
- regex : 'ia_archiver|alexabot|verifybot'
name : 'Alexa Crawler'
category : 'Search bot'
2022-03-25 12:41:04 +03:00
url : 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
2019-09-02 14:29:19 +03:00
producer :
name : 'Alexa Internet'
2022-03-25 12:41:04 +03:00
url : 'https://www.alexa.com'
2019-09-02 14:29:19 +03:00
- regex : 'alexa site audit'
name : 'Alexa Site Audit'
category : 'Site Monitor'
2022-03-25 12:41:04 +03:00
url : 'https://support.alexa.com/hc/en-us/articles/200450194'
2019-09-02 14:29:19 +03:00
producer :
name : 'Alexa Internet'
2022-03-25 12:41:04 +03:00
url : 'https://www.alexa.com'
- regex : 'Amazonbot'
name : 'Amazon Bot'
category : 'Crawler'
url : 'https://developer.amazon.com/support/amazonbot'
producer :
name : 'Amazon.com, Inc.'
url : 'https://www.amazon.com/'
2019-09-02 14:29:19 +03:00
2020-10-14 12:25:18 +03:00
- regex : 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
name : 'Amazon Route53 Health Check'
category : 'Service Agent'
producer :
name : 'Amazon Web Services'
url : 'https://aws.amazon.com/'
2019-09-02 14:29:19 +03:00
- regex : 'AmorankSpider'
name : 'Amorank Spider'
category : 'Crawler'
url : 'http://amorank.com/webcrawler.html'
producer :
name : 'Amorank'
url : 'http://www.amorank.com'
- regex : 'ApacheBench'
name : 'ApacheBench'
category : 'Benchmark'
url : 'https://httpd.apache.org/docs/2.4/programs/ab.html'
producer :
name : 'The Apache Software Foundation'
2022-03-25 12:41:04 +03:00
url : 'https://www.apache.org/foundation/'
2019-09-02 14:29:19 +03:00
- regex : 'Applebot'
name : 'Applebot'
category : 'Crawler'
2022-03-25 12:41:04 +03:00
url : 'https://support.apple.com/en-us/HT204683'
2019-09-02 14:29:19 +03:00
producer :
name : 'Apple Inc'
2022-03-25 12:41:04 +03:00
url : 'https://www.apple.com'
- regex : "AppSignalBot"
name : "AppSignalBot"
category : "Site Monitor"
url : "https://docs.appsignal.com/uptime-monitoring/"
producer :
name : "AppSignal"
url : "https://appsignal.com/"
2019-09-02 14:29:19 +03:00
- regex : 'Arachni'
name : 'Arachni'
category : 'Security Checker'
2022-03-25 12:41:04 +03:00
url : 'https://www.arachni-scanner.com/'
2019-09-02 14:29:19 +03:00
producer :
name : 'Sarosys LLC'
2022-03-25 12:41:04 +03:00
url : 'https://www.sarosys.com/'
2019-09-02 14:29:19 +03:00
2020-10-14 12:25:18 +03:00
- regex : 'AspiegelBot'
name : 'AspiegelBot'
category : 'Crawler'
url : 'https://aspiegel.com/'
producer :
name : 'Huawei'
url : 'https://www.huawei.com/'
2019-09-02 14:29:19 +03:00
- regex : 'Castro 2, Episode Duration Lookup'
name : 'Castro 2'
category : 'Service Agent'
url : 'http://supertop.co/castro/'
2022-03-25 12:41:04 +03:00
producer :
2019-09-02 14:29:19 +03:00
name : 'Supertop'
url : 'http://supertop.co'
- regex : 'Curious George'
name : 'Analytics SEO Crawler'
category : 'Crawler'
url : 'http://www.analyticsseo.com/crawler'
producer :
name : 'Analytics SEO'
url : 'http://www.analyticsseo.com'
- regex : 'archive\.org_bot|special_archiver'
name : 'archive.org bot'
category : 'Crawler'
2022-03-25 12:41:04 +03:00
url : 'https://archive.org/details/archive.org_bot'
2019-09-02 14:29:19 +03:00
producer :
name : 'The Internet Archive'
2022-03-25 12:41:04 +03:00
url : 'https://archive.org'
2019-09-02 14:29:19 +03:00
- regex : 'Ask Jeeves/Teoma'
name : 'Ask Jeeves'
category : 'Search bot'
url : ''
producer :
name : 'Ask Jeeves Inc.'
url : 'http://www.ask.com'
- regex : 'Backlink-Check\.de'
name : 'Backlink-Check.de'
category : 'Crawler'
url : 'http://www.backlink-check.de/bot.html'
producer :
name : 'Mediagreen Medienservice'
url : 'http://www.backlink-check.de'
- regex : 'BacklinkCrawler'
name : 'BacklinkCrawler'
category : 'Crawler'
url : 'http://www.backlinktest.com/crawler.html'
producer :
name : '2.0Promotion GbR'
url : 'http://www.backlinktest.com'
2022-03-25 12:41:04 +03:00
- regex : 'Baidu.*spider|baidu Transcoder'
2019-09-02 14:29:19 +03:00
name : 'Baidu Spider'
category : 'Search bot'
url : 'http://www.baidu.com/search/spider.htm'
producer :
name : 'Baidu'
url : 'http://www.baidu.com'
- regex : 'BazQux'
name : 'BazQux Reader'
url : 'https://bazqux.com/fetcher'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
2022-03-25 12:41:04 +03:00
- regex : 'Better Uptime Bot'
name : 'Better Uptime Bot'
category : 'Site Monitor'
url : 'https://betteruptime.com/faq'
producer :
name : 'Better Uptime'
url : 'https://betteruptime.com/'
2019-09-02 14:29:19 +03:00
# Microsoft search crawlers: legacy MSNBot variants, bingbot, the Bing
# preview fetcher and the ads crawler are all reported as one bot.
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'
- regex : 'Blekkobot'
name : 'Blekkobot'
category : 'Search bot'
url : 'http://blekko.com/about/blekkobot'
producer :
name : 'Blekko'
url : 'http://blekko.com'
2022-03-25 12:41:04 +03:00
- regex : 'BLEXBot'
2019-09-02 14:29:19 +03:00
name : 'BLEXBot Crawler'
category : 'Crawler'
url : 'http://webmeup-crawler.com'
producer :
name : 'WebMeUp'
url : 'http://webmeup.com'
- regex : 'Bloglovin'
name : 'Bloglovin'
url : 'http://www.bloglovin.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'Blogtrottr'
name : 'Blogtrottr'
url : ''
category : 'Feed Fetcher'
producer :
name : 'Blogtrottr Ltd'
url : 'https://blogtrottr.com/'
2020-10-14 12:25:18 +03:00
- regex : 'BoardReader Blog Indexer'
name : 'BoardReader Blog Indexer'
category : 'Crawler'
producer :
name : 'BoardReader'
2022-03-25 12:41:04 +03:00
url : 'https://boardreader.com/'
2020-10-14 12:25:18 +03:00
2019-09-02 14:29:19 +03:00
- regex : 'BountiiBot'
name : 'Bountii Bot'
category : 'Search bot'
url : 'http://bountii.com/contact.php'
producer :
name : 'Bountii Inc.'
url : 'http://bountii.com'
- regex : 'Browsershots'
name : 'Browsershots'
category : 'Service Agent'
url : 'http://browsershots.org/faq'
producer :
name : 'Browsershots.org'
url : 'http://browsershots.org'
# BUbiNG crawler from the Laboratory for Web Algorithmics.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#bubing'
- regex : '(?<!HTC)[ _]Butterfly/'
name : 'Butterfly Robot'
category : 'Search bot'
url : 'http://labs.topsy.com/butterfly'
producer :
name : 'Topsy Labs'
url : 'http://labs.topsy.com'
- regex : 'CareerBot'
name : 'CareerBot'
category : 'Crawler'
url : 'http://www.career-x.de/bot.html'
producer :
name : 'career-x GmbH'
url : 'http://www.career-x.de'
# CCBot is the Common Crawl crawler (see the bot URL below); the producer
# is therefore Common Crawl, not reddit.
- regex: 'CCBot'
  name: 'ccBot crawler'
  category: 'Crawler'
  url: 'http://commoncrawl.org/faq/'
  producer:
    name: 'Common Crawl Foundation'
    url: 'http://commoncrawl.org'
- regex : 'Cliqzbot'
name : 'Cliqzbot'
category : 'Crawler'
url : 'http://cliqz.com/company/cliqzbot'
producer :
name : '10betterpages GmbH'
url : 'http://cliqz.com'
- regex : 'Cloudflare-AMP'
name : 'CloudFlare AMP Fetcher'
category : 'Crawler'
url : 'https://amp.cloudflare.com/doc/fetcher.html'
producer :
name : 'CloudFlare'
url : 'http://www.cloudflare.com'
2022-03-25 12:41:04 +03:00
- regex : 'CloudflareDiagnostics'
name : 'Cloudflare Diagnostics'
category : 'Site Monitor'
url : 'https://www.cloudflare.com/'
producer :
name : 'Cloudflare'
url : 'https://www.cloudflare.com'
2019-09-02 14:29:19 +03:00
- regex : 'CloudFlare-AlwaysOnline'
name : 'CloudFlare Always Online'
category : 'Site Monitor'
url : 'http://www.cloudflare.com/always-online'
producer :
name : 'CloudFlare'
url : 'http://www.cloudflare.com'
2022-03-25 12:41:04 +03:00
- regex : 'coccoc.com'
2019-09-02 14:29:19 +03:00
name : 'Cốc Cốc Bot'
2020-10-14 12:25:18 +03:00
url : 'https://help.coccoc.com/en/search-engine/coccoc-robots'
2019-09-02 14:29:19 +03:00
category : 'Search bot'
producer :
name : 'Cốc Cốc'
2020-10-14 12:25:18 +03:00
url : 'https://coccoc.com/'
2019-09-02 14:29:19 +03:00
- regex : 'collectd'
name : 'Collectd'
url : 'https://collectd.org/'
category : 'Site Monitor'
producer :
name : 'Collectd'
url : 'https://collectd.org/'
- regex : 'CommaFeed'
name : 'CommaFeed'
url : 'http://www.commafeed.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'CSS Certificate Spider'
name : 'CSS Certificate Spider'
category : 'Crawler'
url : 'http://www.css-security.com/certificatespider/'
producer :
name : 'Certified Security Solutions'
url : 'https://www.css-security.com/company/about-us/'
- regex : 'Datadog Agent'
name : 'Datadog Agent'
url : 'https://github.com/DataDog/dd-agent'
category : 'Site Monitor'
producer :
name : 'Datadog'
url : 'https://www.datadoghq.com/'
2020-10-14 12:25:18 +03:00
- regex : 'Datanyze'
name : 'Datanyze'
url : ''
category : 'Crawler'
producer :
name : 'Datanyze'
url : 'https://www.datanyze.com'
2019-09-02 14:29:19 +03:00
- regex : 'Dataprovider'
name : 'Dataprovider'
category : 'Crawler'
url : ''
producer :
name : 'Dataprovider B.V.'
url : 'https://www.dataprovider.com/'
- regex : 'Daum(oa)?[ /][0-9]'
name : 'Daum'
category : 'Search bot'
url : 'http://tab.search.daum.net/aboutWebSearch_en.html'
producer :
name : 'Daum Communications Corp.'
url : 'http://www.kakaocorp.com/main'
- regex : 'Dazoobot'
name : 'Dazoobot'
category : 'Search bot'
url : ''
producer :
name : 'DAZOO.FR'
url : 'http://dazoo.fr'
2022-03-25 12:41:04 +03:00
- regex : 'discobot'
2019-09-02 14:29:19 +03:00
name : 'Discobot'
category : 'Search bot'
url : 'http://discoveryengine.com/discobot.html'
producer :
name : 'Discovery Engine'
url : 'http://discoveryengine.com'
# Matches either the bot name or the contact address it advertises.
# The dot in the address is escaped so it only matches a literal '.'.
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
  name: 'Domain Re-Animator Bot'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Domain Re-Animator, LLC'
    url: 'http://domainreanimator.com'
- regex : 'DotBot'
name : 'DotBot'
category : 'Crawler'
url : 'http://www.opensiteexplorer.org/dotbot'
producer :
name : 'SEOmoz, Inc.'
url : 'http://moz.com/'
2020-10-14 12:25:18 +03:00
- regex : 'DuckDuck(?:Go-Favicons-)?Bot'
2019-09-02 14:29:19 +03:00
name : 'DuckDuckGo Bot'
category : 'Search bot'
url : 'https://duckduckgo.com/duckduckbot'
producer :
name : 'DuckDuckGo'
url : 'https://duckduckgo.com/'
- regex : 'EasouSpider'
name : 'Easou Spider'
category : 'Search bot'
url : 'http://www.easou.com/search/spider.html'
producer :
name : 'easou ICP'
url : 'http://www.easou.com'
2020-10-14 12:25:18 +03:00
- regex : 'eCairn-Grabber'
name : 'eCairn-Grabber'
category : 'Crawler'
producer :
name : 'eCairn'
url : 'https://ecairn.com'
2019-09-02 14:29:19 +03:00
- regex : 'EMail Exractor'
name : 'EMail Exractor'
category : 'Crawler'
url : ''
producer :
name : ''
url : ''
- regex : 'evc-batch'
name : 'evc-batch'
category : 'Crawler'
url : ''
producer :
name : 'eVenture Capital Partners II, LLC'
url : 'http://www.eventures.vc/'
2022-03-25 12:41:04 +03:00
- regex : 'Exabot|ExaleadCloudview'
2019-09-02 14:29:19 +03:00
name : 'ExaBot'
category : 'Crawler'
url : 'http://www.exabot.com/go/robot'
producer :
name : 'Dassault Systèmes'
url : 'http://www.3ds.com'
- regex : 'ExactSeek Crawler'
name : 'ExactSeek Crawler'
category : 'Search bot'
url : 'http://www.exactseek.com'
producer :
name : 'Jayde Online, Inc.'
url : 'http://www.jaydeonlineinc.com'
- regex : 'Ezooms'
name : 'Ezooms'
category : 'Crawler'
url : ''
producer :
name : 'SEOmoz, Inc.'
url : 'http://moz.com/'
2022-03-25 12:41:04 +03:00
- regex : 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
2019-09-02 14:29:19 +03:00
name : 'Facebook External Hit'
category : 'Social Media Agent'
url : 'https://www.facebook.com/externalhit_uatext.php'
producer :
name : 'Facebook'
url : 'http://www.facebook.com'
- regex : 'Feedbin'
name : 'Feedbin'
url : 'http://feedbin.com/'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'FeedBurner'
name : 'FeedBurner'
url : 'http://www.feedburner.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'Feed Wrangler'
name : 'Feed Wrangler'
url : 'https://feedwrangler.net/'
category : 'Feed Fetcher'
producer :
name : 'David Smith & Developing Perspective, LLC'
url : 'https://david-smith.org'
2022-03-25 12:41:04 +03:00
- regex : 'Feedly'
2019-09-02 14:29:19 +03:00
name : 'Feedly'
url : 'http://www.feedly.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'Feedspot'
name : 'Feedspot'
url : 'http://www.feedspot.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'Fever/[0-9]'
name : 'Fever'
url : 'http://feedafever.com/'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'FlipboardProxy|FlipboardRSS'
name : 'Flipboard'
url : 'http://flipboard.com/browserproxy'
category : 'Feed Fetcher'
producer :
name : 'Flipboard'
url : 'http://flipboard.com/'
- regex : 'Findxbot'
name : 'Findxbot'
category : 'Crawler'
url : 'http://www.findxbot.com'
2020-10-14 12:25:18 +03:00
- regex : 'FreshRSS'
name : 'FreshRSS'
category : 'Feed Fetcher'
url : 'https://freshrss.org/'
2019-09-02 14:29:19 +03:00
- regex : 'Genieo'
name : 'Genieo Web filter'
category : ''
url : 'http://www.genieo.com/webfilter.html'
producer :
name : 'Genieo'
url : 'http://www.genieo.com'
- regex : 'GigablastOpenSource'
name : 'Gigablast'
category : 'Search bot'
url : 'https://github.com/gigablast/open-source-search-engine'
producer :
name : 'Matt Wells'
url : 'http://www.gigablast.com/faq.html'
- regex : 'Gluten Free Crawler'
name : 'Gluten Free Crawler'
category : 'Crawler'
url : 'http://glutenfreepleasure.com/'
producer :
name : ''
url : ''
2022-03-25 12:41:04 +03:00
- regex : 'gobuster'
name : 'Gobuster'
url : 'https://github.com/OJ/gobuster'
2019-09-02 14:29:19 +03:00
- regex : 'ichiro/mobile goo'
name : 'Goo'
category : 'Search bot'
url : 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
producer :
name : 'NTT Resonant'
url : 'http://goo.ne.jp'
2022-03-25 12:41:04 +03:00
- regex : 'Storebot-Google'
name : 'Google StoreBot'
category : 'Crawler'
2020-10-14 12:25:18 +03:00
- regex : 'Google Favicon'
name : 'Google Favicon'
category : 'Crawler'
2019-09-02 14:29:19 +03:00
- regex : 'Google Search Console'
name : 'Google Search Console'
category : 'Crawler'
url : 'https://search.google.com/search-console/about'
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
- regex : 'Google Page Speed Insights'
name : 'Google PageSpeed Insights'
category : 'Site Monitor'
url : 'http://developers.google.com/speed/pagespeed/insights/'
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
- regex : 'google_partner_monitoring'
name : 'Google Partner Monitoring'
category : 'Site Monitor'
url : ''
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
2020-10-14 12:25:18 +03:00
- regex : 'Google-Cloud-Scheduler'
name : 'Google Cloud Scheduler'
category : 'Crawler'
url : 'https://cloud.google.com/scheduler'
producer :
name : 'Google Inc.'
url : 'https://www.google.com'
2019-09-02 14:29:19 +03:00
- regex : 'Google-Structured-Data-Testing-Tool'
name : 'Google Structured Data Testing Tool'
category : 'Validator'
url : 'https://search.google.com/structured-data/testing-tool'
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
2020-10-14 12:25:18 +03:00
- regex : 'GoogleStackdriverMonitoring'
name : 'Google Stackdriver Monitoring'
category : 'Site Monitor'
url : 'https://cloud.google.com/monitoring'
producer :
name : 'Google Inc.'
url : 'https://www.google.com'
2019-09-02 14:29:19 +03:00
- regex : 'via ggpht\.com GoogleImageProxy'
name : 'Gmail Image Proxy'
category : 'Crawler'
url : ''
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
2020-10-14 12:25:18 +03:00
2019-09-02 14:29:19 +03:00
- regex : 'SeznamEmailProxy'
name : 'Seznam Email Proxy'
category : 'Crawler'
url : ''
producer :
name : 'Seznam.cz, a.s.'
url : 'http://www.seznam.cz/'
- regex : 'Seznam-Zbozi-robot'
name : 'Seznam Zbozi.cz'
category : 'Crawler'
url : ''
producer :
name : 'Seznam.cz, a.s.'
url : 'https://www.zbozi.cz/'
- regex : 'Heurekabot-Feed'
name : 'Heureka Feed'
category : 'Crawler'
url : 'https://sluzby.heureka.cz/napoveda/heurekabot/'
producer :
name : 'Heureka.cz, a.s.'
url : 'https://www.heureka.cz/'
- regex : 'ShopAlike'
name : 'ShopAlike'
category : 'Crawler'
url : ''
producer :
name : 'Visual Meta'
url : 'https://www.shopalike.cz/'
2022-03-25 12:41:04 +03:00
- regex : 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AssociationService|Producer)|Google.*/\+/web/snippet'
2019-09-02 14:29:19 +03:00
name : 'Googlebot'
category : 'Search bot'
url : 'http://www.google.com/bot.html'
producer :
name : 'Google Inc.'
url : 'http://www.google.com'
- regex : 'heritrix'
name : 'Heritrix'
category : 'Crawler'
url : 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
producer :
name : 'The Internet Archive'
2022-03-25 12:41:04 +03:00
url : 'https://archive.org'
2019-09-02 14:29:19 +03:00
- regex : 'HubSpot '
name : 'HubSpot'
category : 'Crawler'
producer :
name : 'HubSpot Inc.'
url : 'https://www.hubspot.com'
- regex : 'HTTPMon'
name : 'HTTPMon'
category : 'Site Monitor'
url : 'http://www.httpmon.com'
producer :
name : 'towards GmbH'
url : 'http://www.towards.ch/'
- regex : 'ICC-Crawler'
name : 'ICC-Crawler'
category : 'Crawler'
url : 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
producer :
name : ''
url : ''
2020-10-14 12:25:18 +03:00
# Inoreader feed reader; identified by its domain in the user agent.
# The dot is escaped so it only matches a literal '.'.
- regex: 'inoreader\.com'
  name: 'inoreader'
  category: 'Feed Reader'
  url: 'https://www.inoreader.com'
2019-09-02 14:29:19 +03:00
- regex : 'iisbot'
name : 'IIS Site Analysis'
category : 'Crawler'
url : 'http://www.iis.net/iisbot.html'
producer :
name : 'Microsoft Corporation'
url : 'http://www.microsoft.com'
- regex : 'ips-agent'
name : 'IPS Agent'
2022-03-25 12:41:04 +03:00
category : 'Crawler'
2019-09-02 14:29:19 +03:00
producer :
name : 'VeriSign, Inc'
url : 'http://www.verisign.com/'
- regex : 'IP-Guide\.com'
name : 'IP-Guide Crawler'
category : 'Crawler'
url : ''
producer :
name : ''
url : 'https://ip-guide.com'
2022-03-25 12:41:04 +03:00
- regex : 'k6/[0-9\.]+'
name : 'K6'
url : 'https://k6.io/'
2019-09-02 14:29:19 +03:00
- regex : 'kouio'
name : 'Kouio'
url : 'http://kouio.com/'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'larbin'
name : 'Larbin web crawler'
category : 'Crawler'
url : 'http://larbin.sourceforge.net'
producer :
name : ''
url : ''
# Lighthouse audits, e.g. "<prefix>-Lighthouse". The optional prefix is
# restricted to ASCII letters and digits; the former range `A-z` also
# admitted the punctuation that sits between 'Z' and 'a' in ASCII.
- regex: '([A-Za-z0-9]*)-Lighthouse'
  name: 'Lighthouse'
  category: 'Site Monitor'
  url: 'https://developers.google.com/web/tools/lighthouse'
  producer:
    name: 'Lighthouse'
    url: 'https://developers.google.com/web/tools/lighthouse'
2022-03-25 12:41:04 +03:00
- regex : 'linkdexbot|linkdex\.com'
2019-09-02 14:29:19 +03:00
name : 'Linkdex Bot'
category : 'Search bot'
url : 'http://www.linkdex.com/bots'
producer :
name : 'Mojeek Ltd.'
url : 'http://www.mojeek.com'
- regex : 'LinkedInBot'
name : 'LinkedIn Bot'
category : 'Social Media Agent'
url : 'http://www.linkedin.com'
producer :
name : 'LinkedIn'
url : 'http://www.linkedin.com'
- regex : 'ltx71'
name : 'LTX71'
url : 'http://ltx71.com/'
producer :
name : ''
url : ''
2022-03-25 12:41:04 +03:00
- regex : 'Mail\.RU'
2019-09-02 14:29:19 +03:00
name : 'Mail.Ru Bot'
category : 'Search bot'
url : 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
producer :
name : 'Mail.Ru Group'
url : 'http://corp.mail.ru'
- regex : 'magpie-crawler'
name : 'Magpie-Crawler'
category : 'Social Media Agent'
url : 'http://www.brandwatch.com/magpie-crawler/'
producer :
name : 'Brandwatch'
url : 'http://www.brandwatch.com'
- regex : 'MagpieRSS'
name : 'MagpieRSS'
url : 'http://magpierss.sourceforge.net/'
category : 'Feed Parser'
producer :
name : ''
url : ''
2022-03-25 12:41:04 +03:00
- regex : 'masscan'
2019-09-02 14:29:19 +03:00
name : 'masscan'
url : 'https://github.com/robertdavidgraham/masscan'
category : 'Crawler'
producer :
name : 'Robert Graham'
url : 'https://github.com/robertdavidgraham'
2020-10-14 12:25:18 +03:00
- regex : 'Mastodon/'
name : 'Mastodon Bot'
category : 'Social Media Agent'
2019-09-02 14:29:19 +03:00
- regex : 'meanpathbot'
name : 'Meanpath Bot'
category : 'Search bot'
url : 'http://www.meanpath.com/meanpathbot.html'
producer :
name : 'Meanpath'
url : 'http://www.meanpath.com'
- regex : 'MetaJobBot'
name : 'MetaJobBot'
category : 'Crawler'
url : 'http://www.metajob.at/the/crawler'
producer :
name : 'MetaJob'
url : 'http://www.metajob.at'
- regex : 'MetaInspector'
name : 'MetaInspector'
category : 'Crawler'
url : 'https://github.com/jaimeiniesta/metainspector'
- regex : 'MixrankBot'
name : 'Mixrank Bot'
category : 'Crawler'
url : 'http://mixrank.com'
producer :
name : 'Online Media Group, Inc.'
url : ''
- regex : 'MJ12bot'
name : 'MJ12 Bot'
category : 'Search bot'
url : 'http://majestic12.co.uk/bot.php'
producer :
name : 'Majestic-12'
url : 'http://majestic12.co.uk'
- regex : 'Mnogosearch'
name : 'Mnogosearch'
category : 'Search bot'
url : 'http://www.mnogosearch.org/'
producer :
name : 'Lavtech.Com Corp.'
url : ''
- regex : 'MojeekBot'
name : 'MojeekBot'
category : 'Search bot'
url : 'http://www.mojeek.com/bot.html'
producer :
name : 'Mojeek Ltd.'
url : 'http://www.mojeek.com'
- regex : 'munin'
name : 'Munin'
category : 'Site Monitor'
url : 'http://munin-monitoring.org/'
producer :
name : 'Munin'
url : 'http://munin-monitoring.org/'
- regex : 'NalezenCzBot'
name : 'NalezenCzBot'
category : 'Crawler'
url : 'http://www.nalezen.cz/about-crawler'
producer :
name : 'Jaroslav Kuboš'
url : ''
- regex : 'check_http/v'
name : 'Nagios check_http'
category : 'Site Monitor'
url : 'https://nagios.org'
producer :
name : 'Nagios Plugins Development Team'
url : 'https://nagios.org'
2020-10-14 12:25:18 +03:00
# Crawler identified only by the obfuscated contact address in its user
# agent. Parentheses and the dot are escaped to match literally.
- regex: 'nbertaupete95\(at\)gmail\.com'
  name: 'nbertaupete95'
  category: 'Crawler'
2019-09-02 14:29:19 +03:00
- regex : 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
name : 'Netcraft Survey Bot'
category : 'Search bot'
url : ''
producer :
name : 'Netcraft'
url : 'http://www.netcraft.com'
- regex : 'netEstate NE Crawler'
name : 'netEstate'
2020-10-14 12:25:18 +03:00
category : 'Crawler'
2019-09-02 14:29:19 +03:00
url : 'http://www.website-datenbank.de/Impressum'
producer :
name : 'netEstate GmbH'
url : 'https://www.netestate.de/en/'
- regex : 'Netvibes'
name : 'Netvibes'
url : 'http://www.netvibes.com/'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'NewsBlur .*(Fetcher|Finder)'
name : 'NewsBlur'
url : 'http://www.newsblur.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'NewsGatorOnline'
name : 'NewsGator'
url : 'http://www.newsgator.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'nlcrawler'
name : 'NLCrawler'
category : 'Crawler'
url : ''
producer :
name : 'Northern Light'
url : 'http://northernlight.com'
- regex : 'Nmap Scripting Engine'
name : 'Nmap'
category : 'Security Checker'
url : 'https://nmap.org/book/nse.html'
producer :
name : 'Nmap'
url : 'https://nmap.org/'
2020-10-14 12:25:18 +03:00
- regex : 'Nuzzel'
name : 'Nuzzel'
category : 'Crawler'
producer :
name : 'Nuzzel'
2022-03-25 12:41:04 +03:00
url : 'https://www.nuzzel.com/'
2020-10-14 12:25:18 +03:00
2019-09-02 14:29:19 +03:00
- regex : 'Octopus [0-9]'
name : 'Octopus'
2022-03-25 12:41:04 +03:00
- regex : 'omgili'
2019-09-02 14:29:19 +03:00
name : 'Omgili bot'
category : 'Search bot'
url : 'http://www.omgili.com/Crawler.html'
producer :
name : 'Omgili'
url : 'http://www.omgili.com'
- regex : 'OpenindexSpider'
name : 'Openindex Spider'
category : 'Search bot'
url : 'http://www.openindex.io/en/webmasters/spider.html'
producer :
name : 'Openindex B.V.'
url : 'http://www.openindex.io'
- regex : 'spbot'
name : 'OpenLinkProfiler'
category : 'Crawler'
url : 'http://openlinkprofiler.org/bot'
producer :
name : 'Axandra GmbH'
url : 'http://www.axandra.com'
- regex : 'OpenWebSpider'
name : 'OpenWebSpider'
category : 'Crawler'
url : 'http://www.openwebspider.org'
producer :
name : 'OpenWebSpider Lab'
url : 'http://lab.openwebspider.org'
- regex : 'OrangeBot|VoilaBot'
name : 'Orange Bot'
category : 'Search bot'
url : 'http://lemoteur.orange.fr'
producer :
name : 'Orange'
url : 'http://www.orange.fr'
- regex : 'PaperLiBot'
name : 'PaperLiBot'
category : 'Search bot'
url : 'http://support.paper.li/entries/20023257-what-is-paper-li'
producer :
name : 'Smallrivers SA'
url : 'http://www.paper.li'
- regex : 'phantomas/'
name : 'Phantomas'
category : 'Site Monitor'
url : 'https://github.com/macbre/phantomas'
- regex : 'phpservermon'
name : 'PHP Server Monitor'
category : 'Site Monitor'
url : 'https://github.com/phpservermon/phpservermon'
producer :
name : 'PHP Server Monitor'
url : 'http://www.phpservermonitor.org/'
- regex : 'PocketParser'
name : 'PocketParser'
category : 'Read-it-later Service'
url : 'https://getpocket.com/pocketparser_ua'
producer :
name : 'Pocket'
url : 'https://getpocket.com/'
- regex : 'PritTorrent'
name : 'PritTorrent'
category : 'Crawler'
url : 'https://github.com/astro/prittorrent'
producer :
name : 'Bitlove'
url : 'http://bitlove.org/'
2020-10-14 12:25:18 +03:00
- regex : 'PRTG Network Monitor'
name : 'PRTG Network Monitor'
category : 'Network Monitor'
url : 'https://www.paessler.com/prtg'
producer :
name : 'Paessler AG'
url : 'https://www.paessler.com'
2022-03-25 12:41:04 +03:00
- regex : 'psbot'
2019-09-02 14:29:19 +03:00
name : 'Picsearch bot'
category : 'Search bot'
url : 'http://www.picsearch.com/bot.html'
producer :
name : 'Picsearch'
url : 'http://www.picsearch.com'
2022-03-25 12:41:04 +03:00
- regex : 'Pingdom(?:\.com|TMS)'
2019-09-02 14:29:19 +03:00
name : 'Pingdom Bot'
category : 'Site Monitor'
url : ''
producer :
name : 'Pingdom AB'
url : 'https://www.pingdom.com'
- regex : 'Quora Link Preview'
name : 'Quora Link Preview'
category : 'Crawler'
url : ''
producer :
name : 'Quora'
url : 'http://www.quora.com'
2022-03-25 12:41:04 +03:00
- regex : 'Quora-Bot'
name : 'Quora Bot'
category : 'Crawler'
url : ''
producer :
name : 'Quora'
url : 'https://www.quora.com/'
2019-09-02 14:29:19 +03:00
- regex : 'RamblerMail'
name : 'RamblerMail Image Proxy'
category : 'Crawler'
url : ''
producer :
name : 'Rambler&Co'
url : 'https://rambler-co.ru/'
- regex : 'QuerySeekerSpider'
name : 'QuerySeekerSpider'
category : 'Crawler'
url : 'http://queryseeker.com/bot.html'
producer :
name : 'QueryEye Inc.'
url : 'http://queryeye.com'
- regex : 'Qwantify'
name : 'Qwantify'
category : 'Crawler'
url : 'https://www.qwant.com/'
producer :
name : 'Qwant Corporation'
url : 'https://www.qwant.com/'
- regex : 'Rainmeter'
name : 'Rainmeter'
category : 'Crawler'
url : 'https://www.rainmeter.net'
- regex : 'redditbot'
name : 'Reddit Bot'
category : 'Social Media Agent'
url : 'http://www.reddit.com/feedback'
producer :
name : 'reddit inc.'
url : 'http://www.reddit.com'
- regex : 'Riddler'
name : 'Riddler'
category : 'Security search bot'
url : 'https://riddler.io/about'
producer :
name : 'F-Secure'
url : 'https://www.f-secure.com'
- regex : 'rogerbot'
name : 'Rogerbot'
category : 'Crawler'
url : 'http://moz.com/help/pro/what-is-rogerbot-'
producer :
name : 'SEOmoz, Inc.'
url : 'http://moz.com/'
- regex : 'ROI Hunter'
name : 'ROI Hunter'
category : 'Crawler'
url : ''
producer :
name : 'Roihunter a.s.'
url : 'http://roihunter.com/'
- regex : 'SafeDNSBot'
name : 'SafeDNSBot'
category : 'Crawler'
url : 'https://www.safedns.com/searchbot'
producer :
name : 'SafeDNS, Inc.'
url : 'https://www.safedns.com/'
- regex : 'Scrapy'
name : 'Scrapy'
category : 'Crawler'
url : 'http://scrapy.org'
- regex : 'Screaming Frog SEO Spider'
name : 'Screaming Frog SEO Spider'
category : 'Crawler'
url : 'http://www.screamingfrog.co.uk/seo-spider'
producer :
name : 'Screaming Frog Ltd'
url : 'http://www.screamingfrog.co.uk'
- regex : 'ScreenerBot'
name : 'ScreenerBot'
category : 'Crawler'
url : 'http://www.screenerbot.com'
producer :
name : ''
url : ''
- regex : 'SemrushBot'
name : 'Semrush Bot'
category : 'Crawler'
url : 'http://www.semrush.com/bot.html'
producer :
name : 'SEMrush'
url : 'http://www.semrush.com'
- regex : 'SensikaBot'
name : 'Sensika Bot'
category : ''
url : ''
producer :
name : 'Sensika'
url : 'http://sensika.com'
- regex : 'SEOENG(World)?Bot'
name : 'SEOENGBot'
category : 'Crawler'
url : 'http://www.seoengine.com/seoengbot.htm'
producer :
name : 'SEO Engine'
url : 'http://www.seoengine.com'
- regex : 'SEOkicks-Robot'
name : 'SEOkicks-Robot'
category : 'Crawler'
url : 'http://www.seokicks.de/robot.html'
producer :
name : 'SEOkicks'
url : 'https://www.seokicks.de/'
- regex : 'seoscanners\.net'
name : 'Seoscanners.net'
category : 'Crawler'
url : ''
- regex : 'SkypeUriPreview'
name : 'Skype URI Preview'
category : 'Service Agent'
url : ''
producer :
name : 'Skype Communications S.à.r.l.'
url : 'https://www.skype.com'
- regex : 'SeznamBot|SklikBot|Seznam screenshot-generator'
name : 'Seznam Bot'
category : 'Search bot'
url : 'http://www.mapy.cz/cz/seznambot.html'
producer :
name : 'Seznam.cz, a.s.'
url : 'http://www.seznam.cz/'
2020-10-14 12:25:18 +03:00
- regex : 'shopify-partner-homepage-scraper'
name : 'Shopify Partner'
category : 'Crawler'
url : 'https://www.shopify.com/partners'
producer :
name : 'Shopify'
url : 'https://www.shopify.com/'
2019-09-02 14:29:19 +03:00
- regex : 'ShopWiki'
name : 'ShopWiki'
category : 'Search tools'
url : 'http://www.shopwiki.com/wiki/Help:Bot'
producer :
name : 'ShopWiki Corp.'
url : 'http://www.shopwiki.com'
- regex : 'SilverReader'
name : 'SilverReader'
url : 'http://silverreader.com'
category : 'Feed Fetcher'
producer :
name : ''
url : ''
- regex : 'SimplePie'
name : 'SimplePie'
url : 'http://www.simplepie.org'
category : 'Feed Parser'
producer :
name : ''
url : ''
- regex : 'SISTRIX Crawler'
name : 'SISTRIX Crawler'
category : 'Crawler'
url : 'http://crawler.sistrix.net'
producer :
name : 'SISTRIX GmbH'
url : 'http://www.sistrix.de'
2020-10-14 12:25:18 +03:00
- regex : 'compatible; (?:SISTRIX )?Optimizer'
name : 'SISTRIX Optimizer'
category : 'Crawler'
url : 'https://optimizer.sistrix.com'
producer :
name : 'SISTRIX GmbH'
url : 'http://www.sistrix.de'
2019-09-02 14:29:19 +03:00
- regex : 'SiteSucker'
name : 'SiteSucker'
category : 'Crawler'
url : 'http://ricks-apps.com/osx/sitesucker/'
# Sixy.ch monitor; identified by its domain in the user agent.
# The dot is escaped so it only matches a literal '.'.
- regex: 'sixy\.ch'
  name: 'Sixy.ch'
  category: 'Site Monitor'
  url: 'http://sixy.ch'
  producer:
    name: 'Manuel Kasper'
    url: 'https://neon1.net/'
# Slackbot, Sogou Spider, Soso Spider and Sprinklr.
- regex: 'Slackbot|Slack-ImgProxy'
  name: 'Slackbot'
  category: 'Crawler'
  url: 'https://api.slack.com/robots'
  producer:
    name: 'Slack Technologies'
    url: 'http://slack.com'
- regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
  name: 'Sogou Spider'
  category: 'Search bot'
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
  producer:
    name: 'Sohu, Inc.'
    url: 'http://www.sogou.com'
- regex: 'Sosospider|Sosoimagespider'
  name: 'Soso Spider'
  category: 'Search bot'
  url: 'http://help.soso.com/webspider.htm'
  producer:
    name: 'Tencent Holdings'
    url: 'http://www.soso.com'
- regex: 'Sprinklr'
  name: 'Sprinklr'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Sprinklr, Inc.'
    url: 'https://www.sprinklr.com/'
2019-09-02 14:29:19 +03:00
# sqlmap, SSL Labs, StatusCake, Superfeedr, Sparkler and Spinn3r.
- regex: 'sqlmap/'
  name: 'sqlmap'
  category: 'Security Checker'
  url: 'http://sqlmap.org/'
  producer:
    name: 'sqlmap'
    url: 'http://sqlmap.org/'
- regex: 'SSL Labs'
  name: 'SSL Labs'
  category: 'Validator'
  url: 'https://www.ssllabs.com/about/assessment.html'
  producer:
    name: 'SSL Labs'
    url: 'https://www.ssllabs.com/about/assessment.html'
- regex: 'StatusCake'
  name: 'StatusCake'
  category: 'Site Monitor'
  url: 'https://www.statuscake.com'
  producer:
    name: 'StatusCake'
    url: 'https://www.statuscake.com'
- regex: 'Superfeedr bot'
  name: 'Superfeedr Bot'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Superfeedr'
    url: 'https://superfeedr.com/'
- regex: 'Sparkler/[0-9]'
  name: 'Sparkler'
  category: 'Crawler'
  url: 'https://github.com/USCDataScience/sparkler'
- regex: 'Spinn3r'
  name: 'Spinn3r'
  category: 'Crawler'
  url: 'http://spinn3r.com/robot'
  producer:
    name: 'Tailrank Inc'
    url: 'http://spinn3r.com'
2022-03-25 12:41:04 +03:00
# The three Sputnik bots, Survey Bot, Tarmot Gezgin and TelegramBot.
- regex: 'SputnikBot'
  name: 'Sputnik Bot'
  category: 'Crawler'
  url: ''
- regex: 'SputnikFaviconBot'
  name: 'Sputnik Favicon Bot'
  category: 'Crawler'
  url: ''
- regex: 'SputnikImageBot'
  name: 'Sputnik Image Bot'
  category: 'Crawler'
  url: ''
- regex: 'SurveyBot'
  name: 'Survey Bot'
  category: 'Search bot'
  url: 'http://www.domaintools.com/webmasters/surveybot.php'
  producer:
    name: 'Domain Tools'
    url: 'http://www.domaintools.com'
- regex: 'TarmotGezgin'
  name: 'Tarmot Gezgin'
  category: 'Search bot'
  url: 'http://www.tarmot.com/gezgin/'
- regex: 'TelegramBot'
  name: 'TelegramBot'
  url: 'https://telegram.org/blog/bot-revolution'
# TLSProbe, TinEye Crawler and Tiny Tiny RSS.
- regex: 'TLSProbe'
  name: 'TLSProbe'
  category: 'Security search bot'
  url: 'https://scan.trustnet.venafi.com/'
  producer:
    name: 'Venafi TrustNet'
    url: 'https://www.venafi.com'
- regex: 'TinEye-bot'
  name: 'TinEye Crawler'
  category: 'Search bot'
  url: 'http://www.tineye.com/crawler.html'
  producer:
    name: 'Idée Inc.'
    url: 'http://ideeinc.com'
- regex: 'Tiny Tiny RSS'
  name: 'Tiny Tiny RSS'
  category: 'Feed Fetcher'
  url: 'http://tt-rss.org'
  producer:
    name: ''
    url: ''
2020-10-14 12:25:18 +03:00
# The Old Reader feed reader. The dot is escaped so the pattern matches the
# literal host name only.
- regex: 'theoldreader\.com'
  name: 'theoldreader'
  category: 'Feed Reader'
  url: 'https://theoldreader.com'
2019-09-02 14:29:19 +03:00
# Trendiction, Turnitin, TweetedTimes and Tweetmeme crawlers.
- regex: 'trendictionbot'
  name: 'Trendiction Bot'
  category: 'Crawler'
  url: 'http://www.trendiction.de/bot'
  producer:
    name: 'Talkwalker Inc.'
    url: 'http://www.talkwalker.com'
- regex: 'TurnitinBot'
  name: 'TurnitinBot'
  category: 'Crawler'
  url: 'http://www.turnitin.com/robot/crawlerinfo.html'
  producer:
    name: 'iParadigms, LLC.'
    url: 'http://www.turnitin.com'
- regex: 'TweetedTimes Bot'
  name: 'TweetedTimes Bot'
  category: 'Crawler'
  url: 'http://tweetedtimes.com'
  producer:
    name: 'TweetedTimes'
    url: 'http://tweetedtimes.com/'
- regex: 'TweetmemeBot'
  name: 'Tweetmeme Bot'
  category: 'Crawler'
  url: 'http://tweetmeme.com/'
  producer:
    name: 'Mediasift'
    url: ''
2020-10-14 12:25:18 +03:00
# Twingly Recon, Twitterbot, UniversalFeedParser and the UkrNet mail proxy.
- regex: 'Twingly Recon'
  name: 'Twingly Recon'
  category: 'Crawler'
  producer:
    name: 'Twingly'
    url: 'https://www.twingly.com'
- regex: 'Twitterbot'
  name: 'Twitterbot'
  category: 'Social Media Agent'
  url: 'https://dev.twitter.com/docs/cards/getting-started'
  producer:
    name: 'Twitter'
    url: 'http://www.twitter.com'
- regex: 'UniversalFeedParser'
  name: 'UniversalFeedParser'
  category: 'Feed Fetcher'
  url: 'https://github.com/kurtmckee/feedparser'
  producer:
    name: 'Kurt McKee'
    url: 'https://github.com/kurtmckee'
- regex: 'via secureurl\.fwdcdn\.com'
  name: 'UkrNet Mail Proxy'
  category: 'Crawler'
  url: ''
  producer:
    name: 'UkrNet Ltd'
    url: 'https://www.ukr.net/'
# Uptimebot, Uptime Robot, URLAppendBot, Vagabondo, VK Share and VSMCrawler.
- regex: 'Uptimebot'
  name: 'Uptimebot'
  category: 'Site Monitor'
  url: 'https://uptime.com/uptimebot'
  producer:
    name: 'Uptime'
    url: 'https://uptime.com'
- regex: 'UptimeRobot'
  name: 'Uptime Robot'
  category: 'Site Monitor'
  url: ''
  producer:
    name: 'Uptime Robot'
    url: 'http://uptimerobot.com'
- regex: 'URLAppendBot'
  name: 'URLAppendBot'
  category: 'Crawler'
  url: 'http://www.profound.net/urlappendbot.html'
  producer:
    name: 'Profound Networks'
    url: 'http://www.profound.net'
- regex: 'Vagabondo'
  name: 'Vagabondo'
  category: 'Crawler'
  url: ''
  producer:
    name: 'WiseGuys'
    url: 'http://www.wise-guys.nl/'
- regex: 'vkShare; '
  name: 'VK Share Button'
  category: 'Crawler'
  url: 'http://vk.com/dev/Share'
  producer:
    name: 'VK'
    url: 'http://vk.com/'
- regex: 'VSMCrawler'
  name: 'Visual Site Mapper Crawler'
  category: 'Crawler'
  url: 'http://www.visualsitemapper.com/crawler'
  producer:
    name: 'Alentum Software Ltd.'
    url: 'http://www.alentum.com'
# W3C validators: CSS (Jigsaw), I18N checker and link checker.
- regex: 'Jigsaw'
  name: 'W3C CSS Validator'
  category: 'Validator'
  url: 'http://jigsaw.w3.org/css-validator'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
- regex: 'W3C_I18n-Checker'
  name: 'W3C I18N Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/i18n-checker'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
- regex: 'W3C-checklink'
  name: 'W3C Link Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/checklink'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
# W3C markup validator. The dot in "Validator.nu" is escaped so it matches the
# literal name only.
- regex: 'W3C_Validator|Validator\.nu'
  name: 'W3C Markup Validation Service'
  category: 'Validator'
  url: 'http://validator.w3.org/services'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
# W3C MobileOK and Unicorn, Wappalyzer, WebPageTest, WeSEE and WebbCrawler.
- regex: 'W3C-mobileOK'
  name: 'W3C MobileOK Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/mobile'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
- regex: 'W3C_Unicorn'
  name: 'W3C Unified Validator'
  category: 'Validator'
  url: 'http://validator.w3.org/unicorn'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
- regex: 'Wappalyzer'
  name: 'Wappalyzer'
  url: 'https://github.com/AliasIO/Wappalyzer'
  producer:
    name: 'AliasIO'
    url: 'https://github.com/AliasIO'
- regex: 'PTST/'
  name: 'WebPageTest'
  category: 'Site Monitor'
  url: 'https://www.webpagetest.org'
- regex: 'WeSEE'
  name: 'WeSEE:Search'
  category: 'Search bot'
  url: 'http://www.wesee.com/bot'
  producer:
    name: 'WeSEE Ltd'
    url: 'http://www.wesee.com'
- regex: 'WebbCrawler'
  name: 'WebbCrawler'
  category: 'Crawler'
  url: 'http://badcheese.com/crawler.html'
  producer:
    name: 'Steve Webb'
    url: 'http://badcheese.com'
# WebSitePulse monitoring checker. The entry url points at WebSitePulse's own
# site (same address as the producer url), not at an unrelated crawler page.
- regex: 'websitepulse[+ ]checker'
  name: 'WebSitePulse'
  category: 'Site Monitor'
  url: 'http://www.websitepulse.com/'
  producer:
    name: 'WebSitePulse'
    url: 'http://www.websitepulse.com/'
# WordPress, Wotbox, XenForo and YaCy.
- regex: 'WordPress'
  name: 'WordPress'
  category: 'Service Agent'
  url: 'https://wordpress.org/'
  producer:
    name: 'Wordpress.org'
    url: 'https://wordpress.org/'
- regex: 'Wotbox'
  name: 'Wotbox'
  category: 'Search bot'
  url: 'http://www.wotbox.com/bot/'
  producer:
    name: 'Wotbox'
    url: 'http://www.wotbox.com'
- regex: 'XenForo'
  name: 'XenForo'
  category: 'Service Agent'
  url: 'https://xenforo.com/'
  producer:
    name: 'XenForo Ltd.'
    url: 'https://xenforo.com/'
- regex: 'yacybot'
  name: 'YaCy'
  category: 'Search bot'
  url: 'http://yacy.net/bot.html'
  producer:
    name: 'YaCy'
    url: 'http://yacy.net'
# Yahoo agents: Slurp, Link Preview, Mail Proxy, Cache System and Japan BRW.
- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
  name: 'Yahoo! Slurp'
  category: 'Search bot'
  url: 'http://help.yahoo.com/ysearch/slurp'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
  name: 'Yahoo! Link Preview'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
- regex: 'YahooMailProxy'
  name: 'Yahoo! Mail Proxy'
  category: 'Service Agent'
  url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
- regex: 'YahooCacheSystem'
  name: 'Yahoo! Cache System'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
- regex: 'Y!J-BRW'
  name: 'Yahoo! Japan BRW'
  category: 'Crawler'
  url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
  producer:
    name: 'Yahoo! Japan Corp.'
    url: 'https://www.yahoo.co.jp/'
2022-03-25 12:41:04 +03:00
# Yandex, Naver (Yeti) and Youdao search bots.
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
  name: 'Yandex Bot'
  category: 'Search bot'
  url: 'http://www.yandex.com/bots'
  producer:
    name: 'Yandex LLC'
    url: 'http://company.yandex.com'
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
  name: 'Yeti/Naverbot'
  category: 'Search bot'
  url: 'http://help.naver.com/robots/'
  producer:
    name: 'Naver'
    url: 'http://www.naver.com'
- regex: 'YoudaoBot'
  name: 'Youdao Bot'
  category: 'Search bot'
  url: 'http://www.youdao.com/help/webmaster/spider'
  producer:
    name: 'NetEase, Inc.'
    url: 'http://corp.163.com'
# Yourls, Yunyun, zgrab, Zookabot, ZumBot and the Yottaa site monitor.
- regex: 'YOURLS v[0-9]'
  name: 'Yourls'
  category: 'Crawler'
  url: 'http://yourls.org'
- regex: 'YRSpider|YYSpider'
  name: 'Yunyun Bot'
  category: 'Search bot'
  url: 'http://www.yunyun.com/SiteInfo.php?r=about'
  producer:
    name: 'YunYun'
    url: 'http://www.yunyun.com'
- regex: 'zgrab'
  name: 'zgrab'
  category: 'Security Checker'
  url: 'https://github.com/zmap/zgrab'
- regex: 'Zookabot'
  name: 'Zookabot'
  category: 'Crawler'
  url: 'http://zookabot.com'
  producer:
    name: 'Hwacha ApS'
    url: 'http://hwacha.dk'
- regex: 'ZumBot'
  name: 'ZumBot'
  category: 'Search bot'
  url: 'http://help.zum.com/inquiry'
  producer:
    name: 'ZUM internet'
    url: 'http://www.zuminternet.com/'
- regex: 'YottaaMonitor'
  name: 'Yottaa Site Monitor'
  category: 'Site Monitor'
  url: 'http://www.yottaa.com/products/site-monitor'
  producer:
    name: 'Yottaa'
    url: 'http://www.yottaa.com/'
# Yahoo Gemini, Outbrain, HubPages and Pinterest crawlers.
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
  name: 'Yahoo Gemini'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
- regex: '.*Java.*outbrain'
  name: 'Outbrain'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Outbrain'
    url: 'http://www.outbrain.com/'
- regex: 'HubPages.*crawlingpolicy'
  name: 'HubPages'
  category: 'Crawler'
  url: 'https://hubpages.com/help/crawlingpolicy'
  producer:
    name: 'HubPages, Inc.'
    url: 'https://discover.hubpages.com/'
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
  name: 'Pinterest'
  category: 'Crawler'
  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
  producer:
    name: 'Pinterest'
    url: 'https://www.pinterest.com/'
2019-09-02 14:29:19 +03:00
# Site24x7, the two Snapchat agents, Let's Encrypt, Grapeshot and Monitor.Us.
- regex: 'Site24x7'
  name: 'Site24x7 Website Monitoring'
  category: 'Site Monitor'
  url: 'https://www.site24x7.com/site24x7-faq.html'
  producer:
    name: 'Site24x7'
    url: 'https://www.site24x7.com'
- regex: 's~snapchat-proxy'
  name: 'Snapchat Proxy'
  category: 'Crawler'
  url: 'https://www.snapchat.com'
  producer:
    name: 'Snapchat Inc.'
    url: 'https://www.snapchat.com'
- regex: 'Snap URL Preview Service'
  name: 'Snap URL Preview Service'
  category: 'Service Agent'
  url: 'https://developers.snap.com/robots'
  producer:
    name: 'Snapchat Inc.'
    url: 'https://www.snapchat.com/'
- regex: "Let's Encrypt validation server"
  name: "Let's Encrypt Validation"
  category: 'Service Agent'
  url: 'https://letsencrypt.org/how-it-works/'
  producer:
    name: "Let's Encrypt"
    url: 'https://letsencrypt.org'
- regex: 'GrapeshotCrawler'
  name: 'Grapeshot'
  category: 'Crawler'
  url: 'https://www.grapeshot.com/crawler'
  producer:
    name: 'Grapeshot'
    url: 'https://www.grapeshot.com'
- regex: 'www\.monitor\.us'
  name: 'Monitor.Us'
  category: 'Site Monitor'
  url: 'http://www.monitor.us'
  producer:
    name: 'Monitor.Us'
    url: 'http://www.monitor.us'
2022-03-25 12:41:04 +03:00
# Catchpoint, BitlyBot and Zao.
- regex: 'Catchpoint'
  name: 'Catchpoint'
  category: 'Site Monitor'
  url: 'https://www.catchpoint.com/'
  producer:
    name: 'Catchpoint Systems'
    url: 'https://www.catchpoint.com/'
- regex: 'bitlybot'
  name: 'BitlyBot'
  category: 'Crawler'
  url: 'https://bitly.com'
  producer:
    name: 'Bitly, Inc.'
    url: 'https://bitly.com'
- regex: 'Zao/'
  name: 'Zao'
  category: 'Crawler'
# Entries that carry only a regex and a name (no category, url or producer).
- regex: 'lycos'
  name: 'Lycos'
- regex: 'Slurp'
  name: 'Inktomi Slurp'
- regex: 'Speedy Spider'
  name: 'Speedy'
- regex: 'ScoutJet'
  name: 'ScoutJet'
- regex: 'nrsbot|netresearch'
  name: 'NetResearchServer'
- regex: 'scooter'
  name: 'Scooter'
- regex: 'gigabot'
  name: 'Gigabot'
- regex: 'charlotte'
  name: 'Charlotte'
- regex: 'Pompos'
  name: 'Pompos'
- regex: 'ichiro'
  name: 'ichiro'
- regex: 'PagePeeker'
  name: 'PagePeeker'
- regex: 'WebThumbnail'
  name: 'WebThumbnail'
- regex: 'Willow Internet Crawler'
  name: 'Willow Internet Crawler'
- regex: 'EmailWolf'
  name: 'EmailWolf'
- regex: 'NetLyzer FastProbe'
  name: 'NetLyzer FastProbe'
- regex: 'AdMantX.*admantx\.com'
  name: 'ADMantX'
- regex: 'Server Density Service Monitoring.*'
  name: 'Server Density'
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
  name: 'RSSRadio Bot'
2022-03-25 12:41:04 +03:00
# Catch-all list of miscellaneous bot tokens reported as 'Generic Bot'.
# The "vortex" alternative uses a negative lookahead around a non-capturing
# group, which must be spelled "(?:" with no whitespace; "(? :" is not valid
# group syntax.
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise)'
  name: 'Generic Bot'
# Sentry, Spotify, The Knowledge AI, Embedly, BrandVerity, Kaspersky,
# eZ Publish Link Validator and WooRank.
- regex: '^sentry'
  name: 'Sentry Bot'
  producer:
    name: 'Sentry'
    url: 'https://sentry.io'
- regex: '^Spotify'
  name: 'Spotify'
  producer:
    name: 'Spotify'
    url: 'https://www.spotify.com'
- regex: 'The Knowledge AI'
  name: 'The Knowledge AI'
  category: 'Crawler'
- regex: 'Embedly'
  name: 'Embedly'
  category: 'Crawler'
  url: 'https://support.embed.ly/hc/en-us'
  producer:
    name: 'A Medium, Corp.'
    url: 'https://medium.com/'
- regex: 'BrandVerity'
  name: 'BrandVerity'
  category: 'Crawler'
  url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
  producer:
    name: 'BrandVerity, Inc.'
    url: 'https://www.brandverity.com/'
- regex: 'Kaspersky Lab CFR link resolver'
  name: 'Kaspersky'
  category: 'Security Checker'
  url: 'https://www.kaspersky.com/'
  producer:
    name: 'AO Kaspersky Lab'
    url: 'https://www.kaspersky.com/'
- regex: 'eZ Publish Link Validator'
  name: 'eZ Publish Link Validator'
  category: 'Crawler'
  url: 'https://ez.no/'
  producer:
    name: 'eZ Systems AS'
    url: 'https://ez.no/'
- regex: 'woorankreview'
  name: 'WooRank'
  category: 'Search bot'
  url: 'https://www.woorank.com/'
  producer:
    name: 'WooRank sprl'
    url: 'https://www.woorank.com/'
# Siteimprove checkers. The dot in the domain is escaped so it matches a
# literal "." only.
- regex: '(Match|LinkCheck) by Siteimprove\.com'
  name: 'Siteimprove'
  category: 'Search bot'
  url: 'https://siteimprove.com/'
  producer:
    name: 'Siteimprove GmbH'
    url: 'https://siteimprove.com/'
# CATExplorador, Buck and TraceMyFile.
- regex: 'CATExplorador'
  name: 'CATExplorador'
  category: 'Search bot'
  url: 'https://fundacio.cat/ca/domini/'
  producer:
    name: 'Fundació puntCAT'
    url: 'https://fundacio.cat/ca/domini/'
- regex: 'Buck'
  name: 'Buck'
  category: 'Search bot'
  url: 'https://hypefactors.com/'
  producer:
    name: 'Hypefactors A/S'
    url: 'https://hypefactors.com/'
- regex: 'tracemyfile'
  name: 'TraceMyFile'
  category: 'Search bot'
  url: 'https://www.tracemyfile.com/'
  producer:
    name: 'Idee Inc.'
    url: 'http://ideeinc.com/'
# Ze List feed parser. The dot in the domain is escaped so it matches a
# literal "." only.
- regex: 'zelist\.ro feed parser'
  name: 'Ze List'
  category: 'Feed Fetcher'
  url: 'https://www.zelist.ro/'
  producer:
    name: 'Treeworks SRL'
    url: 'https://www.tree.ro/'
# Weborama, BoardReader, IDG/IT, Bytespider and WikiDo.
- regex: 'weborama-fetcher'
  name: 'Weborama'
  category: 'Search bot'
  url: 'https://weborama.com/'
  producer:
    name: 'Weborama SA'
    url: 'https://weborama.com/'
- regex: 'BoardReader Favicon Fetcher'
  name: 'BoardReader'
  category: 'Search bot'
  url: 'https://boardreader.com/'
  producer:
    name: 'Effyis Inc'
    url: 'https://boardreader.com/'
- regex: 'IDG/IT'
  name: 'IDG/IT'
  category: 'Search bot'
  url: 'https://spaziodati.eu/'
  producer:
    name: 'SpazioDati S.r.l.'
    url: 'https://spaziodati.eu/'
- regex: 'Bytespider'
  name: 'Bytespider'
  category: 'Search bot'
  url: 'https://bytedance.com/'
  producer:
    name: 'ByteDance Ltd.'
    url: 'https://bytedance.com/'
- regex: 'WikiDo'
  name: 'WikiDo'
  category: 'Search bot'
  url: 'https://www.wikido.com/'
  producer:
    name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
    url: 'https://www.wikido.com/'
# Both Awario bots, oBot, SMTBot, LCC, Startpagina Linkchecker and GTmetrix.
- regex: 'AwarioSmartBot'
  name: 'Awario'
  category: 'Search bot'
  url: 'https://awario.com/bots.html'
  producer:
    name: 'Awario'
    url: 'https://awario.com/'
- regex: 'AwarioRssBot'
  name: 'Awario'
  category: 'Feed Fetcher'
  url: 'https://awario.com/bots.html'
  producer:
    name: 'Awario'
    url: 'https://awario.com/'
- regex: 'oBot'
  name: 'oBot'
  category: 'Search bot'
  url: 'https://www.xforce-security.com/crawler/'
  producer:
    name: 'IBM Germany Research & Development GmbH'
    url: 'https://exchange.xforce.ibmcloud.com/'
- regex: 'SMTBot'
  name: 'SMTBot'
  category: 'Search bot'
  url: 'https://www.similartech.com/smtbot'
  producer:
    name: 'SimilarTech Ltd.'
    url: 'https://www.similartech.com/'
- regex: 'LCC'
  name: 'LCC'
  category: 'Search bot'
  url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
  producer:
    name: 'Universität Leipzig'
    url: 'https://www.uni-leipzig.de/'
- regex: 'Startpagina-Linkchecker'
  name: 'Startpagina Linkchecker'
  category: 'Search bot'
  url: 'https://www.startpagina.nl/linkchecker'
  producer:
    name: 'Startpagina B.V.'
    url: 'https://www.startpagina.nl/'
- regex: 'GTmetrix'
  name: 'GTmetrix'
  category: 'Crawler'
  url: 'https://gtmetrix.com/'
  producer:
    name: 'Carbon60 Operating Co. Ltd.'
    url: 'https://www.carbon60.com/'
2019-09-02 14:29:19 +03:00
# Nutch-based bots, Seobility, Vercel Bot and Grammarly.
- regex: 'Nutch'
  name: 'Nutch-based Bot'
  category: 'Crawler'
  url: 'https://nutch.apache.org'
  producer:
    name: 'The Apache Software Foundation'
    url: 'https://www.apache.org/foundation/'
- regex: 'Seobility'
  name: 'Seobility'
  category: 'Crawler'
  url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
- regex: 'Vercelbot'
  name: 'Vercel Bot'
  category: 'Service bot'
  url: 'https://vercel.com'
- regex: 'Grammarly'
  name: 'Grammarly'
  category: 'Service bot'
  url: 'https://www.grammarly.com'
2020-10-14 12:25:18 +03:00
# Robozilla, Domains Project, Petal Bot, Serendeputy, ADmantX Service Fetcher,
# Semantic Scholar, Velen and Barkrowler.
- regex: 'Robozilla'
  name: 'Robozilla'
  category: 'Crawler'
- regex: 'Domains Project'
  name: 'Domains Project'
  category: 'Crawler'
  url: 'https://domainsproject.org'
- regex: 'PetalBot'
  name: 'Petal Bot'
  category: 'Crawler'
  url: 'https://aspiegel.com/petalbot'
- regex: 'SerendeputyBot'
  name: 'Serendeputy Bot'
  category: 'Crawler'
  url: 'https://serendeputy.com/about/serendeputy-bot'
- regex: 'ias-va.*admantx.*service-fetcher'
  name: 'ADmantX Service Fetcher'
  category: 'Service bot'
  url: 'https://www.admantx.com/service-fetcher.html'
- regex: 'SemanticScholarBot'
  name: 'Semantic Scholar Bot'
  category: 'Crawler'
  url: 'https://www.semanticscholar.org/crawler'
- regex: 'VelenPublicWebCrawler'
  name: 'Velen Public Web Crawler'
  category: 'Crawler'
  url: 'https://hunter.io/robot'
- regex: 'Barkrowler'
  name: 'Barkrowler'
  category: 'Crawler'
  url: 'http://www.exensa.com/crawl'
2022-03-25 12:41:04 +03:00
# BDCbot, Adbeat and BuiltWith.
- regex: 'BDCbot'
  name: 'BDCbot'
  category: 'Crawler'
  url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
  producer:
    name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
    url: 'https://bigdatacorp.com.br/'
- regex: 'adbeat'
  name: 'Adbeat'
  category: 'Crawler'
  url: 'https://www.adbeat.com/operation_policy'
  producer:
    name: 'PPC Labs LLC'
    url: 'https://www.adbeat.com/'
- regex: 'BW/(?:(\d+[\.\d]+))'
  name: 'BuiltWith'
  category: 'Crawler'
  url: 'https://builtwith.com/biup'
  producer:
    name: 'BuiltWith Pty Ltd'
    url: 'https://builtwith.com/'
# ContentKing monitor, identified by its info URL in the user agent. Dots in
# the host name are escaped so they match a literal "." only.
- regex: 'https://whatis\.contentkingapp\.com'
  name: 'ContentKing'
  category: 'Site Monitor'
  url: 'https://whatis.contentkingapp.com/'
  producer:
    name: 'ContentKing BV'
    url: 'https://www.contentkingapp.com/'
# MicroAd crawler.
- regex: 'MicroAdBot'
  name: 'MicroAdBot'
  category: 'Crawler'
  url: 'https://www.microad.co.jp/'
  producer:
    name: 'MicroAd, Inc.'
    url: 'https://www.microad.co.jp/'
# PingAdmin.Ru site monitor. The dot is escaped so it matches a literal "."
# only.
- regex: 'PingAdmin\.Ru'
  name: 'PingAdmin.Ru'
  category: 'Site Monitor'
  url: 'https://ping-admin.ru/'
# Notify Ninja and WebDataStats.
- regex: 'notifyninja.+monitoring'
  name: 'Notify Ninja'
  category: 'Site Monitor'
  url: 'http://notifyninja.com'
- regex: 'WebDataStats'
  name: 'WebDataStats'
  category: 'Crawler'
  url: 'https://webdatastats.com/policy.html'
  producer:
    name: 'WebTehRazrabotka LLC'
    url: 'https://webdatastats.com/'
# Parse.ly scraper. The dot is escaped so it matches a literal "." only.
- regex: 'parse\.ly scraper'
  name: 'parse.ly'
  category: 'Crawler'
  url: 'https://www.parse.ly/help/integration/crawler'
  producer:
    name: 'Parsely, Inc.'
    url: 'https://www.parse.ly/'
# Nimbostratus, Heart Rails Capture, Project Resonance, DataXu, Cocolyzebot,
# VeryHip and LinkpadBot.
- regex: 'Nimbostratus-Bot'
  name: 'Nimbostratus Bot'
  category: 'Site Monitor'
  url: 'http://cloudsystemnetworks.com'
- regex: 'HeartRails_Capture/\d'
  name: 'Heart Rails Capture'
  category: 'Service Agent'
  url: 'http://capture.heartrails.com'
- regex: 'Project-Resonance'
  name: 'Project Resonance'
  category: 'Crawler'
  url: 'http://project-resonance.com'
- regex: 'DataXu/\d'
  name: 'DataXu'
  category: 'Service Agent'
  url: 'https://advertising.roku.com/dataxu'
  producer:
    name: 'Roku, Inc.'
    url: 'https://roku.com'
- regex: 'Cocolyzebot'
  name: 'Cocolyzebot'
  category: 'Crawler'
  url: 'https://cocolyze.com/en/cocolyzebot'
  producer:
    name: 'VSI INNOVATION SAS'
    url: 'https://vsi-innovation.com/'
- regex: 'veryhip'
  name: 'VeryHip'
  category: 'Crawler'
  url: 'https://veryhip.com/'
  producer:
    name: 'VeryHip'
    url: 'https://veryhip.com/'
- regex: 'LinkpadBot'
  name: 'LinkpadBot'
  category: 'Crawler'
  url: 'https://www.linkpad.org/'
  producer:
    name: 'Solomono LLC'
    url: 'https://www.linkpad.org/'
2020-10-14 12:25:18 +03:00
2022-03-25 12:41:04 +03:00
# MuscatFerret crawler.
- regex: 'MuscatFerret'
  name: 'MuscatFerret'
  category: 'Crawler'
  url: 'http://www.webtop.com/'
# PageThing crawler. The dot in the domain is escaped so it matches a literal
# "." only.
- regex: 'PageThing\.com'
  name: 'PageThing'
  category: 'Crawler'
  url: 'https://www.pagething.com/'
  producer:
    name: 'SPECIALNOISE LTD'
    url: 'https://www.specialnoise.com/'
# ArchiveBox, Choosito, datagnionbot, WhatCMS and httpx.
- regex: 'ArchiveBox'
  name: 'ArchiveBox'
  category: 'Crawler'
  url: 'https://archivebox.io/'
  producer:
    name: ''
    url: ''
- regex: 'Choosito'
  name: 'Choosito'
  category: 'Crawler'
  url: 'https://www.choosito.com/'
  producer:
    name: 'Choosito! Inc.'
    url: 'https://www.choosito.com/'
- regex: 'datagnionbot'
  name: 'datagnionbot'
  category: 'Crawler'
  url: 'https://www.datagnion.com/bot.html'
  producer:
    name: 'DATAGNION GMBH'
    url: 'https://www.datagnion.com/'
- regex: 'WhatCMS'
  name: 'WhatCMS'
  category: 'Crawler'
  url: 'https://whatcms.org/'
  producer:
    name: 'Nineteen Ten LLC'
    url: 'https://whatcms.org/'
- regex: 'httpx'
  name: 'httpx'
  category: 'Crawler'
  url: 'https://github.com/projectdiscovery/httpx'
  producer:
    name: ''
    url: ''
# Expanse scanner, identified by its contact address in the user agent. The
# dot is escaped so it matches a literal "." only.
- regex: 'scaninfo@expanseinc\.com'
  name: 'Expanse'
  category: 'Security Checker'
  url: 'https://expanse.co/'
  producer:
    name: 'Expanse Inc.'
    url: 'https://expanse.co/'
# HuaweiWebCatBot, Hatena Favicon, Ryowl, Odnoklassniki, Mediatoolkit and
# ZoominfoBot.
- regex: 'HuaweiWebCatBot'
  name: 'HuaweiWebCatBot'
  category: 'Crawler'
  url: 'https://isecurity.huawei.com'
  producer:
    name: 'Huawei Technologies Co., Ltd.'
    url: 'https://huawei.com'
- regex: 'Hatena-Favicon'
  name: 'Hatena Favicon'
  category: 'Crawler'
  url: 'https://www.hatena.ne.jp/faq/'
  producer:
    name: 'Hatena Co., Ltd.'
    url: 'https://www.hatena.ne.jp'
- regex: 'RyowlEngine/(\d+)'
  name: 'Ryowl'
  category: 'Crawler'
  url: 'https://ryowl.org'
- regex: 'OdklBot/(\d+)'
  name: 'Odnoklassniki Bot'
  category: 'Crawler'
  url: 'https://odnoklassniki.ru'
- regex: 'Mediatoolkitbot'
  name: 'Mediatoolkit Bot'
  category: 'Crawler'
  url: 'https://mediatoolkit.com'
- regex: 'ZoominfoBot'
  name: 'ZoominfoBot'
  category: 'Crawler'
  url: 'https://www.zoominfo.com'
# WeViKaBot crawler. The version class carries a '+' quantifier so the group
# captures the whole version string instead of a single character.
- regex: 'WeViKaBot/([\d+.]+)'
  name: 'WeViKaBot'
  category: 'Crawler'
  url: 'http://www.wevika.de'
# SEOkicks crawler.
- regex: 'SEOkicks'
  name: 'SEOkicks'
  category: 'Crawler'
  url: 'https://www.seokicks.de/robot.html'
# Plukkie crawler. The version class carries a '+' quantifier so the group
# captures the whole version string instead of a single character.
- regex: 'Plukkie/([\d+.]+)'
  name: 'Plukkie'
  category: 'Crawler'
  url: 'http://www.botje.com/plukkie.htm'
# Comscore crawler (matched by its "proximic;" token).
- regex: 'proximic;'
  name: 'Comscore'
  category: 'Crawler'
  url: 'https://www.comscore.com/Web-Crawler'
# SurdotlyBot crawler. The version class carries a '+' quantifier so the group
# captures the whole version string instead of a single character.
- regex: 'SurdotlyBot/([\d+.]+)'
  name: 'SurdotlyBot'
  category: 'Crawler'
  url: 'http://sur.ly/bot.html'
# Gowikibot crawler. The url uses a well-formed "http://" scheme separator,
# and the version class carries a '+' quantifier so the group captures the
# whole version string.
- regex: 'Gowikibot/([\d+.]+)'
  name: 'Gowikibot'
  category: 'Crawler'
  url: 'http://www.gowikibot.com'
# SabsimBot and LumtelBot crawlers. The version class carries a '+' quantifier
# so the group captures the whole version string instead of a single character.
- regex: 'SabsimBot/([\d+.]+)'
  name: 'SabsimBot'
  category: 'Crawler'
  url: 'https://sabsim.com'
- regex: 'LumtelBot/([\d+.]+)'
  name: 'LumtelBot'
  category: 'Crawler'
  url: 'https://umtel.com'
# Pipl crawler.
- regex: 'PiplBot'
  name: 'PiplBot'
  category: 'Crawler'
  url: 'http://www.pipl.com/bot'
# WooRank's woobot and Cookiebot. The version class carries a '+' quantifier
# so the group captures the whole version string instead of a single character.
- regex: 'woobot/([\d+.]+)'
  name: 'WooRank'
  category: 'Crawler'
  url: 'https://www.woorank.com/bot'
- regex: 'Cookiebot/([\d+.]+)'
  name: 'Cookiebot'
  category: 'Crawler'
  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
  producer:
    name: 'Cybot A/S'
    url: 'https://www.cybot.com/'
# NetSystemsResearch security scanner.
- regex: 'NetSystemsResearch'
  name: 'NetSystemsResearch'
  category: 'Security Checker'
  url: 'https://www.netsystemsresearch.com/'
  producer:
    name: 'NET SYSTEMS RESEARCH LLC'
    url: 'https://www.netsystemsresearch.com/'
# Censys scanner. The version class carries a '+' quantifier so the group
# captures the whole version string instead of a single character.
- regex: 'CensysInspect/([\d+.]+)'
  name: 'CensysInspect'
  category: 'Security Checker'
  url: 'https://about.censys.io/'
  producer:
    name: 'Censys, Inc.'
    url: 'https://censys.io/'
# GDNP crawler. The dot in the domain is escaped so it matches a literal "."
# only.
- regex: 'gdnplus\.com'
  name: 'GDNP'
  category: 'Crawler'
  url: 'https://gdnplus.com/'
  producer:
    name: 'Global Digital Network Plus, LLC'
    url: 'https://gdnplus.com/'
# WellKnownBot, Adsbot, MTRobot and serpstatbot. The version class carries a
# '+' quantifier so the group captures the whole version string instead of a
# single character.
- regex: 'WellKnownBot/([\d+.]+)'
  name: 'WellKnownBot'
  category: 'Crawler'
  url: 'https://well-known.dev'
- regex: 'Adsbot/([\d+.]+)'
  name: 'Adsbot'
  category: 'Crawler'
  url: 'https://seostar.co/robot/'
- regex: 'MTRobot/([\d+.]+)'
  name: 'MTRobot'
  category: 'Crawler'
  url: 'https://metrics-tools.de/robot.html'
  producer:
    name: 'Metrics Tools'
    url: 'https://metrics-tools.de/'
- regex: 'serpstatbot/([\d+.]+)'
  name: 'serpstatbot'
  category: 'Crawler'
  url: 'http://serpstatbot.com/'
  producer:
    name: 'Netpeak Ltd'
    url: 'https://netpeak.net/'
# colly scraping framework.
- regex: 'colly'
  name: 'colly'
  category: 'Crawler'
  url: 'https://github.com/gocolly/colly/'
# l9tcpid scanner. The version class carries a '+' quantifier so the group
# captures the whole version string instead of a single character.
- regex: 'l9tcpid/v([\d+.]+)'
  name: 'l9tcpid'
  category: 'Security Checker'
  url: 'https://github.com/LeakIX/l9tcpid'
# MegaIndex crawler. The dot in "MegaIndex.ru" is escaped so it matches a
# literal "." only, and the version class carries a '+' quantifier so the
# group captures the whole version string.
- regex: 'MegaIndex\.ru/([\d+.]+)'
  name: 'MegaIndex'
  category: 'Crawler'
  url: 'https://megaindex.com/crawler'
# Seekport crawler.
- regex: 'Seekport'
  name: 'Seekport'
  category: 'Crawler'
  url: 'http://www.seekport.com/'
  producer:
    name: 'SISTRIX GmbH'
    url: 'https://www.sistrix.de/'
# seolyt, YaK, KomodiaBot, Neevabot, LinkPreview and JungleKeyThumbnail. The
# version class carries a '+' quantifier so the group captures the whole
# version string instead of a single character.
- regex: 'seolyt/([\d+.]+)'
  name: 'seolyt'
  category: 'Crawler'
  url: 'https://seolyt.com/'
- regex: 'YaK/([\d+.]+)'
  name: 'YaK'
  category: 'Crawler'
  url: 'https://www.linkfluence.com/'
  producer:
    name: 'Linkfluence SAS'
    url: 'https://www.linkfluence.com/'
- regex: 'KomodiaBot/([\d+.]+)'
  name: 'KomodiaBot'
  category: 'Crawler'
  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
  producer:
    name: 'Komodia Inc.'
    url: 'https://www.komodia.com/'
- regex: 'Neevabot/([\d+.]+)'
  name: 'Neevabot'
  category: 'Search bot'
  url: 'https://neeva.com/neevabot'
  producer:
    name: 'Neeva Inc.'
    url: 'https://neeva.com/'
- regex: 'LinkPreview/([\d+.]+)'
  name: 'LinkPreview'
  category: 'Service Agent'
  url: 'https://www.linkpreview.net/'
- regex: 'JungleKeyThumbnail/([\d+.]+)'
  name: 'JungleKeyThumbnail'
  category: 'Crawler'
  url: 'https://junglekey.com/'
# Radio Mast stream monitor; the token is followed by either a space or
# "bot/". That alternation is a non-capturing group, which must be spelled
# "(?:" with no whitespace; "(? :" is not valid group syntax. The version
# class also carries a '+' quantifier so the group captures the whole version.
- regex: 'rocketmonitor(?: |bot/)([\d+.]+)'
  name: 'RocketMonitorBot'
  category: 'Site Monitor'
  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
  producer:
    name: 'Radio Mast, Inc.'
    url: 'https://www.radiomast.io/'
# VIPnytt SitemapParser library. The version class carries a '+' quantifier so
# the group captures the whole version string instead of a single character.
- regex: 'SitemapParser-VIPnytt/([\d+.]+)'
  name: 'SitemapParser-VIPnytt'
  category: 'Crawler'
  url: 'https://github.com/VIPnytt/SitemapParser/'
# Turnitin, Dotcom Monitor, ThinkChaos, DataForSeoBot, Discord Bot, Linespider,
# Cincraw, CISPA Web Analyzer, IONOS Crawler and Crawldad.
- regex: '^Turnitin'
  name: 'Turnitin'
  category: 'Crawler'
  url: 'https://turnitin.com/robot/crawlerinfo.html'
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
  name: 'Dotcom Monitor'
  category: 'Site Monitor'
  url: 'https://www.dotcom-monitor.com'
- regex: 'ThinkChaos/'
  name: 'ThinkChaos'
  category: 'Crawler'
- regex: 'DataForSeoBot'
  name: 'DataForSeoBot'
  category: 'Crawler'
  url: 'https://dataforseo.com/dataforseo-bot'
- regex: 'Discordbot/([\d+.]+)'
  name: 'Discord Bot'
  category: 'Service Agent'
  url: 'https://discordapp.com'
- regex: 'Linespider/([\d+.]+)'
  name: 'Linespider'
  category: 'Crawler'
  url: 'https://lin.ee/4dwXkTH'
- regex: 'Cincraw/([\d+.]+)'
  name: 'Cincraw'
  category: 'Crawler'
  url: 'http://cincrawdata.net/bot/'
- regex: 'CISPA Web Analyzer'
  name: 'CISPA Web Analyzer'
  category: 'Crawler'
  url: 'https://notify.cispa.de/'
  producer:
    name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
    url: 'https://cispa.de/en'
- regex: 'IonCrawl'
  name: 'IONOS Crawler'
  category: 'Crawler'
  url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
  producer:
    name: 'IONOS SE'
    url: 'https://www.ionos.de/'
- regex: 'Crawldad'
  name: 'Crawldad'
  category: 'Crawler'
  url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
# security.txt scan server, identified by its URL in the user agent. Dots in
# the host name are escaped so they match a literal "." only.
- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
  name: 'security.txt scanserver'
  category: 'Security Checker'
  url: 'https://securitytxt-scan.cs.hm.edu/'
  producer:
    name: 'Hochschule für angewandte Wissenschaften München'
    url: 'https://www.hm.edu/'
# TigerBot, TestCrawler and CrowdTangle.
- regex: 'TigerBot/([\d+.]+)'
  name: 'TigerBot'
  category: 'Crawler'
  url: 'https://tiger.ch/'
- regex: 'TestCrawler/([\d+.]+)'
  name: 'TestCrawler'
  category: 'Crawler'
  url: 'https://www.comcepta.com/'
- regex: 'CrowdTanglebot/([\d+.]+)'
  name: 'CrowdTangle'
  category: 'Crawler'
  url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
  producer:
    name: 'CrowdTangle, Inc.'
    url: 'https://www.crowdtangle.com/'
# Sellers.Guide crawler. The dot is escaped so it matches a literal "." only.
- regex: 'Sellers\.Guide Crawler by Primis'
  name: 'Sellers.Guide'
  category: 'Crawler'
  url: 'https://sellers.guide/'
  producer:
    name: 'McCann Disciplines, Ltd.'
    url: 'https://www.primis.tech/'
# Onalytica, deepnoc and Newslitbot.
- regex: 'OnalyticaBot'
  name: 'Onalytica'
  category: 'Crawler'
  url: 'https://www.airslate.com/bot/explore/onalytica-bot'
  producer:
    name: 'airSlate, Inc.'
    url: 'https://www.airslate.com/'
- regex: 'deepnoc'
  name: 'deepnoc'
  category: 'Crawler'
  url: 'https://deepnoc.com/bot'
  producer:
    name: 'deepnoc, GmbH'
    url: 'https://deepnoc.com/'
- regex: 'Newslitbot/([\d+.]+)'
  name: 'Newslitbot'
  category: 'Crawler'
  url: 'https://www.newslit.co/'
  producer:
    name: 'Newslit, LLC.'
    url: 'https://www.newslit.co/'
# Generic detections
- regex : '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2019-09-02 14:29:19 +03:00
name : 'Generic Bot'