1###############
2# Device Detector - The Universal Device Detection library for parsing User Agents
3#
4# @link https://matomo.org
5# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6###############
7
8- regex: 'WireReaderBot'
9  name: 'WireReaderBot'
10  category: 'Feed Fetcher'
11  url: 'https://wirereader.app/'
12
13- regex: 'monitoring360bot'
14  name: '360 Monitoring'
15  category: 'Site Monitor'
16  url: 'https://www.360monitoring.io'
17  producer:
18    name: 'Plesk International GmbH'
19    url: 'https://www.plesk.com'
20
21- regex: 'Cloudflare-Healthchecks'
22  name: 'Cloudflare Health Checks'
23  category: 'Service Agent'
24  url: 'https://developers.cloudflare.com/health-checks/'
25  producer:
26    name: 'CloudFlare'
27    url: 'https://www.cloudflare.com/'
28
# 360Spider: search crawler whose documentation lives on so.com (360 Search).
# NOTE(review): producer set to Qihoo 360, the operator of so.com — confirm
# the exact legal entity name before release.
- regex: '360Spider'
  name: '360Spider'
  category: 'Search bot'
  url: 'https://www.so.com/help/help_3_2.html'
  producer:
    name: 'Qihoo 360 Technology Co. Ltd.'
    url: 'https://www.360.cn/'
36
37- regex: 'Aboundex'
38  name: 'Aboundexbot'
39  category: 'Search bot'
40  url: 'http://www.aboundex.com/crawler/'
41  producer:
42    name: 'Aboundex.com'
43    url: 'http://www.aboundex.com'
44
45- regex: 'AcoonBot'
46  name: 'Acoon'
47  category: 'Search bot'
48  url: 'http://www.acoon.de/robot.asp'
49  producer:
50    name: 'Acoon GmbH'
51    url: 'http://www.acoon.de'
52
53- regex: 'AddThis\.com'
54  name: 'AddThis.com'
55  category: 'Social Media Agent'
56  url: ''
57  producer:
58    name: 'Clearspring Technologies, Inc.'
59    url: 'http://www.clearspring.com'
60
61- regex: 'AhrefsBot'
62  name: 'aHrefs Bot'
63  category: 'Crawler'
64  url: 'https://ahrefs.com/robot'
65  producer:
66    name: 'Ahrefs Pte Ltd'
67    url: 'https://ahrefs.com/robot'
68
69- regex: 'AhrefsSiteAudit'
70  name: 'AhrefsSiteAudit'
71  category: 'Site Monitor'
72  url: 'https://ahrefs.com/robot/site-audit'
73  producer:
74    name: 'Ahrefs Pte Ltd'
75    url: 'https://ahrefs.com/'
76
77- regex: 'ia_archiver|alexabot|verifybot'
78  name: 'Alexa Crawler'
79  category: 'Search bot'
80  url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
81  producer:
82    name: 'Alexa Internet'
83    url: 'https://www.alexa.com'
84
85- regex: 'alexa site audit'
86  name: 'Alexa Site Audit'
87  category: 'Site Monitor'
88  url: 'https://support.alexa.com/hc/en-us/articles/200450194'
89  producer:
90    name: 'Alexa Internet'
91    url: 'https://www.alexa.com'
92
93- regex: 'Amazonbot'
94  name: 'Amazon Bot'
95  category: 'Crawler'
96  url: 'https://developer.amazon.com/support/amazonbot'
97  producer:
98    name: 'Amazon.com, Inc.'
99    url: 'https://www.amazon.com/'
100
101- regex: 'AmazonAdBot'
102  name: 'Amazon AdBot'
103  category: 'Crawler'
104  url: 'https://adbot.amazon.com/'
105  producer:
106    name: 'Amazon.com, Inc.'
107    url: 'https://www.amazon.com/'
108
109- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
110  name: 'Amazon Route53 Health Check'
111  category: 'Service Agent'
112  producer:
113    name: 'Amazon Web Services'
114    url: 'https://aws.amazon.com/'
115
116- regex: 'AmorankSpider'
117  name: 'Amorank Spider'
118  category: 'Crawler'
119  url: 'http://amorank.com/webcrawler.html'
120  producer:
121    name: 'Amorank'
122    url: 'http://www.amorank.com'
123
124- regex: 'ApacheBench'
125  name: 'ApacheBench'
126  category: 'Benchmark'
127  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
128  producer:
129    name: 'The Apache Software Foundation'
130    url: 'https://www.apache.org/foundation/'
131
132- regex: 'Applebot'
133  name: 'Applebot'
134  category: 'Crawler'
135  url: 'https://support.apple.com/en-us/119829'
136  producer:
137    name: 'Apple Inc'
138    url: 'https://www.apple.com/'
139
140- regex: 'iTMS'
141  name: 'iTMS'
142  category: 'Crawler'
143  url: 'https://support.apple.com/en-us/119829'
144  producer:
145    name: 'Apple Inc'
146    url: 'https://www.apple.com/'
147
148- regex: 'AppSignalBot'
149  name: 'AppSignalBot'
150  category: 'Site Monitor'
151  url: 'https://docs.appsignal.com/uptime-monitoring/'
152  producer:
153    name: 'AppSignal'
154    url: 'https://appsignal.com/'
155
156- regex: 'Arachni'
157  name: 'Arachni'
158  category: 'Security Checker'
159  url: 'https://www.arachni-scanner.com/'
160  producer:
161    name: 'Sarosys LLC'
162    url: 'https://www.sarosys.com/'
163
164- regex: 'AspiegelBot'
165  name: 'AspiegelBot'
166  category: 'Crawler'
167  url: 'https://aspiegel.com/'
168  producer:
169    name: 'Huawei'
170    url: 'https://www.huawei.com/'
171
172- regex: 'Castro 2, Episode Duration Lookup'
173  name: 'Castro 2'
174  category: 'Service Agent'
175  url: 'http://supertop.co/castro/'
176  producer:
177    name: 'Supertop'
178    url: 'http://supertop.co'
179
180- regex: 'Curious George'
181  name: 'Analytics SEO Crawler'
182  category: 'Crawler'
183  url: 'http://www.analyticsseo.com/crawler'
184  producer:
185    name: 'Analytics SEO'
186    url: 'http://www.analyticsseo.com'
187
188- regex: 'archive\.org_bot|special_archiver'
189  name: 'archive.org bot'
190  category: 'Crawler'
191  url: 'https://archive.org/details/archive.org_bot'
192  producer:
193    name: 'The Internet Archive'
194    url: 'https://archive.org'
195
196- regex: 'Ask Jeeves/Teoma'
197  name: 'Ask Jeeves'
198  category: 'Search bot'
199  url: ''
200  producer:
201    name: 'Ask Jeeves Inc.'
202    url: 'http://www.ask.com'
203
204- regex: 'Backlink-Check\.de'
205  name: 'Backlink-Check.de'
206  category: 'Crawler'
207  url: 'http://www.backlink-check.de/bot.html'
208  producer:
209    name: 'Mediagreen Medienservice'
210    url: 'http://www.backlink-check.de'
211
212- regex: 'BacklinkCrawler'
213  name: 'BacklinkCrawler'
214  category: 'Crawler'
215  url: 'http://www.backlinktest.com/crawler.html'
216  producer:
217    name: '2.0Promotion GbR'
218    url: 'http://www.backlinktest.com'
219
220- regex: 'Baidu.*spider|baidu Transcoder'
221  name: 'Baidu Spider'
222  category: 'Search bot'
223  url: 'http://www.baidu.com/search/spider.htm'
224  producer:
225    name: 'Baidu'
226    url: 'http://www.baidu.com'
227
228- regex: 'BazQux'
229  name: 'BazQux Reader'
230  url: 'https://bazqux.com/fetcher'
231  category: 'Feed Fetcher'
232  producer:
233    name: ''
234    url: ''
235
236- regex: 'Better Uptime Bot'
237  name: 'Better Uptime Bot'
238  category: 'Site Monitor'
239  url: 'https://betteruptime.com/faq'
240  producer:
241    name: 'Better Uptime'
242    url: 'https://betteruptime.com/'
243
# BingBot: one record covering the MSN/Bing crawler family (MSNBot, msrbot,
# bingbot, bingadsbot, BingPreview, msnbot-UDiscovery/NewsBlogs, adidxbot).
- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'
251
252- regex: 'Blackbox Exporter'
253  name: 'Blackbox Exporter'
254  category: 'Site Monitor'
255  url: 'https://github.com/prometheus/blackbox_exporter'
256  producer:
257    name: 'Prometheus'
258    url: 'https://prometheus.io/'
259
260- regex: 'Blekkobot'
261  name: 'Blekkobot'
262  category: 'Search bot'
263  url: 'http://blekko.com/about/blekkobot'
264  producer:
265    name: 'Blekko'
266    url: 'http://blekko.com'
267
268- regex: 'BLEXBot'
269  name: 'BLEXBot Crawler'
270  category: 'Crawler'
271  url: 'http://webmeup-crawler.com'
272  producer:
273    name: 'WebMeUp'
274    url: 'http://webmeup.com'
275
276- regex: 'Bloglovin'
277  name: 'Bloglovin'
278  url: 'http://www.bloglovin.com'
279  category: 'Feed Fetcher'
280  producer:
281    name: ''
282    url: ''
283
284- regex: 'Blogtrottr'
285  name: 'Blogtrottr'
286  url: ''
287  category: 'Feed Fetcher'
288  producer:
289    name: 'Blogtrottr Ltd'
290    url: 'https://blogtrottr.com/'
291
292- regex: 'BoardReader Blog Indexer'
293  name: 'BoardReader Blog Indexer'
294  category: 'Crawler'
295  producer:
296    name: 'BoardReader'
297    url: 'https://boardreader.com/'
298
299- regex: 'BountiiBot'
300  name: 'Bountii Bot'
301  category: 'Search bot'
302  url: 'http://bountii.com/contact.php'
303  producer:
304    name: 'Bountii Inc.'
305    url: 'http://bountii.com'
306
307- regex: 'Browsershots'
308  name: 'Browsershots'
309  category: 'Service Agent'
310  url: 'http://browsershots.org/faq'
311  producer:
312    name: 'Browsershots.org'
313    url: 'http://browsershots.org'
314
315- regex: 'BUbiNG'
316  name: 'BUbiNG'
317  category: 'Crawler'
318  url: 'http://law.di.unimi.it/BUbiNG.html'
319  producer:
320    name: 'The Laboratory for Web Algorithmics (LAW)'
321    url: 'http://law.di.unimi.it/software.php#buging'
322
323- regex: '(?<!HTC)[ _]Butterfly/'
324  name: 'Butterfly Robot'
325  category: 'Search bot'
326  url: 'http://labs.topsy.com/butterfly'
327  producer:
328    name: 'Topsy Labs'
329    url: 'http://labs.topsy.com'
330
331- regex: 'CareerBot'
332  name: 'CareerBot'
333  category: 'Crawler'
334  url: 'http://www.career-x.de/bot.html'
335  producer:
336    name: 'career-x GmbH'
337    url: 'http://www.career-x.de'
338
# CCBot: the Common Crawl crawler (see the commoncrawl.org FAQ url below).
- regex: 'CCBot'
  name: 'ccBot crawler'
  category: 'Crawler'
  url: 'http://commoncrawl.org/faq/'
  producer:
    name: 'Common Crawl Foundation'
    url: 'https://commoncrawl.org/'
346
347- regex: 'Cliqzbot'
348  name: 'Cliqzbot'
349  category: 'Crawler'
350  url: 'http://cliqz.com/company/cliqzbot'
351  producer:
352    name: '10betterpages GmbH'
353    url: 'http://cliqz.com'
354
# Cloudflare AMP fetcher; producer url uses the same https form as the other
# Cloudflare records in this file.
- regex: 'Cloudflare-AMP'
  name: 'CloudFlare AMP Fetcher'
  category: 'Crawler'
  url: 'https://amp.cloudflare.com/doc/fetcher.html'
  producer:
    name: 'CloudFlare'
    url: 'https://www.cloudflare.com/'
362
363- regex: 'Cloudflare-?Diagnostics'
364  name: 'Cloudflare Diagnostics'
365  category: 'Site Monitor'
366  url: 'https://www.cloudflare.com/'
367  producer:
368    name: 'Cloudflare'
369    url: 'https://www.cloudflare.com/'
370
371- regex: 'CloudFlare-AlwaysOnline'
372  name: 'CloudFlare Always Online'
373  category: 'Site Monitor'
374  url: 'https://www.cloudflare.com/always-online'
375  producer:
376    name: 'CloudFlare'
377    url: 'https://www.cloudflare.com/'
378
379- regex: 'Cloudflare-SSLDetector'
380  name: 'Cloudflare SSL Detector'
381  category: 'Site Monitor'
382  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
383  producer:
384    name: 'CloudFlare'
385    url: 'https://www.cloudflare.com/'
386
387- regex: 'Cloudflare Custom Hostname Verification'
388  name: 'Cloudflare Custom Hostname Verification'
389  category: 'Service Agent'
390  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
391  producer:
392    name: 'CloudFlare'
393    url: 'https://www.cloudflare.com/'
394
395- regex: 'Cloudflare-Traffic-Manager'
396  name: 'Cloudflare Traffic Manager'
397  category: 'Site Monitor'
398  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
399  producer:
400    name: 'CloudFlare'
401    url: 'https://www.cloudflare.com/'
402
403- regex: 'Cloudflare-Smart-Transit'
404  name: 'Cloudflare Smart Transit'
405  category: 'Site Monitor'
406  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
407  producer:
408    name: 'CloudFlare'
409    url: 'https://www.cloudflare.com/'
410
411- regex: 'CloudflareObservatory'
412  name: 'Cloudflare Observatory'
413  category: 'Site Monitor'
414  url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
415  producer:
416    name: 'CloudFlare'
417    url: 'https://www.cloudflare.com/'
418
419- regex: 'https://developers\.cloudflare\.com/security-center/'
420  name: 'Cloudflare Security Insights'
421  category: 'Site Monitor'
422  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
423  producer:
424    name: 'CloudFlare'
425    url: 'https://www.cloudflare.com/'
426
427- regex: 'coccoc\.com'
428  name: 'Cốc Cốc Bot'
429  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
430  category: 'Search bot'
431  producer:
432    name: 'Cốc Cốc'
433    url: 'https://coccoc.com/'
434
435- regex: 'collectd'
436  name: 'Collectd'
437  url: 'https://collectd.org/'
438  category: 'Site Monitor'
439  producer:
440    name: 'Collectd'
441    url: 'https://collectd.org/'
442
443- regex: 'CommaFeed'
444  name: 'CommaFeed'
445  url: 'http://www.commafeed.com'
446  category: 'Feed Fetcher'
447  producer:
448    name: ''
449    url: ''
450
451- regex: 'CSS Certificate Spider'
452  name: 'CSS Certificate Spider'
453  category: 'Crawler'
454  url: 'http://www.css-security.com/certificatespider/'
455  producer:
456    name: 'Certified Security Solutions'
457    url: 'https://www.css-security.com/company/about-us/'
458
459- regex: 'Datadog Agent|Datadog/?Synthetics'
460  name: 'Datadog Agent'
461  url: 'https://github.com/DataDog/dd-agent'
462  category: 'Site Monitor'
463  producer:
464    name: 'Datadog'
465    url: 'https://www.datadoghq.com/'
466
467- regex: 'Datanyze'
468  name: 'Datanyze'
469  url: ''
470  category: 'Crawler'
471  producer:
472    name: 'Datanyze'
473    url: 'https://www.datanyze.com'
474
475- regex: 'Dataprovider'
476  name: 'Dataprovider'
477  category: 'Crawler'
478  url: ''
479  producer:
480    name: 'Dataprovider B.V.'
481    url: 'https://www.dataprovider.com/'
482
483- regex: 'Daum(?!(?:Apps|Device))'
484  name: 'Daum'
485  category: 'Search bot'
486  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
487  producer:
488    name: 'Daum Communications Corp.'
489    url: 'http://www.kakaocorp.com/main'
490
491- regex: 'Dazoobot'
492  name: 'Dazoobot'
493  category: 'Search bot'
494  url: ''
495  producer:
496    name: 'DAZOO.FR'
497    url: 'http://dazoo.fr'
498
499- regex: 'discobot'
500  name: 'Discobot'
501  category: 'Search bot'
502  url: 'http://discoveryengine.com/discobot.html'
503  producer:
504    name: 'Discovery Engine'
505    url: 'http://discoveryengine.com'
506
507- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
508  name: 'Domain Re-Animator Bot'
509  category: 'Crawler'
510  url: ''
511  producer:
512    name: 'Domain Re-Animator, LLC'
513    url: 'http://domainreanimator.com'
514
515- regex: 'DotBot'
516  name: 'DotBot'
517  category: 'Crawler'
518  url: 'http://www.opensiteexplorer.org/dotbot'
519  producer:
520    name: 'SEOmoz, Inc.'
521    url: 'http://moz.com/'
522
523- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
524  name: 'DuckDuckBot'
525  category: 'Search bot'
526  url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/'
527  producer:
528    name: 'DuckDuckGo'
529    url: 'https://duckduckgo.com/'
530
531- regex: 'DuckAssistBot'
532  name: 'DuckAssistBot'
533  category: 'Search bot'
534  url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot/'
535  producer:
536    name: 'DuckDuckGo'
537    url: 'https://duckduckgo.com/'
538
539- regex: 'EasouSpider'
540  name: 'Easou Spider'
541  category: 'Search bot'
542  url: 'http://www.easou.com/search/spider.html'
543  producer:
544    name: 'easou ICP'
545    url: 'http://www.easou.com'
546
547- regex: 'eCairn-Grabber'
548  name: 'eCairn-Grabber'
549  category: 'Crawler'
550  producer:
551    name: 'eCairn'
552    url: 'https://ecairn.com'
553
554- regex: 'EMail Exractor'
555  name: 'EMail Exractor'
556  category: 'Crawler'
557  url: ''
558  producer:
559    name: ''
560    url: ''
561
562- regex: 'evc-batch'
563  name: 'evc-batch'
564  category: 'Crawler'
565  url: ''
566  producer:
567    name: 'eVenture Capital Partners II, LLC'
568    url: 'http://www.eventures.vc/'
569
570- regex: 'Exabot|ExaleadCloudview'
571  name: 'ExaBot'
572  category: 'Crawler'
573  url: 'http://www.exabot.com/go/robot'
574  producer:
575    name: 'Dassault Systèmes'
576    url: 'http://www.3ds.com'
577
578- regex: 'ExactSeek Crawler'
579  name: 'ExactSeek Crawler'
580  category: 'Search bot'
581  url: 'http://www.exactseek.com'
582  producer:
583    name: 'Jayde Online, Inc.'
584    url: 'http://www.jaydeonlineinc.com'
585
586- regex: 'Ezooms'
587  name: 'Ezooms'
588  category: 'Crawler'
589  url: ''
590  producer:
591    name: 'SEOmoz, Inc.'
592    url: 'http://moz.com/'
593
594- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
595  name: 'Facebook Crawler'
596  category: 'Social Media Agent'
597  url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
598  producer:
599    name: 'Meta Platforms, Inc.'
600    url: 'https://www.meta.com/'
601
602- regex: 'meta-externalagent'
603  name: 'Meta-ExternalAgent'
604  category: 'Crawler'
605  url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
606  producer:
607    name: 'Meta Platforms, Inc.'
608    url: 'https://www.meta.com/'
609
610- regex: 'meta-externalfetcher'
611  name: 'Meta-ExternalFetcher'
612  category: 'Social Media Agent'
613  url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers'
614  producer:
615    name: 'Meta Platforms, Inc.'
616    url: 'https://www.meta.com/'
617
618- regex: 'FacebookBot'
619  name: 'FacebookBot'
620  category: 'Crawler'
621  url: 'https://developers.facebook.com/docs/sharing/bot'
622  producer:
623    name: 'Meta Platforms, Inc.'
624    url: 'https://www.meta.com/'
625
626- regex: 'Feedbin'
627  name: 'Feedbin'
628  url: 'http://feedbin.com/'
629  category: 'Feed Fetcher'
630  producer:
631    name: ''
632    url: ''
633
634- regex: 'FeedBurner'
635  name: 'FeedBurner'
636  url: 'http://www.feedburner.com'
637  category: 'Feed Fetcher'
638  producer:
639    name: ''
640    url: ''
641
642- regex: 'Feed Wrangler'
643  name: 'Feed Wrangler'
644  url: 'https://feedwrangler.net/'
645  category: 'Feed Fetcher'
646  producer:
647    name: 'David Smith & Developing Perspective, LLC'
648    url: 'https://david-smith.org'
649
650- regex: 'Feedly'
651  name: 'Feedly'
652  url: 'http://www.feedly.com'
653  category: 'Feed Fetcher'
654  producer:
655    name: ''
656    url: ''
657
658- regex: 'Feedspot'
659  name: 'Feedspot'
660  url: 'http://www.feedspot.com'
661  category: 'Feed Fetcher'
662  producer:
663    name: ''
664    url: ''
665
666- regex: 'Fever/'
667  name: 'Fever'
668  url: 'http://feedafever.com/'
669  category: 'Feed Fetcher'
670  producer:
671    name: ''
672    url: ''
673
674- regex: 'FlipboardProxy|FlipboardRSS'
675  name: 'Flipboard'
676  url: 'http://flipboard.com/browserproxy'
677  category: 'Feed Fetcher'
678  producer:
679    name: 'Flipboard'
680    url: 'http://flipboard.com/'
681
682- regex: 'Findxbot'
683  name: 'Findxbot'
684  category: 'Crawler'
685  url: 'http://www.findxbot.com'
686
687- regex: 'FreshRSS'
688  name: 'FreshRSS'
689  category: 'Feed Fetcher'
690  url: 'https://freshrss.org/'
691
692- regex: 'Genieo'
693  name: 'Genieo Web filter'
694  category: ''
695  url: 'http://www.genieo.com/webfilter.html'
696  producer:
697    name: 'Genieo'
698    url: 'http://www.genieo.com'
699
700- regex: 'GigablastOpenSource'
701  name: 'Gigablast'
702  category: 'Search bot'
703  url: 'https://github.com/gigablast/open-source-search-engine'
704  producer:
705    name: 'Matt Wells'
706    url: 'http://www.gigablast.com/faq.html'
707
708- regex: 'Gluten Free Crawler'
709  name: 'Gluten Free Crawler'
710  category: 'Crawler'
711  url: 'http://glutenfreepleasure.com/'
712  producer:
713    name: ''
714    url: ''
715
716- regex: 'gobuster'
717  name: 'Gobuster'
718  url: 'https://github.com/OJ/gobuster'
719
720- regex: 'ichiro/mobile goo'
721  name: 'Goo'
722  category: 'Search bot'
723  url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
724  producer:
725    name: 'NTT Resonant'
726    url: 'http://goo.ne.jp'
727
728- regex: 'Storebot-Google'
729  name: 'Google StoreBot'
730  category: 'Crawler'
731
732- regex: 'Google Favicon'
733  name: 'Google Favicon'
734  category: 'Crawler'
735
736- regex: 'Google Search Console'
737  name: 'Google Search Console'
738  category: 'Crawler'
739  url: 'https://search.google.com/search-console/about'
740  producer:
741    name: 'Google Inc.'
742    url: 'https://www.google.com/'
743
744- regex: 'Google Page Speed Insights'
745  name: 'Google PageSpeed Insights'
746  category: 'Site Monitor'
747  url: 'http://developers.google.com/speed/pagespeed/insights/'
748  producer:
749    name: 'Google Inc.'
750    url: 'https://www.google.com/'
751
752- regex: 'google_partner_monitoring'
753  name: 'Google Partner Monitoring'
754  category: 'Site Monitor'
755  url: ''
756  producer:
757    name: 'Google Inc.'
758    url: 'https://www.google.com/'
759
760- regex: 'Google-Cloud-Scheduler'
761  name: 'Google Cloud Scheduler'
762  category: 'Crawler'
763  url: 'https://cloud.google.com/scheduler'
764  producer:
765    name: 'Google Inc.'
766    url: 'https://www.google.com'
767
768- regex: 'Google-Structured-Data-Testing-Tool'
769  name: 'Google Structured Data Testing Tool'
770  category: 'Validator'
771  url: 'https://search.google.com/structured-data/testing-tool'
772  producer:
773    name: 'Google Inc.'
774    url: 'https://www.google.com/'
775
776- regex: 'GoogleStackdriverMonitoring'
777  name: 'Google Stackdriver Monitoring'
778  category: 'Site Monitor'
779  url: 'https://cloud.google.com/monitoring'
780  producer:
781    name: 'Google Inc.'
782    url: 'https://www.google.com'
783
784- regex: 'Google-Transparency-Report'
785  name: 'Google Transparency Report'
786  category: 'Site Monitor'
787  url: 'https://transparencyreport.google.com/'
788  producer:
789    name: 'Google Inc.'
790    url: 'https://www.google.com/'
791
792- regex: 'Google-CloudVertexBot'
793  name: 'Google-CloudVertexBot'
794  category: 'Crawler'
795  url: 'https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot'
796  producer:
797    name: 'Google Inc.'
798    url: 'https://www.google.com/'
799
800- regex: 'via ggpht\.com GoogleImageProxy'
801  name: 'Gmail Image Proxy'
802  category: 'Crawler'
803  url: ''
804  producer:
805    name: 'Google Inc.'
806    url: 'https://www.google.com/'
807
808- regex: 'Google-Document-Conversion'
809  name: 'Google-Document-Conversion'
810  category: 'Service Agent'
811  url: 'https://support.google.com/drive/answer/176692?hl=en'
812  producer:
813    name: 'Google Inc.'
814    url: 'https://www.google.com/'
815
816- regex: 'GoogleDocs; apps-spreadsheets'
817  name: 'Google Sheets'
818  category: 'Service Agent'
819  url: 'https://workspace.google.com/products/sheets/'
820  producer:
821    name: 'Google Inc.'
822    url: 'https://www.google.com/'
823
824- regex: 'GoogleDocs; apps-presentations'
825  name: 'Google Slides'
826  category: 'Service Agent'
827  url: 'https://workspace.google.com/products/slides/'
828  producer:
829    name: 'Google Inc.'
830    url: 'https://www.google.com/'
831
832- regex: 'GoogleDocs;'
833  name: 'Google Docs'
834  category: 'Service Agent'
835  url: 'https://docs.google.com/'
836  producer:
837    name: 'Google Inc.'
838    url: 'https://www.google.com/'
839
840- regex: 'SeznamEmailProxy'
841  name: 'Seznam Email Proxy'
842  category: 'Crawler'
843  url: ''
844  producer:
845    name: 'Seznam.cz, a.s.'
846    url: 'http://www.seznam.cz/'
847
848- regex: 'Seznam-Zbozi-robot'
849  name: 'Seznam Zbozi.cz'
850  category: 'Crawler'
851  url: ''
852  producer:
853    name: 'Seznam.cz, a.s.'
854    url: 'https://www.zbozi.cz/'
855
856- regex: 'Heurekabot-Feed'
857  name: 'Heureka Feed'
858  category: 'Crawler'
859  url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
860  producer:
861    name: 'Heureka.cz, a.s.'
862    url: 'https://www.heureka.cz/'
863
864- regex: 'ShopAlike'
865  name: 'ShopAlike'
866  category: 'Crawler'
867  url: ''
868  producer:
869    name: 'Visual Meta'
870    url: 'https://www.shopalike.cz/'
871
872- regex: 'deepcrawl\.com'
873  name: 'Lumar'
874  category: 'Crawler'
875  url: 'https://deepcrawl.com/bot'
876  producer:
877    name: 'Lumar'
878    url: 'https://www.lumar.io/'
879
880- regex: 'Googlebot-News'
881  name: 'Googlebot News'
882  category: 'Search bot'
883  url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
884  producer:
885    name: 'Google Inc.'
886    url: 'https://www.google.com/'
887
888- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
889  name: 'Googlebot'
890  category: 'Search bot'
891  url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
892  producer:
893    name: 'Google Inc.'
894    url: 'https://www.google.com/'
895
896- regex: '^Google$'
897  name: 'Googlebot'
898  category: 'Search bot'
899  url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
900  producer:
901    name: 'Google Inc.'
902    url: 'https://www.google.com/'
903
904- regex: 'Google-Safety'
905  name: 'Google-Safety'
906  category: 'Crawler'
907  url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers'
908  producer:
909    name: 'Google Inc.'
910    url: 'https://www.google.com/'
911
912- regex: 'DuplexWeb-Google'
913  name: 'DuplexWeb-Google'
914  category: 'Crawler'
915  url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers'
916  producer:
917    name: 'Google Inc.'
918    url: 'https://www.google.com/'
919
920- regex: 'Google-Area120-PrivacyPolicyFetcher'
921  name: 'Google Area 120 Privacy Policy Fetcher'
922  category: 'Crawler'
923  url: 'https://area120.google.com/'
924  producer:
925    name: 'Google Inc.'
926    url: 'https://www.google.com/'
927
928- regex: 'HubSpot '
929  name: 'HubSpot'
930  category: 'Crawler'
931  producer:
932    name: 'HubSpot Inc.'
933    url: 'https://www.hubspot.com'
934
935- regex: 'vuhuv(?:Bot|RBT)'
936  name: 'vuhuvBot'
937  category: 'Search bot'
938  url: 'https://vuhuv.com/bot.html'
939
940- regex: 'HTTPMon'
941  name: 'HTTPMon'
942  category: 'Site Monitor'
943  url: 'http://www.httpmon.com'
944  producer:
945    name: 'towards GmbH'
946    url: 'http://www.towards.ch/'
947
948- regex: 'ICC-Crawler'
949  name: 'ICC-Crawler'
950  category: 'Crawler'
951  url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
952  producer:
953    name: ''
954    url: ''
955
956- regex: 'inoreader\.com'
957  name: 'inoreader'
958  category: 'Feed Reader'
959  url: 'https://www.inoreader.com'
960
961- regex: 'iisbot'
962  name: 'IIS Site Analysis'
963  category: 'Crawler'
964  url: 'http://www.iis.net/iisbot.html'
965  producer:
966    name: 'Microsoft Corporation'
967    url: 'http://www.microsoft.com'
968
969- regex: 'ips-agent'
970  name: 'IPS Agent'
971  category: 'Crawler'
972  producer:
973    name: 'VeriSign, Inc'
974    url: 'http://www.verisign.com/'
975
976- regex: 'IP-Guide\.com'
977  name: 'IP-Guide Crawler'
978  category: 'Crawler'
979  url: ''
980  producer:
981    name: ''
982    url: 'https://ip-guide.com'
983
984- regex: 'k6/'
985  name: 'K6'
986  url: 'https://k6.io/'
987
988- regex: 'kouio'
989  name: 'Kouio'
990  url: 'http://kouio.com/'
991  category: 'Feed Fetcher'
992  producer:
993    name: ''
994    url: ''
995
996- regex: 'larbin'
997  name: 'Larbin web crawler'
998  category: 'Crawler'
999  url: 'http://larbin.sourceforge.net'
1000  producer:
1001    name: ''
1002    url: ''
1003
# Lighthouse: matches an optional alphanumeric prefix followed by
# '-Lighthouse'. The class is spelled A-Za-z0-9 on purpose: the ASCII range
# 'A-z' would also admit the punctuation between 'Z' and 'a' ([ \ ] ^ _ `).
- regex: '[A-Za-z0-9]*-Lighthouse'
  name: 'Lighthouse'
  category: 'Site Monitor'
  url: 'https://developers.google.com/web/tools/lighthouse'
  producer:
    name: 'Lighthouse'
    url: 'https://developers.google.com/web/tools/lighthouse'
1011
1012- regex: 'last-modified\.com'
1013  name: 'LastMod Bot'
1014  category: 'Site Monitor'
1015  url: 'https://last-modified.com/en/about'
1016  producer:
1017    name: ''
1018    url: 'https://last-modified.com/en'
1019
1020- regex: 'linkdexbot|linkdex\.com'
1021  name: 'Linkdex Bot'
1022  category: 'Search bot'
1023  url: 'http://www.linkdex.com/bots'
1024  producer:
1025    name: 'Mojeek Ltd.'
1026    url: 'http://www.mojeek.com'
1027
1028- regex: 'LinkedInBot'
1029  name: 'LinkedIn Bot'
1030  category: 'Social Media Agent'
1031  url: 'http://www.linkedin.com'
1032  producer:
1033    name: 'LinkedIn'
1034    url: 'http://www.linkedin.com'
1035
1036- regex: 'ltx71'
1037  name: 'LTX71'
1038  category: 'Security Checker'
1039  url: 'https://ltx71.com/'
1040  producer:
1041    name: ''
1042    url: ''
1043
1044- regex: 'Mail\.RU'
1045  name: 'Mail.Ru Bot'
1046  category: 'Search bot'
1047  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
1048  producer:
1049    name: 'Mail.Ru Group'
1050    url: 'http://corp.mail.ru'
1051
1052- regex: 'magpie-crawler'
1053  name: 'Magpie-Crawler'
1054  category: 'Social Media Agent'
1055  url: 'http://www.brandwatch.com/magpie-crawler/'
1056  producer:
1057    name: 'Brandwatch'
1058    url: 'http://www.brandwatch.com'
1059
1060- regex: 'MagpieRSS'
1061  name: 'MagpieRSS'
1062  url: 'http://magpierss.sourceforge.net/'
1063  category: 'Feed Parser'
1064  producer:
1065    name: ''
1066    url: ''
1067
1068- regex: 'masscan-ng'
1069  name: 'masscan-ng'
1070  url: 'https://github.com/bi-zone/masscan-ng'
1071  category: 'Crawler'
1072  producer:
1073    name: 'BIZON, OOO'
1074    url: 'https://bi.zone/'
1075
1076- regex: '.*masscan'
1077  name: 'masscan'
1078  url: 'https://github.com/robertdavidgraham/masscan'
1079  category: 'Crawler'
1080  producer:
1081    name: 'Robert Graham'
1082    url: 'https://github.com/robertdavidgraham'
1083
1084- regex: 'Mastodon/'
1085  name: 'Mastodon Bot'
1086  category: 'Social Media Agent'
1087
1088- regex: 'meanpathbot'
1089  name: 'Meanpath Bot'
1090  category: 'Search bot'
1091  url: 'http://www.meanpath.com/meanpathbot.html'
1092  producer:
1093    name: 'Meanpath'
1094    url: 'http://www.meanpath.com'
1095
1096- regex: 'MetaJobBot'
1097  name: 'MetaJobBot'
1098  category: 'Crawler'
1099  url: 'http://www.metajob.at/the/crawler'
1100  producer:
1101    name: 'MetaJob'
1102    url: 'http://www.metajob.at'
1103
1104- regex: 'MetaInspector'
1105  name: 'MetaInspector'
1106  category: 'Crawler'
1107  url: 'https://github.com/jaimeiniesta/metainspector'
1108
1109- regex: 'MixrankBot'
1110  name: 'Mixrank Bot'
1111  category: 'Crawler'
1112  url: 'http://mixrank.com'
1113  producer:
1114    name: 'Online Media Group, Inc.'
1115    url: ''
1116
1117- regex: 'MJ12bot'
1118  name: 'MJ12 Bot'
1119  category: 'Search bot'
1120  url: 'http://majestic12.co.uk/bot.php'
1121  producer:
1122    name: 'Majestic-12'
1123    url: 'http://majestic12.co.uk'
1124
# Mnogosearch search-engine crawler.
- regex: 'Mnogosearch'
  name: 'Mnogosearch'
  category: 'Search bot'
  url: 'http://www.mnogosearch.org/'
  producer:
    name: 'Lavtech.Com Corp.'
    url: ''

# MojeekBot: crawler of the Mojeek search engine.
- regex: 'MojeekBot'
  name: 'MojeekBot'
  category: 'Search bot'
  url: 'http://www.mojeek.com/bot.html'
  producer:
    name: 'Mojeek Ltd.'
    url: 'http://www.mojeek.com'
1139
1140- regex: 'munin'
1141  name: 'Munin'
1142  category: 'Site Monitor'
1143  url: 'http://munin-monitoring.org/'
1144  producer:
1145    name: 'Munin'
1146    url: 'http://munin-monitoring.org/'
1147
1148- regex: 'NalezenCzBot'
1149  name: 'NalezenCzBot'
1150  category: 'Crawler'
1151  url: 'http://www.nalezen.cz/about-crawler'
1152  producer:
1153    name: 'Jaroslav Kuboš'
1154    url: ''
1155
1156- regex: 'check_http/v'
1157  name: 'Nagios check_http'
1158  category: 'Site Monitor'
1159  url: 'https://nagios.org'
1160  producer:
1161    name: 'Nagios Plugins Development Team'
1162    url: 'https://nagios.org'
1163
1164- regex: 'nbertaupete95\(at\)gmail\.com'
1165  name: 'nbertaupete95'
1166  category: 'Crawler'
1167
1168- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
1169  name: 'Netcraft Survey Bot'
1170  category: 'Search bot'
1171  url: ''
1172  producer:
1173    name: 'Netcraft'
1174    url: 'http://www.netcraft.com'
1175
1176- regex: 'netEstate NE Crawler'
1177  name: 'netEstate'
1178  category: 'Crawler'
1179  url: 'http://www.website-datenbank.de/Impressum'
1180  producer:
1181    name: 'netEstate GmbH'
1182    url: 'https://www.netestate.de/en/'
1183
# Netvibes feed fetcher. Keys follow the regex/name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
1184- regex: 'Netvibes'
1185  name: 'Netvibes'
1186  category: 'Feed Fetcher'
1187  url: 'http://www.netvibes.com/'
1188  producer:
1189    name: ''
1190    url: ''
1191
# NewsBlur feed fetcher: matches 'NewsBlur ' followed later by 'Fetcher' or
# 'Finder'. Keys follow the regex/name/category/url/producer order used by
# most records in this file; producer fields are empty strings.
1192- regex: 'NewsBlur .*(?:Fetcher|Finder)'
1193  name: 'NewsBlur'
1194  category: 'Feed Fetcher'
1195  url: 'http://www.newsblur.com'
1196  producer:
1197    name: ''
1198    url: ''
1199
# NewsGator feed fetcher. Keys follow the regex/name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
1200- regex: 'NewsGatorOnline'
1201  name: 'NewsGator'
1202  category: 'Feed Fetcher'
1203  url: 'http://www.newsgator.com'
1204  producer:
1205    name: ''
1206    url: ''
1207
1208- regex: 'nlcrawler'
1209  name: 'NLCrawler'
1210  category: 'Crawler'
1211  url: ''
1212  producer:
1213    name: 'Northern Light'
1214    url: 'http://northernlight.com'
1215
1216- regex: 'Nmap Scripting Engine'
1217  name: 'Nmap'
1218  category: 'Security Checker'
1219  url: 'https://nmap.org/book/nse.html'
1220  producer:
1221    name: 'Nmap'
1222    url: 'https://nmap.org/'
1223
1224- regex: 'Nuzzel'
1225  name: 'Nuzzel'
1226  category: 'Crawler'
1227  producer:
1228    name: 'Nuzzel'
1229    url: 'https://www.nuzzel.com/'
1230
1231- regex: 'NodePing'
1232  name: 'NodePing'
1233  category: 'Site Monitor'
1234  url: 'https://nodeping.com'
1235  producer:
1236    name: 'NodePing'
1237    url: 'https://nodeping.com'
1238
1239- regex: 'Octopus [\d.]+'
1240  name: 'Octopus'
1241
1242- regex: 'OnlineOrNot\.com_bot'
1243  name: 'OnlineOrNot Bot'
1244  category: 'Site Monitor'
1245  url: 'https://onlineornot.com/website-monitoring'
1246  producer:
1247    name: 'OnlineOrNot'
1248    url: 'https://onlineornot.com'
1249
1250- regex: 'omgili'
1251  name: 'Omgili bot'
1252  category: 'Search bot'
1253  url: 'http://www.omgili.com/Crawler.html'
1254  producer:
1255    name: 'Omgili'
1256    url: 'http://www.omgili.com'
1257
1258- regex: 'OpenindexSpider'
1259  name: 'Openindex Spider'
1260  category: 'Search bot'
1261  url: 'http://www.openindex.io/en/webmasters/spider.html'
1262  producer:
1263    name: 'Openindex B.V.'
1264    url: 'http://www.openindex.io'
1265
1266- regex: 'spbot'
1267  name: 'OpenLinkProfiler'
1268  category: 'Crawler'
1269  url: 'http://openlinkprofiler.org/bot'
1270  producer:
1271    name: 'Axandra GmbH'
1272    url: 'http://www.axandra.com'
1273
1274- regex: 'OpenWebSpider'
1275  name: 'OpenWebSpider'
1276  category: 'Crawler'
1277  url: 'http://www.openwebspider.org'
1278  producer:
1279    name: 'OpenWebSpider Lab'
1280    url: 'http://lab.openwebspider.org'
1281
1282- regex: 'OrangeBot|VoilaBot'
1283  name: 'Orange Bot'
1284  category: 'Search bot'
1285  url: 'http://lemoteur.orange.fr'
1286  producer:
1287    name: 'Orange'
1288    url: 'http://www.orange.fr'
1289
1290- regex: 'PaperLiBot'
1291  name: 'PaperLiBot'
1292  category: 'Search bot'
1293  url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
1294  producer:
1295    name: 'Smallrivers SA'
1296    url: 'http://www.paper.li'
1297
1298- regex: 'phantomas/'
1299  name: 'Phantomas'
1300  category: 'Site Monitor'
1301  url: 'https://github.com/macbre/phantomas'
1302
1303- regex: 'phpservermon'
1304  name: 'PHP Server Monitor'
1305  category: 'Site Monitor'
1306  url: 'https://github.com/phpservermon/phpservermon'
1307  producer:
1308    name: 'PHP Server Monitor'
1309    url: 'http://www.phpservermonitor.org/'
1310
1311- regex: 'Pocket(?:ImageCache|Parser)'
1312  name: 'Pocket'
1313  category: 'Read-it-later Service'
1314  url: 'https://getpocket.com/pocketparser_ua'
1315  producer:
1316    name: 'Read It Later, Inc.'
1317    url: 'https://getpocket.com/'
1318
1319- regex: 'PritTorrent'
1320  name: 'PritTorrent'
1321  category: 'Crawler'
1322  url: 'https://github.com/astro/prittorrent'
1323  producer:
1324    name: 'Bitlove'
1325    url: 'http://bitlove.org/'
1326
1327- regex: 'PRTG Network Monitor'
1328  name: 'PRTG Network Monitor'
1329  category: 'Network Monitor'
1330  url: 'https://www.paessler.com/prtg'
1331  producer:
1332    name: 'Paessler AG'
1333    url: 'https://www.paessler.com'
1334
1335- regex: 'psbot'
1336  name: 'Picsearch bot'
1337  category: 'Search bot'
1338  url: 'http://www.picsearch.com/bot.html'
1339  producer:
1340    name: 'Picsearch'
1341    url: 'http://www.picsearch.com'
1342
1343- regex: 'Pingdom(?:\.com|TMS)'
1344  name: 'Pingdom Bot'
1345  category: 'Site Monitor'
1346  url: ''
1347  producer:
1348    name: 'Pingdom AB'
1349    url: 'https://www.pingdom.com'
1350
1351- regex: 'Quora Link Preview'
1352  name: 'Quora Link Preview'
1353  category: 'Crawler'
1354  url: ''
1355  producer:
1356    name: 'Quora'
1357    url: 'http://www.quora.com'
1358
1359- regex: 'Quora-Bot'
1360  name: 'Quora Bot'
1361  category: 'Crawler'
1362  url: ''
1363  producer:
1364    name: 'Quora'
1365    url: 'https://www.quora.com/'
1366
1367- regex: 'RamblerMail'
1368  name: 'RamblerMail Image Proxy'
1369  category: 'Crawler'
1370  url: ''
1371  producer:
1372    name: 'Rambler&Co'
1373    url: 'https://rambler-co.ru/'
1374
1375- regex: 'QuerySeekerSpider'
1376  name: 'QuerySeekerSpider'
1377  category: 'Crawler'
1378  url: 'http://queryseeker.com/bot.html'
1379  producer:
1380    name: 'QueryEye Inc.'
1381    url: 'http://queryeye.com'
1382
1383- regex: 'Qwantify|Qwantbot'
1384  name: 'Qwantbot'
1385  category: 'Crawler'
1386  url: 'https://help.qwant.com/bot/'
1387  producer:
1388    name: 'Qwant Corporation'
1389    url: 'https://www.qwant.com/'
1390
1391- regex: 'Rainmeter'
1392  name: 'Rainmeter'
1393  category: 'Crawler'
1394  url: 'https://www.rainmeter.net'
1395
1396- regex: 'redditbot'
1397  name: 'Reddit Bot'
1398  category: 'Social Media Agent'
1399  url: 'http://www.reddit.com/feedback'
1400  producer:
1401    name: 'reddit inc.'
1402    url: 'http://www.reddit.com'
1403
1404- regex: 'Riddler'
1405  name: 'Riddler'
1406  category: 'Security search bot'
1407  url: 'https://riddler.io/about'
1408  producer:
1409    name: 'F-Secure'
1410    url: 'https://www.f-secure.com'
1411
1412- regex: 'rogerbot'
1413  name: 'Rogerbot'
1414  category: 'Crawler'
1415  url: 'http://moz.com/help/pro/what-is-rogerbot-'
1416  producer:
1417    name: 'SEOmoz, Inc.'
1418    url: 'http://moz.com/'
1419
1420- regex: 'ROI Hunter'
1421  name: 'ROI Hunter'
1422  category: 'Crawler'
1423  url: ''
1424  producer:
1425    name: 'Roihunter a.s.'
1426    url: 'http://roihunter.com/'
1427
1428- regex: 'SafeDNSBot'
1429  name: 'SafeDNSBot'
1430  category: 'Crawler'
1431  url: 'https://www.safedns.com/searchbot'
1432  producer:
1433    name: 'SafeDNS, Inc.'
1434    url: 'https://www.safedns.com/'
1435
1436- regex: 'Scrapy'
1437  name: 'Scrapy'
1438  category: 'Crawler'
1439  url: 'http://scrapy.org'
1440
1441- regex: 'Screaming Frog SEO Spider'
1442  name: 'Screaming Frog SEO Spider'
1443  category: 'Crawler'
1444  url: 'http://www.screamingfrog.co.uk/seo-spider'
1445  producer:
1446    name: 'Screaming Frog Ltd'
1447    url: 'http://www.screamingfrog.co.uk'
1448
1449- regex: 'ScreenerBot'
1450  name: 'ScreenerBot'
1451  category: 'Crawler'
1452  url: 'http://www.screenerbot.com'
1453  producer:
1454    name: ''
1455    url: ''
1456
1457- regex: 'SemrushBot'
1458  name: 'SemrushBot'
1459  category: 'Crawler'
1460  url: 'https://www.semrush.com/bot/'
1461  producer:
1462    name: 'Semrush Inc.'
1463    url: 'https://www.semrush.com/'
1464
1465- regex: 'BacklinksExtendedBot'
1466  name: 'BacklinksExtendedBot'
1467  category: 'Crawler'
1468  url: 'https://www.semrush.com/bot/'
1469  producer:
1470    name: 'Semrush Inc.'
1471    url: 'https://www.semrush.com/'
1472
1473- regex: 'SerpReputationManagementAgent'
1474  name: 'Semrush Reputation Management'
1475  category: 'Service Agent'
1476  url: 'https://www.semrush.com/bot/'
1477  producer:
1478    name: 'Semrush Inc.'
1479    url: 'https://www.semrush.com/'
1480
1481- regex: 'SplitSignalBot'
1482  name: 'SplitSignalBot'
1483  category: 'Crawler'
1484  url: 'https://www.semrush.com/bot/'
1485  producer:
1486    name: 'Semrush Inc.'
1487    url: 'https://www.semrush.com/'
1488
1489- regex: 'SiteAuditBot'
1490  name: 'SiteAuditBot'
1491  category: 'Crawler'
1492  url: 'https://www.semrush.com/bot/'
1493  producer:
1494    name: 'Semrush Inc.'
1495    url: 'https://www.semrush.com/'
1496
# Sensika bot record.
# NOTE(review): 'category' and 'url' are explicit empty strings here, whereas
# other records either set a category or omit the key entirely. Confirm which
# category applies, or whether the empty value is meaningful to the consumer.
1497- regex: 'SensikaBot'
1498  name: 'Sensika Bot'
1499  category: ''
1500  url: ''
1501  producer:
1502    name: 'Sensika'
1503    url: 'http://sensika.com'
1504
1505- regex: 'SEOENG(?:World)?Bot'
1506  name: 'SEOENGBot'
1507  category: 'Crawler'
1508  url: 'http://www.seoengine.com/seoengbot.htm'
1509  producer:
1510    name: 'SEO Engine'
1511    url: 'http://www.seoengine.com'
1512
1513- regex: 'seoscanners\.net'
1514  name: 'Seoscanners.net'
1515  category: 'Crawler'
1516  url: ''
1517
1518- regex: 'SkypeUriPreview'
1519  name: 'Skype URI Preview'
1520  category: 'Service Agent'
1521  url: ''
1522  producer:
1523    name: 'Skype Communications S.à.r.l.'
1524    url: 'https://www.skype.com'
1525
1526- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
1527  name: 'Seznam Bot'
1528  category: 'Search bot'
1529  url: 'http://www.mapy.cz/cz/seznambot.html'
1530  producer:
1531    name: 'Seznam.cz, a.s.'
1532    url: 'http://www.seznam.cz/'
1533
1534- regex: 'shopify-partner-homepage-scraper'
1535  name: 'Shopify Partner'
1536  category: 'Crawler'
1537  url: 'https://www.shopify.com/partners'
1538  producer:
1539    name: 'Shopify'
1540    url: 'https://www.shopify.com/'
1541
1542- regex: 'ShopWiki'
1543  name: 'ShopWiki'
1544  category: 'Search tools'
1545  url: 'http://www.shopwiki.com/wiki/Help:Bot'
1546  producer:
1547    name: 'ShopWiki Corp.'
1548    url: 'http://www.shopwiki.com'
1549
# SilverReader feed fetcher. Keys follow the regex/name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
1550- regex: 'SilverReader'
1551  name: 'SilverReader'
1552  category: 'Feed Fetcher'
1553  url: 'http://silverreader.com'
1554  producer:
1555    name: ''
1556    url: ''
1557
# SimplePie feed parser. Keys follow the regex/name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
1558- regex: 'SimplePie'
1559  name: 'SimplePie'
1560  category: 'Feed Parser'
1561  url: 'http://www.simplepie.org'
1562  producer:
1563    name: ''
1564    url: ''
1565
1566- regex: 'SISTRIX Crawler'
1567  name: 'SISTRIX Crawler'
1568  category: 'Crawler'
1569  url: 'http://crawler.sistrix.net'
1570  producer:
1571    name: 'SISTRIX GmbH'
1572    url: 'http://www.sistrix.de'
1573
1574- regex: 'compatible; (?:SISTRIX )?Optimizer'
1575  name: 'SISTRIX Optimizer'
1576  category: 'Crawler'
1577  url: 'https://optimizer.sistrix.com'
1578  producer:
1579    name: 'SISTRIX GmbH'
1580    url: 'http://www.sistrix.de'
1581
1582- regex: 'SiteSucker'
1583  name: 'SiteSucker'
1584  category: 'Crawler'
1585  url: 'http://ricks-apps.com/osx/sitesucker/'
1586
1587- regex: 'sixy\.ch'
1588  name: 'Sixy.ch'
1589  category: 'Site Monitor'
1590  url: 'http://sixy.ch'
1591  producer:
1592    name: 'Manuel Kasper'
1593    url: 'https://neon1.net/'
1594
1595- regex: 'Slackbot|Slack-ImgProxy'
1596  name: 'Slackbot'
1597  category: 'Crawler'
1598  url: 'https://api.slack.com/robots'
1599  producer:
1600    name: 'Slack Technologies'
1601    url: 'http://slack.com'
1602
1603- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
1604  name: 'Sogou Spider'
1605  category: 'Search bot'
1606  url: 'http://www.sogou.com/docs/help/webmasters.htm'
1607  producer:
1608    name: 'Sohu, Inc.'
1609    url: 'http://www.sogou.com'
1610
1611- regex: 'Sosospider|Sosoimagespider'
1612  name: 'Soso Spider'
1613  category: 'Search bot'
1614  url: 'http://help.soso.com/webspider.htm'
1615  producer:
1616    name: 'Tencent Holdings'
1617    url: 'http://www.soso.com'
1618
1619- regex: 'Sprinklr'
1620  name: 'Sprinklr'
1621  category: 'Crawler'
1622  url: ''
1623  producer:
1624    name: 'Sprinklr, Inc.'
1625    url: 'https://www.sprinklr.com/'
1626
1627- regex: 'SSL Labs'
1628  name: 'SSL Labs'
1629  category: 'Validator'
1630  url: 'https://www.ssllabs.com/about/assessment.html'
1631  producer:
1632    name: 'SSL Labs'
1633    url: 'https://www.ssllabs.com/about/assessment.html'
1634
1635- regex: 'StatusCake'
1636  name: 'StatusCake'
1637  category: 'Site Monitor'
1638  url: 'https://www.statuscake.com'
1639  producer:
1640    name: 'StatusCake'
1641    url: 'https://www.statuscake.com'
1642
1643- regex: 'Superfeedr bot'
1644  name: 'Superfeedr Bot'
1645  category: 'Feed Fetcher'
1646  url: ''
1647  producer:
1648    name: 'Superfeedr'
1649    url: 'https://superfeedr.com/'
1650
1651- regex: 'Sparkler'
1652  name: 'Sparkler'
1653  category: 'Crawler'
1654  url: 'https://github.com/USCDataScience/sparkler'
1655
1656- regex: 'Spinn3r'
1657  name: 'Spinn3r'
1658  category: 'Crawler'
1659  url: 'http://spinn3r.com/robot'
1660  producer:
1661    name: 'Tailrank Inc'
1662    url: 'http://spinn3r.com'
1663
1664- regex: 'SputnikBot'
1665  name: 'Sputnik Bot'
1666  category: 'Crawler'
1667  url: ''
1668
1669- regex: 'SputnikFaviconBot'
1670  name: 'Sputnik Favicon Bot'
1671  category: 'Crawler'
1672  url: ''
1673
1674- regex: 'SputnikImageBot'
1675  name: 'Sputnik Image Bot'
1676  category: 'Crawler'
1677  url: ''
1678
1679- regex: 'SurveyBot'
1680  name: 'Survey Bot'
1681  category: 'Search bot'
1682  url: 'http://www.domaintools.com/webmasters/surveybot.php'
1683  producer:
1684    name: 'Domain Tools'
1685    url: 'http://www.domaintools.com'
1686
# Tarmot Gezgin search bot. Keys follow the regex/name/category/url order
# used by most records in this file; no producer is recorded.
1687- regex: 'TarmotGezgin'
1688  name: 'Tarmot Gezgin'
1689  category: 'Search bot'
1690  url: 'http://www.tarmot.com/gezgin/'
1691
1692- regex: 'TelegramBot'
1693  name: 'TelegramBot'
1694  url: 'https://telegram.org/blog/bot-revolution'
1695
# Venafi TrustNet TLS scanner. Keys follow the
# regex/name/category/url/producer order used by most records in this file.
1696- regex: 'TLSProbe'
1697  name: 'TLSProbe'
1698  category: 'Security search bot'
1699  url: 'https://scan.trustnet.venafi.com/'
1700  producer:
1701    name: 'Venafi TrustNet'
1702    url: 'https://www.venafi.com'
1703
1704- regex: 'TinEye-bot'
1705  name: 'TinEye Crawler'
1706  category: 'Search bot'
1707  url: 'http://www.tineye.com/crawler.html'
1708  producer:
1709    name: 'Idée Inc.'
1710    url: 'http://ideeinc.com'
1711
# Tiny Tiny RSS feed fetcher. Keys follow the
# regex/name/category/url/producer order used by most records in this file;
# producer fields are empty strings.
1712- regex: 'Tiny Tiny RSS'
1713  name: 'Tiny Tiny RSS'
1714  category: 'Feed Fetcher'
1715  url: 'http://tt-rss.org'
1716  producer:
1717    name: ''
1718    url: ''
1719
1720- regex: 'theoldreader\.com'
1721  name: 'theoldreader'
1722  category: 'Feed Reader'
1723  url: 'https://theoldreader.com'
1724
1725- regex: 'Trackable/0\.1'
1726  name: 'Chartable'
1727  category: 'Site Monitor'
1728  url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
1729  producer:
1730    name: 'Chartable'
1731    url: 'https://chartable.com'
1732
1733- regex: 'trendictionbot'
1734  name: 'Trendiction Bot'
1735  category: 'Crawler'
1736  url: 'http://www.trendiction.de/bot'
1737  producer:
1738    name: 'Talkwalker Inc.'
1739    url: 'http://www.talkwalker.com'
1740
1741- regex: 'TurnitinBot'
1742  name: 'TurnitinBot'
1743  category: 'Crawler'
1744  url: 'http://www.turnitin.com/robot/crawlerinfo.html'
1745  producer:
1746    name: 'iParadigms, LLC.'
1747    url: 'http://www.turnitin.com'
1748
1749- regex: 'TweetedTimes'
1750  name: 'TweetedTimes Bot'
1751  category: 'Crawler'
1752  url: 'https://tweetedtimes.com/'
1753  producer:
1754    name: 'TweetedTimes'
1755    url: 'https://tweetedtimes.com/'
1756
1757- regex: 'TweetmemeBot'
1758  name: 'Tweetmeme Bot'
1759  category: 'Crawler'
1760  url: 'http://tweetmeme.com/'
1761  producer:
1762    name: 'Mediasift'
1763    url: ''
1764
1765- regex: 'Twingly Recon'
1766  name: 'Twingly Recon'
1767  category: 'Crawler'
1768  producer:
1769    name: 'Twingly'
1770    url: 'https://www.twingly.com'
1771
1772- regex: 'Twitterbot'
1773  name: 'Twitterbot'
1774  category: 'Social Media Agent'
1775  url: 'https://dev.twitter.com/docs/cards/getting-started'
1776  producer:
1777    name: 'Twitter'
1778    url: 'http://www.twitter.com'
1779
1780- regex: 'UniversalFeedParser'
1781  name: 'UniversalFeedParser'
1782  category: 'Feed Fetcher'
1783  url: 'https://github.com/kurtmckee/feedparser'
1784  producer:
1785    name: 'Kurt McKee'
1786    url: 'https://github.com/kurtmckee'
1787
1788- regex: 'via secureurl\.fwdcdn\.com'
1789  name: 'UkrNet Mail Proxy'
1790  category: 'Crawler'
1791  url: ''
1792  producer:
1793    name: 'UkrNet Ltd'
1794    url: 'https://www.ukr.net/'
1795
# Uptime.com monitor. The pattern requires 'Uptime' or 'Uptimebot' to be
# followed directly by '/', so a string such as 'UptimeRobot/...' is not
# matched here and is left to the separate 'UptimeRobot' record below.
1796- regex: 'Uptime(?:bot)?/'
1797  name: 'Uptimebot'
1798  category: 'Site Monitor'
1799  url: 'https://uptime.com/uptime-bot'
1800  producer:
1801    name: 'Uptime'
1802    url: 'https://uptime.com/'
1803
1804- regex: 'UptimeRobot'
1805  name: 'UptimeRobot'
1806  category: 'Site Monitor'
1807  url: 'https://uptimerobot.com/'
1808  producer:
1809    name: 'Uptime Robot'
1810    url: 'https://uptimerobot.com/'
1811
1812- regex: 'URLAppendBot'
1813  name: 'URLAppendBot'
1814  category: 'Crawler'
1815  url: 'http://www.profound.net/urlappendbot.html'
1816  producer:
1817    name: 'Profound Networks'
1818    url: 'http://www.profound.net'
1819
1820- regex: 'Vagabondo'
1821  name: 'Vagabondo'
1822  category: 'Crawler'
1823  url: ''
1824  producer:
1825    name: 'WiseGuys'
1826    url: 'http://www.wise-guys.nl/'
1827
1828- regex: 'vkShare; '
1829  name: 'VK Share Button'
1830  category: 'Crawler'
1831  url: 'https://dev.vk.com/en/widgets/share'
1832  producer:
1833    name: 'VK'
1834    url: 'https://vk.com/'
1835
1836- regex: 'VKRobot'
1837  name: 'VK Robot'
1838  category: 'Crawler'
1839  url: 'https://dev.vk.com/en/'
1840  producer:
1841    name: 'VK'
1842    url: 'https://vk.com/'
1843
1844- regex: 'VSMCrawler'
1845  name: 'Visual Site Mapper Crawler'
1846  category: 'Crawler'
1847  url: 'http://www.visualsitemapper.com/crawler'
1848  producer:
1849    name: 'Alentum Software Ltd.'
1850    url: 'http://www.alentum.com'
1851
1852- regex: 'Jigsaw'
1853  name: 'W3C CSS Validator'
1854  category: 'Validator'
1855  url: 'http://jigsaw.w3.org/css-validator'
1856  producer:
1857    name: 'W3C'
1858    url: 'http://www.w3.org'
1859
1860- regex: 'W3C_I18n-Checker'
1861  name: 'W3C I18N Checker'
1862  category: 'Validator'
1863  url: 'http://validator.w3.org/i18n-checker'
1864  producer:
1865    name: 'W3C'
1866    url: 'http://www.w3.org'
1867
1868- regex: 'W3C-checklink'
1869  name: 'W3C Link Checker'
1870  category: 'Validator'
1871  url: 'http://validator.w3.org/checklink'
1872  producer:
1873    name: 'W3C'
1874    url: 'http://www.w3.org'
1875
1876- regex: 'W3C_Validator|Validator\.nu'
1877  name: 'W3C Markup Validation Service'
1878  category: 'Validator'
1879  url: 'http://validator.w3.org/services'
1880  producer:
1881    name: 'W3C'
1882    url: 'http://www.w3.org'
1883
1884- regex: 'W3C-mobileOK'
1885  name: 'W3C MobileOK Checker'
1886  category: 'Validator'
1887  url: 'http://validator.w3.org/mobile'
1888  producer:
1889    name: 'W3C'
1890    url: 'http://www.w3.org'
1891
1892- regex: 'W3C_Unicorn'
1893  name: 'W3C Unified Validator'
1894  category: 'Validator'
1895  url: 'http://validator.w3.org/unicorn'
1896  producer:
1897    name: 'W3C'
1898    url: 'http://www.w3.org'
1899
1900- regex: 'P3P Validator'
1901  name: 'W3C P3P Validator'
1902  category: 'Validator'
1903  url: 'https://www.w3.org/P3P/validator.html'
1904  producer:
1905    name: 'W3C'
1906    url: 'https://www.w3.org'
1907
1908- regex: 'Wappalyzer'
1909  name: 'Wappalyzer'
1910  url: 'https://github.com/AliasIO/Wappalyzer'
1911  producer:
1912    name: 'AliasIO'
1913    url: 'https://github.com/AliasIO'
1914
1915- regex: 'PTST/'
1916  name: 'WebPageTest'
1917  category: 'Site Monitor'
1918  url: 'https://www.webpagetest.org'
1919
1920- regex: 'WeSEE'
1921  name: 'WeSEE:Search'
1922  category: 'Search bot'
1923  url: 'http://www.wesee.com/bot'
1924  producer:
1925    name: 'WeSEE Ltd'
1926    url: 'http://www.wesee.com'
1927
1928- regex: 'WebbCrawler'
1929  name: 'WebbCrawler'
1930  category: 'Crawler'
1931  url: 'http://badcheese.com/crawler.html'
1932  producer:
1933    name: 'Steve Webb'
1934    url: 'http://badcheese.com'
1935
# WebSitePulse monitoring checker: matches 'websitepulse' followed by '+' or
# a space and then 'checker'. The info URL points at the vendor's own site
# (same address as the producer URL).
1936- regex: 'websitepulse[+ ]checker'
1937  name: 'WebSitePulse'
1938  category: 'Site Monitor'
1939  url: 'http://www.websitepulse.com/'
1940  producer:
1941    name: 'WebSitePulse'
1942    url: 'http://www.websitepulse.com/'
1943
1944- regex: 'WordPress.+isitwp\.com'
1945  name: 'IsItWP'
1946  category: 'Crawler'
1947  url: 'https://www.isitwp.com/'
1948  producer:
1949    name: 'WPBeginner, LLC'
1950    url: 'https://www.wpbeginner.com/'
1951
1952- regex: 'Automattic Analytics Crawler'
1953  name: 'Automattic Analytics'
1954  category: 'Crawler'
1955  url: 'https://wordpress.com/crawler/'
1956  producer:
1957    name: 'Wordpress.org'
1958    url: 'https://wordpress.org/'
1959
1960- regex: 'WordPress\.com mShots'
1961  name: 'WordPress.com mShots'
1962  category: 'Service Agent'
1963  url: 'https://wordpress.org/'
1964  producer:
1965    name: 'Wordpress.org'
1966    url: 'https://wordpress.org/'
1967
1968- regex: 'wp\.com feedbot'
1969  name: 'wp.com feedbot'
1970  category: 'Feed Fetcher'
1971  url: 'https://wordpress.com/'
1972  producer:
1973    name: 'Automattic, Inc.'
1974    url: 'https://automattic.com/'
1975
# Generic WordPress fallback record.
# It is listed after the more specific 'WordPress.+isitwp\.com' and
# 'WordPress\.com mShots' records, whose user agents this broad pattern would
# also match. Presumably the consumer takes the first matching record, so this
# one must stay below them (verify against the parser).
1976- regex: 'WordPress'
1977  name: 'WordPress'
1978  category: 'Service Agent'
1979  url: 'https://wordpress.org/'
1980  producer:
1981    name: 'Wordpress.org'
1982    url: 'https://wordpress.org/'
1983
1984- regex: 'Wotbox'
1985  name: 'Wotbox'
1986  category: 'Search bot'
1987  url: 'http://www.wotbox.com/bot/'
1988  producer:
1989    name: 'Wotbox'
1990    url: 'http://www.wotbox.com'
1991
1992- regex: 'XenForo'
1993  name: 'XenForo'
1994  category: 'Service Agent'
1995  url: 'https://xenforo.com/'
1996  producer:
1997    name: 'XenForo Ltd.'
1998    url: 'https://xenforo.com/'
1999
2000- regex: 'yacybot'
2001  name: 'YaCy'
2002  category: 'Search bot'
2003  url: 'http://yacy.net/bot.html'
2004  producer:
2005    name: 'YaCy'
2006    url: 'http://yacy.net'
2007
2008- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
2009  name: 'Yahoo! Slurp'
2010  category: 'Search bot'
2011  url: 'http://help.yahoo.com/ysearch/slurp'
2012  producer:
2013    name: 'Yahoo! Inc.'
2014    url: 'http://www.yahoo.com'
2015
2016- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
2017  name: 'Yahoo! Link Preview'
2018  category: 'Crawler'
2019  url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
2020  producer:
2021    name: 'Yahoo! Inc.'
2022    url: 'http://www.yahoo.com'
2023
2024- regex: 'YahooMailProxy'
2025  name: 'Yahoo! Mail Proxy'
2026  category: 'Service Agent'
2027  url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
2028  producer:
2029    name: 'Yahoo! Inc.'
2030    url: 'http://www.yahoo.com'
2031
2032- regex: 'YahooCacheSystem'
2033  name: 'Yahoo! Cache System'
2034  category: 'Crawler'
2035  url: ''
2036  producer:
2037    name: 'Yahoo! Inc.'
2038    url: 'http://www.yahoo.com'
2039
2040- regex: 'Y!J-BRW'
2041  name: 'Yahoo! Japan BRW'
2042  category: 'Crawler'
2043  url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
2044  producer:
2045    name: 'Yahoo! Japan Corp.'
2046    url: 'https://www.yahoo.co.jp/'
2047
2048- regex: 'Y!J-WSC'
2049  name: 'Yahoo! Japan WSC'
2050  category: 'Crawler'
2051  url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
2052  producer:
2053    name: 'Yahoo! Japan Corp.'
2054    url: 'https://www.yahoo.co.jp/'
2055
2056- regex: 'Y!J-ASR'
2057  name: 'Yahoo! Japan ASR'
2058  category: 'Crawler'
2059  url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
2060  producer:
2061    name: 'Yahoo! Japan Corp.'
2062    url: 'https://www.yahoo.co.jp/'
2063
# Catch-all for Yahoo! Japan agents: matches any user agent that starts with
# 'Y!J'. The specific 'Y!J-BRW', 'Y!J-WSC' and 'Y!J-ASR' records appear above
# it. Presumably records are evaluated in file order, so this fallback only
# applies when none of those matched (verify against the parser).
2064- regex: '^Y!J'
2065  name: 'Yahoo! Japan'
2066  category: 'Crawler'
2067  url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
2068  producer:
2069    name: 'Yahoo! Japan Corp.'
2070    url: 'https://www.yahoo.co.jp/'
2071
# Yandex crawlers and service fetchers. The pattern has two top-level
# alternatives:
#   1. 'Yandex' followed by either
#        - an optional qualifier ('.Gazeta ', 'Accessibility', 'Mobile', ...)
#          and then 'Bot' (e.g. 'YandexBot', 'YandexMobileBot'), or
#        - one of the listed service names (e.g. 'YandexImages',
#          'YandexMetrika', 'YandexNewslinks');
#   2. the literal 'YaDirectFetcher'.
2072- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
2073  name: 'Yandex Bot'
2074  category: 'Search bot'
2075  url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
2076  producer:
2077    name: 'Yandex LLC'
2078    url: 'https://yandex.com/company/'
2079
2080- regex: 'Yeti|NaverJapan|AdsBot-Naver'
2081  name: 'Yeti/Naverbot'
2082  category: 'Search bot'
2083  url: 'http://help.naver.com/robots/'
2084  producer:
2085    name: 'Naver'
2086    url: 'http://www.naver.com'
2087
2088- regex: 'YoudaoBot'
2089  name: 'Youdao Bot'
2090  category: 'Search bot'
2091  url: 'http://www.youdao.com/help/webmaster/spider'
2092  producer:
2093    name: 'NetEase, Inc.'
2094    url: 'http://corp.163.com'
2095
2096- regex: 'YOURLS'
2097  name: 'Yourls'
2098  category: 'Crawler'
2099  url: 'http://yourls.org'
2100
2101- regex: 'YRSpider|YYSpider'
2102  name: 'Yunyun Bot'
2103  category: 'Search bot'
2104  url: 'http://www.yunyun.com/SiteInfo.php?r=about'
2105  producer:
2106    name: 'YunYun'
2107    url: 'http://www.yunyun.com'
2108
2109- regex: 'zgrab'
2110  name: 'zgrab'
2111  category: 'Security Checker'
2112  url: 'https://github.com/zmap/zgrab'
2113
2114- regex: 'Zookabot'
2115  name: 'Zookabot'
2116  category: 'Crawler'
2117  url: 'http://zookabot.com'
2118  producer:
2119    name: 'Hwacha ApS'
2120    url: 'http://hwacha.dk'
2121
2122- regex: 'ZumBot'
2123  name: 'ZumBot'
2124  category: 'Search bot'
2125  url: 'http://help.zum.com/inquiry'
2126  producer:
2127    name: 'ZUM internet'
2128    url: 'http://www.zuminternet.com/'
2129
2130- regex: 'YottaaMonitor'
2131  name: 'Yottaa Site Monitor'
2132  category: 'Site Monitor'
2133  url: 'http://www.yottaa.com/products/site-monitor'
2134  producer:
2135    name: 'Yottaa'
2136    url: 'http://www.yottaa.com/'
2137
2138- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
2139  name: 'Yahoo Gemini'
2140  category: 'Crawler'
2141  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
2142  producer:
2143    name: 'Yahoo! Inc.'
2144    url: 'http://www.yahoo.com'
2145
# Outbrain crawler: matches user agents containing 'Java' followed somewhere
# later by 'outbrain'.
# NOTE(review): for a plain unanchored search the leading '.*' is redundant.
# It may be deliberate if the consumer prepends a boundary check to every
# pattern; confirm against the parser before simplifying.
2146- regex: '.*Java.*outbrain'
2147  name: 'Outbrain'
2148  category: 'Crawler'
2149  url: ''
2150  producer:
2151    name: 'Outbrain'
2152    url: 'http://www.outbrain.com/'
2153
2154- regex: 'HubPages.*crawlingpolicy'
2155  name: 'HubPages'
2156  category: 'Crawler'
2157  url: 'https://hubpages.com/help/crawlingpolicy'
2158  producer:
2159    name: 'HubPages, Inc.'
2160    url: 'https://discover.hubpages.com/'
2161
# Pinterest crawler: matches 'Pinterest/' or 'Pinterestbot/' followed later by
# 'www.pinterest.com'. Keys follow the regex/name/category/url/producer order
# used by most records in this file.
2162- regex: 'Pinterest(?:bot)?/.*www\.pinterest\.com'
2163  name: 'Pinterest'
2164  category: 'Crawler'
2165  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
2166  producer:
2167    name: 'Pinterest'
2168    url: 'https://www.pinterest.com/'
2169
# Site24x7 website monitor: matches any user agent containing 'Site24x7'.
# NOTE(review): for a plain unanchored search the leading '.*' is redundant.
# It may be deliberate if the consumer prepends a boundary check to every
# pattern; confirm against the parser before simplifying.
2170- regex: '.*Site24x7'
2171  name: 'Site24x7 Website Monitoring'
2172  category: 'Site Monitor'
2173  url: 'https://www.site24x7.com/site24x7-faq.html'
2174  producer:
2175    name: 'Site24x7'
2176    url: 'https://www.site24x7.com'
2177
# Site24x7 defacement monitor: matches user agents containing ' HLB' (a space
# followed by 'HLB').
# NOTE(review): the leading '.*' looks redundant for an unanchored search, but
# it lets ' HLB' follow arbitrary preceding text. Keep it unless the parser is
# confirmed not to add its own prefix/boundary to each pattern.
2178- regex: '.* HLB'
2179  name: 'Site24x7 Defacement Monitor'
2180  category: 'Site Monitor'
2181  url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
2182  producer:
2183    name: 'Site24x7'
2184    url: 'https://www.site24x7.com/'
2185
2186- regex: 's~snapchat-proxy'
2187  name: 'Snapchat Proxy'
2188  category: 'Crawler'
2189  url: 'https://www.snapchat.com'
2190  producer:
2191    name: 'Snapchat Inc.'
2192    url: 'https://www.snapchat.com'
2193
2194- regex: 'Snap URL Preview Service'
2195  name: 'Snap URL Preview Service'
2196  category: 'Service Agent'
2197  url: 'https://developers.snap.com/robots'
2198  producer:
2199    name: 'Snapchat Inc.'
2200    url: 'https://www.snapchat.com/'
2201
2202- regex: 'SnapchatAds'
2203  name: 'Snapchat Ads'
2204  category: 'Crawler'
2205  url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
2206  producer:
2207    name: 'Snapchat Inc.'
2208    url: 'https://www.snapchat.com/'
2209
2210- regex: "Let's Encrypt validation server"
2211  name: "Let's Encrypt Validation"
2212  category: 'Service Agent'
2213  url: 'https://letsencrypt.org/how-it-works/'
2214  producer:
2215    name: "Let's Encrypt"
2216    url: 'https://letsencrypt.org'
2217
2218- regex: 'GrapeshotCrawler'
2219  name: 'Grapeshot'
2220  category: 'Crawler'
2221  url: 'https://www.grapeshot.com/crawler'
2222  producer:
2223    name: 'Grapeshot'
2224    url: 'https://www.grapeshot.com'
2225
2226- regex: 'www\.monitor\.us'
2227  name: 'Monitor.Us'
2228  category: 'Site Monitor'
2229  url: 'http://www.monitor.us'
2230  producer:
2231    name: 'Monitor.Us'
2232    url: 'http://www.monitor.us'
2233
2234- regex: 'Catchpoint'
2235  name: 'Catchpoint'
2236  category: 'Site Monitor'
2237  url: 'https://www.catchpoint.com/'
2238  producer:
2239    name: 'Catchpoint Systems'
2240    url: 'https://www.catchpoint.com/'
2241
2242- regex: 'bitlybot'
2243  name: 'BitlyBot'
2244  category: 'Crawler'
2245  url: 'https://bitly.com'
2246  producer:
2247    name: 'Bitly, Inc.'
2248    url: 'https://bitly.com'
2249
2250- regex: 'Zao/'
2251  name: 'Zao'
2252  category: 'Crawler'
2253
2254- regex: 'lycos'
2255  name: 'Lycos'
2256
# Generic 'Slurp' fallback with no category, url or producer recorded.
# The 'Yahoo! Slurp|Yahoo!-AdCrawler' record appears earlier in the file.
# Presumably records are evaluated in file order, so this broad pattern only
# applies to Slurp agents that record did not match (verify against the
# parser).
2257- regex: 'Slurp'
2258  name: 'Inktomi Slurp'
2259
2260- regex: 'Speedy Spider'
2261  name: 'Speedy'
2262
2263- regex: 'ScoutJet'
2264  name: 'ScoutJet'
2265
2266- regex: 'nrsbot|netresearch'
2267  name: 'NetResearchServer'
2268
2269- regex: 'scooter'
2270  name: 'Scooter'
2271
2272- regex: 'gigabot'
2273  name: 'Gigabot'
2274
2275- regex: 'charlotte'
2276  name: 'Charlotte'
2277
2278- regex: 'Pompos'
2279  name: 'Pompos'
2280
2281- regex: 'ichiro'
2282  name: 'ichiro'
2283
2284- regex: 'PagePeeker'
2285  name: 'PagePeeker'
2286  category: 'Crawler'
2287  url: 'https://pagepeeker.com/robots/'
2288  producer:
2289    name: 'PAGEPEEKER SRL'
2290    url: 'https://pagepeeker.com/'
2291
2292- regex: 'WebThumbnail'
2293  name: 'WebThumbnail'
2294
2295- regex: 'Willow Internet Crawler'
2296  name: 'Willow Internet Crawler'
2297
2298- regex: 'EmailWolf'
2299  name: 'EmailWolf'
2300
2301- regex: 'NetLyzer FastProbe'
2302  name: 'NetLyzer FastProbe'
2303
2304- regex: 'AdMantX.*admantx\.com'
2305  name: 'ADMantX'
2306
2307- regex: 'Server Density Service Monitoring'
2308  name: 'Server Density'
2309
2310- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
2311  name: 'RSSRadio Bot'
2312
2313- regex: '^sentry'
2314  name: 'Sentry Bot'
2315  producer:
2316    name: 'Sentry'
2317    url: 'https://sentry.io'
2318
2319- regex: '^Spotify/[\d.]+$'
2320  name: 'Spotify'
2321  producer:
2322    name: 'Spotify'
2323    url: 'https://www.spotify.com'
2324
2325- regex: 'The Knowledge AI'
2326  name: 'The Knowledge AI'
2327  category: 'Crawler'
2328
2329- regex: 'Embedly'
2330  name: 'Embedly'
2331  category: 'Crawler'
2332  url: 'https://support.embed.ly/hc/en-us'
2333  producer:
2334    name: 'A Medium, Corp.'
2335    url: 'https://medium.com/'
2336
2337- regex: 'BrandVerity'
2338  name: 'BrandVerity'
2339  category: 'Crawler'
2340  url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
2341  producer:
2342    name: 'BrandVerity, Inc.'
2343    url: 'https://www.brandverity.com/'
2344
2345- regex: 'Kaspersky Lab CFR link resolver'
2346  name: 'Kaspersky'
2347  category: 'Security Checker'
2348  url: 'https://www.kaspersky.com/'
2349  producer:
2350    name: 'AO Kaspersky Lab'
2351    url: 'https://www.kaspersky.com/'
2352
2353- regex: 'eZ Publish Link Validator'
2354  name: 'eZ Publish Link Validator'
2355  category: 'Crawler'
2356  url: 'https://ez.no/'
2357  producer:
2358    name: 'eZ Systems AS'
2359    url: 'https://ez.no/'
2360
2361- regex: 'woorankreview'
2362  name: 'WooRank'
2363  category: 'Search bot'
2364  url: 'https://www.woorank.com/'
2365  producer:
2366    name: 'WooRank sprl'
2367    url: 'https://www.woorank.com/'
2368
2369- regex: 'Siteimprove'
2370  name: 'Siteimprove'
2371  category: 'Search bot'
2372  url: 'https://siteimprove.com/'
2373  producer:
2374    name: 'Siteimprove GmbH'
2375    url: 'https://siteimprove.com/'
2376
2377- regex: 'CATExplorador'
2378  name: 'CATExplorador'
2379  category: 'Search bot'
2380  url: 'https://fundacio.cat/ca/domini/'
2381  producer:
2382    name: 'Fundació puntCAT'
2383    url: 'https://fundacio.cat/ca/domini/'
2384
2385- regex: 'Buck'
2386  name: 'Buck'
2387  category: 'Search bot'
2388  url: 'https://hypefactors.com/'
2389  producer:
2390    name: 'Hypefactors A/S'
2391    url: 'https://hypefactors.com/'
2392
2393- regex: 'tracemyfile'
2394  name: 'TraceMyFile'
2395  category: 'Search bot'
2396  url: 'https://www.tracemyfile.com/'
2397  producer:
2398    name: 'Idee Inc.'
2399    url: 'http://ideeinc.com/'
2400
# Ze List feed parser (producer: Treeworks SRL).
- regex: 'zelist\.ro feed parser'
  name: 'Ze List'
  category: 'Feed Fetcher'
  url: 'https://www.zelist.ro/'
  producer:
    name: 'Treeworks SRL'
    url: 'https://www.tree.ro/'
2408
2409- regex: 'weborama-fetcher'
2410  name: 'Weborama'
2411  category: 'Search bot'
2412  url: 'https://weborama.com/'
2413  producer:
2414    name: 'Weborama SA'
2415    url: 'https://weborama.com/'
2416
2417- regex: 'BoardReader Favicon Fetcher'
2418  name: 'BoardReader'
2419  category: 'Search bot'
2420  url: 'https://boardreader.com/'
2421  producer:
2422    name: 'Effyis Inc'
2423    url: 'https://boardreader.com/'
2424
2425- regex: 'IDG/(?:EU|IT|RU|UK)'
2426  name: 'IDG'
2427  category: 'Crawler'
2428  url: 'https://www.spaziodati.eu/'
2429  producer:
2430    name: 'SpazioDati S.r.l.'
2431    url: 'https://www.spaziodati.eu/'
2432
2433- regex: 'Bytespider'
2434  name: 'Bytespider'
2435  category: 'Search bot'
2436  url: 'https://bytedance.com/'
2437  producer:
2438    name: 'ByteDance Ltd.'
2439    url: 'https://bytedance.com/'
2440
2441- regex: 'WikiDo'
2442  name: 'WikiDo'
2443  category: 'Search bot'
2444  url: 'https://www.wikido.com/'
2445  producer:
2446    name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2447    url: 'https://www.wikido.com/'
2448
# Awario crawler: the pattern covers both 'AwarioBot' and 'AwarioSmartBot'.
# The 'AwarioRssBot' user agent has its own entry further down (same display
# name, but category 'Feed Fetcher').
- regex: 'Awario(?:Smart)?Bot'
  name: 'Awario'
  category: 'Search bot'
  url: 'https://awario.com/bots.html'
  producer:
    name: 'TechFusion Ltd.'
    url: 'https://www.techfusion.com.cy/'
2456
2457- regex: 'AwarioRssBot'
2458  name: 'Awario'
2459  category: 'Feed Fetcher'
2460  url: 'https://awario.com/bots.html'
2461  producer:
2462    name: 'TechFusion Ltd.'
2463    url: 'https://www.techfusion.com.cy/'
2464
2465- regex: 'oBot'
2466  name: 'oBot'
2467  category: 'Search bot'
2468  url: 'https://www.xforce-security.com/crawler/'
2469  producer:
2470    name: 'IBM Germany Research & Development GmbH'
2471    url: 'https://exchange.xforce.ibmcloud.com/'
2472
2473- regex: 'SMTBot'
2474  name: 'SMTBot'
2475  category: 'Search bot'
2476  url: 'https://www.similartech.com/smtbot'
2477  producer:
2478    name: 'SimilarTech Ltd.'
2479    url: 'https://www.similartech.com/'
2480
2481- regex: 'LCC'
2482  name: 'LCC'
2483  category: 'Search bot'
2484  url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2485  producer:
2486    name: 'Universität Leipzig'
2487    url: 'https://www.uni-leipzig.de/'
2488
2489- regex: 'Startpagina-Linkchecker'
2490  name: 'Startpagina Linkchecker'
2491  category: 'Search bot'
2492  url: 'https://www.startpagina.nl/linkchecker'
2493  producer:
2494    name: 'Startpagina B.V.'
2495    url: 'https://www.startpagina.nl/'
2496
# MoodleBot link checker (producer: Moodle Pty Ltd).
- regex: 'MoodleBot-Linkchecker'
  name: 'MoodleBot Linkchecker'
  category: 'Search bot'
  url: 'https://docs.moodle.org/en/Usage'
  producer:
    name: 'Moodle Pty Ltd'
    url: 'https://moodle.org/'
2504
2505- regex: 'GTmetrix'
2506  name: 'GTmetrix'
2507  category: 'Crawler'
2508  url: 'https://gtmetrix.com/'
2509  producer:
2510    name: 'Carbon60 Operating Co. Ltd.'
2511    url: 'https://www.carbon60.com/'
2512
2513- regex: 'CyberFind ?Crawler'
2514  name: 'CyberFind Crawler'
2515  category: 'Crawler'
2516  url: 'https://www.cyberfind.net/bot.html'
2517  producer:
2518    name: 'Find.tf'
2519    url: 'https://find.tf/'
2520
2521- regex: 'Nutch'
2522  name: 'Nutch-based Bot'
2523  category: 'Crawler'
2524  url: 'https://nutch.apache.org'
2525  producer:
2526    name: 'The Apache Software Foundation'
2527    url: 'https://www.apache.org/foundation/'
2528
2529- regex: 'Seobility'
2530  name: 'Seobility'
2531  category: 'Crawler'
2532  url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2533
2534- regex: 'Vercelbot'
2535  name: 'Vercel Bot'
2536  category: 'Service bot'
2537  url: 'https://vercel.com'
2538
2539- regex: 'Grammarly'
2540  name: 'Grammarly'
2541  category: 'Service bot'
2542  url: 'https://www.grammarly.com'
2543
2544- regex: 'Robozilla'
2545  name: 'Robozilla'
2546  category: 'Crawler'
2547
2548- regex: 'Domains Project'
2549  name: 'Domains Project'
2550  category: 'Crawler'
2551  url: 'https://domainsproject.org'
2552
2553- regex: 'PetalBot'
2554  name: 'Petal Bot'
2555  category: 'Crawler'
2556  url: 'https://aspiegel.com/petalbot'
2557
2558- regex: 'SerendeputyBot'
2559  name: 'Serendeputy Bot'
2560  category: 'Crawler'
2561  url: 'https://serendeputy.com/about/serendeputy-bot'
2562
2563- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
2564  name: 'ADmantX Service Fetcher'
2565  category: 'Service bot'
2566  url: 'https://www.admantx.com/service-fetcher.html'
2567
2568- regex: 'SemanticScholarBot'
2569  name: 'Semantic Scholar Bot'
2570  category: 'Crawler'
2571  url: 'https://www.semanticscholar.org/crawler'
2572
2573- regex: 'VelenPublicWebCrawler'
2574  name: 'Velen Public Web Crawler'
2575  category: 'Crawler'
2576  url: 'https://hunter.io/robot'
2577
2578- regex: 'Barkrowler'
2579  name: 'Barkrowler'
2580  category: 'Crawler'
2581  url: 'http://www.exensa.com/crawl'
2582
2583- regex: 'BDCbot'
2584  name: 'BDCbot'
2585  category: 'Crawler'
2586  url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2587  producer:
2588    name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2589    url: 'https://bigdatacorp.com.br/'
2590
2591- regex: 'adbeat'
2592  name: 'Adbeat'
2593  category: 'Crawler'
2594  url: 'https://www.adbeat.com/operation_policy'
2595  producer:
2596    name: 'PPC Labs LLC'
2597    url: 'https://www.adbeat.com/'
2598
2599- regex: '(?:BuiltWith|BW/)'
2600  name: 'BuiltWith'
2601  category: 'Crawler'
2602  url: 'https://builtwith.com/biup'
2603  producer:
2604    name: 'BuiltWith Pty Ltd'
2605    url: 'https://builtwith.com/'
2606
2607- regex: 'https://whatis\.contentkingapp\.com'
2608  name: 'ContentKing'
2609  category: 'Site Monitor'
2610  url: 'https://whatis.contentkingapp.com/'
2611  producer:
2612    name: 'ContentKing BV'
2613    url: 'https://www.contentkingapp.com/'
2614
2615- regex: 'MicroAdBot'
2616  name: 'MicroAdBot'
2617  category: 'Crawler'
2618  url: 'https://www.microad.co.jp/'
2619  producer:
2620    name: 'MicroAd, Inc.'
2621    url: 'https://www.microad.co.jp/'
2622
2623- regex: 'PingAdmin\.Ru'
2624  name: 'PingAdmin.Ru'
2625  category: 'Site Monitor'
2626  url: 'https://ping-admin.ru/'
2627
2628- regex: 'notifyninja.+monitoring'
2629  name: 'Notify Ninja'
2630  category: 'Site Monitor'
2631  url: 'http://notifyninja.com'
2632
2633- regex: 'WebDataStats'
2634  name: 'WebDataStats'
2635  category: 'Crawler'
2636  url: 'https://webdatastats.com/policy.html'
2637  producer:
2638    name: 'WebTehRazrabotka LLC'
2639    url: 'https://webdatastats.com/'
2640
2641- regex: 'parse\.ly scraper'
2642  name: 'parse.ly'
2643  category: 'Crawler'
2644  url: 'https://www.parse.ly/help/integration/crawler'
2645  producer:
2646    name: 'Parsely, Inc.'
2647    url: 'https://www.parse.ly/'
2648
2649- regex: 'Nimbostratus-Bot'
2650  name: 'Nimbostratus Bot'
2651  category: 'Site Monitor'
2652  url: 'http://cloudsystemnetworks.com'
2653
2654- regex: 'HeartRails_Capture'
2655  name: 'Heart Rails Capture'
2656  category: 'Service Agent'
2657  url: 'http://capture.heartrails.com'
2658
2659- regex: 'Project-Resonance'
2660  name: 'Project Resonance'
2661  category: 'Crawler'
2662  url: 'https://project-resonance.com/'
2663  producer:
2664    name: 'RedHunt Labs Limited'
2665    url: 'https://redhuntlabs.com/'
2666
2667- regex: 'DataXu'
2668  name: 'DataXu'
2669  category: 'Service Agent'
2670  url: 'https://advertising.roku.com/dataxu'
2671  producer:
2672    name: 'Roku, Inc.'
2673    url: 'https://roku.com'
2674
2675- regex: 'Cocolyzebot'
2676  name: 'Cocolyzebot'
2677  category: 'Crawler'
2678  url: 'https://cocolyze.com/en/cocolyzebot'
2679  producer:
2680    name: 'VSI INNOVATION SAS'
2681    url: 'https://vsi-innovation.com/'
2682
2683- regex: 'veryhip'
2684  name: 'VeryHip'
2685  category: 'Crawler'
2686  url: 'https://veryhip.com/'
2687  producer:
2688    name: 'VeryHip'
2689    url: 'https://veryhip.com/'
2690
2691- regex: 'LinkpadBot'
2692  name: 'LinkpadBot'
2693  category: 'Crawler'
2694  url: 'https://www.linkpad.org/'
2695  producer:
2696    name: 'Solomono LLC'
2697    url: 'https://www.linkpad.org/'
2698
2699- regex: 'MuscatFerret'
2700  name: 'MuscatFerret'
2701  category: 'Crawler'
2702  url: 'http://www.webtop.com/'
2703
2704- regex: 'PageThing\.com'
2705  name: 'PageThing'
2706  category: 'Crawler'
2707  url: 'https://www.pagething.com/'
2708  producer:
2709    name: 'SPECIALNOISE LTD'
2710    url: 'https://www.specialnoise.com/'
2711
# ArchiveBox crawler; producer fields are intentionally empty placeholders.
- regex: 'ArchiveBox'
  name: 'ArchiveBox'
  category: 'Crawler'
  url: 'https://archivebox.io/'
  producer:
    name: ''
    url: ''
2719
# Choosito crawler (producer: Choosito! Inc.).
- regex: 'Choosito'
  name: 'Choosito'
  category: 'Crawler'
  url: 'https://www.choosito.com/'
  producer:
    name: 'Choosito! Inc.'
    url: 'https://www.choosito.com/'
2727
# datagnionbot crawler (producer: DATAGNION GMBH).
- regex: 'datagnionbot'
  name: 'datagnionbot'
  category: 'Crawler'
  url: 'https://www.datagnion.com/bot.html'
  producer:
    name: 'DATAGNION GMBH'
    url: 'https://www.datagnion.com/'
2735
# WhatCMS crawler (producer: Nineteen Ten LLC).
- regex: 'WhatCMS'
  name: 'WhatCMS'
  category: 'Crawler'
  url: 'https://whatcms.org/'
  producer:
    name: 'Nineteen Ten LLC'
    url: 'https://whatcms.org/'
2743
# httpx (producer: ProjectDiscovery, Inc.), categorised as a crawler.
- regex: 'httpx'
  name: 'httpx'
  category: 'Crawler'
  url: 'https://github.com/projectdiscovery/httpx'
  producer:
    name: 'ProjectDiscovery, Inc.'
    url: 'https://projectdiscovery.io/'
2751
# Interactsh (producer: ProjectDiscovery, Inc.): the pattern requires the
# literal text '.oast.' somewhere in the input.
# NOTE(review): the leading '.*' looks redundant for an unanchored search, but
# it may be required if the consumer only starts matches at a token boundary —
# confirm before simplifying.
- regex: '.*\.oast\.'
  name: 'Interactsh'
  category: 'Security Checker'
  url: 'https://github.com/projectdiscovery/interactsh'
  producer:
    name: 'ProjectDiscovery, Inc.'
    url: 'https://projectdiscovery.io/'
2759
2760- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
2761  name: 'Expanse'
2762  category: 'Security Checker'
2763  url: 'https://expanse.co/'
2764  producer:
2765    name: 'Expanse Inc.'
2766    url: 'https://expanse.co/'
2767
2768- regex: 'HuaweiWebCatBot'
2769  name: 'HuaweiWebCatBot'
2770  category: 'Crawler'
2771  url: 'https://isecurity.huawei.com'
2772  producer:
2773    name: 'Huawei Technologies Co., Ltd.'
2774    url: 'https://huawei.com'
2775
# Hatena agents (producer: Hatena Co., Ltd.): the favicon and bookmark user
# agents each get their own entry.
- regex: 'Hatena-Favicon'
  name: 'Hatena Favicon'
  category: 'Crawler'
  url: 'https://www.hatena.ne.jp/faq/'
  producer:
    name: 'Hatena Co., Ltd.'
    url: 'https://www.hatena.ne.jp'

- regex: 'Hatena-?Bookmark'
  name: 'Hatena Bookmark'
  category: 'Crawler'
  url: 'https://www.hatena.ne.jp/faq/'
  producer:
    name: 'Hatena Co., Ltd.'
    url: 'https://www.hatena.ne.jp'
2790
2791- regex: 'RyowlEngine'
2792  name: 'Ryowl'
2793  category: 'Crawler'
2794  url: 'https://ryowl.org'
2795
2796- regex: 'OdklBot'
2797  name: 'Odnoklassniki Bot'
2798  category: 'Crawler'
2799  url: 'https://odnoklassniki.ru'
2800
2801- regex: 'Mediatoolkitbot'
2802  name: 'Mediatoolkit Bot'
2803  category: 'Crawler'
2804  url: 'https://mediatoolkit.com'
2805
2806- regex: 'ZoominfoBot'
2807  name: 'ZoominfoBot'
2808  category: 'Crawler'
2809  url: 'https://www.zoominfo.com'
2810
2811- regex: 'WeViKaBot'
2812  name: 'WeViKaBot'
2813  category: 'Crawler'
2814  url: 'http://www.wevika.de'
2815
2816- regex: 'SEOkicks'
2817  name: 'SEOkicks'
2818  category: 'Crawler'
2819  url: 'https://www.seokicks.de/robot.html'
2820  producer:
2821    name: 'SEOkicks'
2822    url: 'https://www.seokicks.de/'
2823
2824- regex: 'Plukkie'
2825  name: 'Plukkie'
2826  category: 'Crawler'
2827  url: 'http://www.botje.com/plukkie.htm'
2828
2829- regex: 'proximic;'
2830  name: 'Comscore'
2831  category: 'Crawler'
2832  url: 'https://www.comscore.com/Web-Crawler'
2833
2834- regex: 'SurdotlyBot'
2835  name: 'SurdotlyBot'
2836  category: 'Crawler'
2837  url: 'http://sur.ly/bot.html'
2838
# Gowikibot crawler.
- regex: 'Gowikibot'
  name: 'Gowikibot'
  category: 'Crawler'
  url: 'http://www.gowikibot.com'
2843
2844- regex: 'SabsimBot'
2845  name: 'SabsimBot'
2846  category: 'Crawler'
2847  url: 'https://sabsim.com'
2848
2849- regex: 'LumtelBot'
2850  name: 'LumtelBot'
2851  category: 'Crawler'
2852  url: 'https://umtel.com'
2853
2854- regex: 'PiplBot'
2855  name: 'PiplBot'
2856  category: 'Crawler'
2857  url: 'http://www.pipl.com/bot'
2858
2859- regex: 'woobot'
2860  name: 'WooRank'
2861  category: 'Crawler'
2862  url: 'https://www.woorank.com/bot'
2863
2864- regex: 'Cookiebot'
2865  name: 'Cookiebot'
2866  category: 'Crawler'
2867  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2868  producer:
2869    name: 'Cybot A/S'
2870    url: 'https://www.cybot.com/'
2871
2872- regex: 'NetSystemsResearch'
2873  name: 'NetSystemsResearch'
2874  category: 'Security Checker'
2875  url: 'https://www.netsystemsresearch.com/'
2876  producer:
2877    name: 'NET SYSTEMS RESEARCH LLC'
2878    url: 'https://www.netsystemsresearch.com/'
2879
2880- regex: 'CensysInspect'
2881  name: 'CensysInspect'
2882  category: 'Security Checker'
2883  url: 'https://about.censys.io/'
2884  producer:
2885    name: 'Censys, Inc.'
2886    url: 'https://censys.io/'
2887
2888- regex: 'gdnplus\.com'
2889  name: 'GDNP'
2890  category: 'Crawler'
2891  url: 'https://gdnplus.com/'
2892  producer:
2893    name: 'Global Digital Network Plus, LLC'
2894    url: 'https://gdnplus.com/'
2895
2896- regex: 'WellKnownBot'
2897  name: 'WellKnownBot'
2898  category: 'Crawler'
2899  url: 'https://well-known.dev'
2900
2901- regex: 'Adsbot'
2902  name: 'Adsbot'
2903  category: 'Crawler'
2904  url: 'https://seostar.co/robot/'
2905
2906- regex: 'MTRobot'
2907  name: 'MTRobot'
2908  category: 'Crawler'
2909  url: 'https://metrics-tools.de/robot.html'
2910  producer:
2911    name: 'Metrics Tools'
2912    url: 'https://metrics-tools.de/'
2913
2914- regex: 'serpstatbot'
2915  name: 'serpstatbot'
2916  category: 'Crawler'
2917  url: 'http://serpstatbot.com/'
2918  producer:
2919    name: 'Netpeak Ltd'
2920    url: 'https://netpeak.net/'
2921
2922- regex: 'colly'
2923  name: 'colly'
2924  category: 'Crawler'
2925  url: 'https://github.com/gocolly/colly/'
2926
2927- regex: 'l9tcpid'
2928  name: 'l9tcpid'
2929  category: 'Security Checker'
2930  url: 'https://github.com/LeakIX/l9tcpid'
2931
2932- regex: 'l9explore'
2933  name: 'l9explore'
2934  category: 'Security Checker'
2935  url: 'https://github.com/LeakIX/l9explore'
2936
2937- regex: 'l9scan/|^Lkx-.*/'
2938  name: 'LeakIX'
2939  category: 'Security Checker'
2940  url: 'https://leakix.net/'
2941  producer:
2942    name: 'BaDaaS SRL'
2943    url: 'https://leakix.net/'
2944
2945- regex: 'MegaIndex\.ru'
2946  name: 'MegaIndex'
2947  category: 'Crawler'
2948  url: 'https://megaindex.com/crawler'
2949
2950- regex: 'Seekport'
2951  name: 'Seekport'
2952  category: 'Crawler'
2953  url: 'https://bot.seekport.com/'
2954  producer:
2955    name: 'SISTRIX GmbH'
2956    url: 'https://www.sistrix.de/'
2957
2958- regex: 'Seolyt(?:Bot)?'
2959  name: 'SeolytBot'
2960  category: 'Crawler'
2961  url: 'https://seolyt.com/'
2962
2963- regex: 'YaK/'
2964  name: 'YaK'
2965  category: 'Crawler'
2966  url: 'https://www.linkfluence.com/'
2967  producer:
2968    name: 'Linkfluence SAS'
2969    url: 'https://www.linkfluence.com/'
2970
2971- regex: 'KomodiaBot'
2972  name: 'KomodiaBot'
2973  category: 'Crawler'
2974  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2975  producer:
2976    name: 'Komodia Inc.'
2977    url: 'https://www.komodia.com/'
2978
2979- regex: 'KStandBot'
2980  name: 'KStandBot'
2981  category: 'Crawler'
2982  url: 'https://url-classification.io/wiki/index.php?title=URL_server_crawler'
2983  producer:
2984    name: 'Komodia Inc.'
2985    url: 'https://www.komodia.com/'
2986
2987- regex: 'Neevabot'
2988  name: 'Neevabot'
2989  category: 'Search bot'
2990  url: 'https://neeva.com/neevabot'
2991  producer:
2992    name: 'Neeva Inc.'
2993    url: 'https://neeva.com/'
2994
# Chatwork link-preview agent (producer: kubell Co., Ltd.).
# Listed ahead of the generic 'LinkPreview' entry, whose pattern also matches
# this user agent string — presumably the first matching entry wins, so keep
# this order (TODO confirm with the consumer).
- regex: 'Chatwork LinkPreview'
  name: 'Chatwork LinkPreview'
  category: 'Service Agent'
  url: 'https://go.chatwork.com/en/'
  producer:
    name: 'kubell Co., Ltd.'
    url: 'https://www.kubell.com/en/'
3002
3003- regex: 'LinkPreview'
3004  name: 'LinkPreview'
3005  category: 'Service Agent'
3006  url: 'https://www.linkpreview.net/'
3007
3008- regex: 'JungleKeyThumbnail'
3009  name: 'JungleKeyThumbnail'
3010  category: 'Crawler'
3011  url: 'https://junglekey.com/'
3012
3013- regex: 'rocketmonitor(?:bot)?'
3014  name: 'RocketMonitorBot'
3015  category: 'Site Monitor'
3016  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
3017  producer:
3018    name: 'Radio Mast, Inc.'
3019    url: 'https://www.radiomast.io/'
3020
3021- regex: 'SitemapParser-VIPnytt'
3022  name: 'SitemapParser-VIPnytt'
3023  category: 'Crawler'
3024  url: 'https://github.com/VIPnytt/SitemapParser/'
3025
3026- regex: '^Turnitin'
3027  name: 'Turnitin'
3028  category: 'Crawler'
3029  url: 'https://turnitin.com/robot/crawlerinfo.html'
3030
# Dotcom Monitor: a plain 'DMBrowser' match already covers the suffixed
# 'DMBrowser-UV' / 'DMBrowser-BV' variants, so no second alternative is needed.
- regex: 'DMBrowser'
  name: 'Dotcom Monitor'
  category: 'Site Monitor'
  url: 'https://www.dotcom-monitor.com'
3035
3036- regex: 'ThinkChaos/'
3037  name: 'ThinkChaos'
3038  category: 'Crawler'
3039
3040- regex: 'DataForSeoBot'
3041  name: 'DataForSeoBot'
3042  category: 'Crawler'
3043  url: 'https://dataforseo.com/dataforseo-bot'
3044
3045- regex: 'Discordbot'
3046  name: 'Discord Bot'
3047  category: 'Service Agent'
3048  url: 'https://discordapp.com'
3049
3050- regex: 'Linespider'
3051  name: 'Linespider'
3052  category: 'Crawler'
3053  url: 'https://lin.ee/4dwXkTH'
3054
3055- regex: 'Cincraw'
3056  name: 'Cincraw'
3057  category: 'Crawler'
3058  url: 'http://cincrawdata.net/bot/'
3059
3060- regex: 'CISPA Web Analyzer'
3061  name: 'CISPA Web Analyzer'
3062  category: 'Crawler'
3063  url: 'https://notify.cispa.de/'
3064  producer:
3065    name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
3066    url: 'https://cispa.de/en'
3067
3068- regex: 'IonCrawl'
3069  name: 'IONOS Crawler'
3070  category: 'Crawler'
3071  url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
3072  producer:
3073    name: 'IONOS SE'
3074    url: 'https://www.ionos.de/'
3075
3076- regex: 'Crawldad'
3077  name: 'Crawldad'
3078  category: 'Crawler'
3079  url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
3080
3081- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
3082  name: 'security.txt scanserver'
3083  category: 'Security Checker'
3084  url: 'https://securitytxt-scan.cs.hm.edu/'
3085  producer:
3086    name: 'Hochschule für angewandte Wissenschaften München'
3087    url: 'https://www.hm.edu/'
3088
3089- regex: 'TigerBot'
3090  name: 'TigerBot'
3091  category: 'Crawler'
3092  url: 'https://tiger.ch/'
3093
3094- regex: 'TestCrawler'
3095  name: 'TestCrawler'
3096  category: 'Crawler'
3097  url: 'https://www.comcepta.com/'
3098
3099- regex: 'CrowdTanglebot'
3100  name: 'CrowdTangle'
3101  category: 'Crawler'
3102  url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
3103  producer:
3104    name: 'CrowdTangle, Inc.'
3105    url: 'https://www.crowdtangle.com/'
3106
3107- regex: 'Sellers\.Guide Crawler by Primis'
3108  name: 'Sellers.Guide'
3109  category: 'Crawler'
3110  url: 'https://sellers.guide/'
3111  producer:
3112    name: 'McCann Disciplines, Ltd.'
3113    url: 'https://www.primis.tech/'
3114
3115- regex: 'OnalyticaBot'
3116  name: 'Onalytica'
3117  category: 'Crawler'
3118  url: 'https://www.airslate.com/bot/explore/onalytica-bot'
3119  producer:
3120    name: 'airSlate, Inc.'
3121    url: 'https://www.airslate.com/'
3122
3123- regex: 'deepnoc'
3124  name: 'deepnoc'
3125  category: 'Crawler'
3126  url: 'https://deepnoc.com/bot'
3127  producer:
3128    name: 'deepnoc, GmbH'
3129    url: 'https://deepnoc.com/'
3130
3131- regex: 'Newslitbot'
3132  name: 'Newslitbot'
3133  category: 'Crawler'
3134  url: 'https://www.newslit.co/'
3135  producer:
3136    name: 'Newslit, LLC.'
3137    url: 'https://www.newslit.co/'
3138
3139- regex: 'um-(?:ANS|CC|FC|IC|LN)'
3140  name: 'uMBot'
3141  category: 'Crawler'
3142  url: 'https://www.ubermetrics-technologies.com/'
3143  producer:
3144    name: 'Ubermetrics Technologies GmbH'
3145    url: 'https://www.ubermetrics-technologies.com/'
3146
3147- regex: 'Abonti'
3148  name: 'Abonti'
3149  category: 'Crawler'
3150  url: 'http://abonti.com/'
3151
3152- regex: 'collection@infegy\.com'
3153  name: 'Infegy'
3154  category: 'Crawler'
3155  url: 'https://infegy.com/'
3156  producer:
3157    name: 'Infegy, Inc.'
3158    url: 'https://infegy.com/'
3159
3160- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
3161  name: 'IPIP'
3162  category: 'Security Checker'
3163  url: 'https://security.ipip.net/'
3164  producer:
3165    name: 'Beijing Tiantexin Tech. Co., Ltd.'
3166    url: 'https://en.ipip.net/'
3167
3168- regex: 'ev-crawler'
3169  name: 'Headline'
3170  category: 'Crawler'
3171  url: 'https://headline.com/legal/crawler'
3172  producer:
3173    name: 'e.ventures Managementgesellschaft mbH'
3174    url: 'https://headline.com/'
3175
3176- regex: 'webprosbot'
3177  name: 'WebPros'
3178  category: 'Crawler'
3179  url: 'https://webpros.com/'
3180  producer:
3181    name: 'WebPros Holdco B.V.'
3182    url: 'https://webpros.com/'
3183
3184- regex: 'ELB-HealthChecker'
3185  name: 'Amazon ELB'
3186  category: 'Site Monitor'
3187  url: 'https://aws.amazon.com/elasticloadbalancing/'
3188  producer:
3189    name: 'Amazon.com, Inc.'
3190    url: 'https://www.amazon.com/'
3191
3192- regex: 'Wheregoes\.com Redirect Checker'
3193  name: 'WhereGoes'
3194  category: 'Crawler'
3195  url: 'https://wheregoes.com/'
3196
3197- regex: 'project_patchwatch'
3198  name: 'Project Patchwatch'
3199  category: 'Crawler'
3200  url: 'http://66.240.192.82/'
3201
3202- regex: 'InternetMeasurement'
3203  name: 'InternetMeasurement'
3204  category: 'Crawler'
3205  url: 'https://internet-measurement.com/'
3206
3207- regex: 'DomainAppender'
3208  name: 'DomainAppender'
3209  category: 'Crawler'
3210  url: 'https://www.profound.net/product/domain_append/'
3211  producer:
3212    name: 'Profound Networks, LLC'
3213    url: 'https://www.profound.net/'
3214
3215- regex: 'FreeWebMonitoring SiteChecker'
3216  name: 'FreeWebMonitoring'
3217  category: 'Site Monitor'
3218  url: 'https://www.freewebmonitoring.com/bot.html'
3219  producer:
3220    name: 'GreenWave Online, Inc.'
3221    url: 'http://www.greenwaveonline.com/'
3222
3223- regex: 'Page Modified Pinger'
3224  name: 'Page Modified Pinger'
3225  category: 'Site Monitor'
3226  url: 'https://www.pagemodified.com/'
3227  producer:
3228    name: 'Valley Hosting, LLC'
3229    url: 'https://www.pagemodified.com/'
3230
3231- regex: 'adstxtlab\.com'
3232  name: 'adstxtlab.com'
3233  category: 'Crawler'
3234  url: 'https://adstxtlab.com/validator.php'
3235  producer:
3236    name: 'Jaohawi AB'
3237    url: 'https://adstxtlab.com/'
3238
3239- regex: 'Iframely'
3240  name: 'Iframely'
3241  category: 'Crawler'
3242  url: 'https://iframely.com/'
3243  producer:
3244    name: 'Itteco Software, Corp.'
3245    url: 'https://iframely.com/'
3246
3247- regex: 'DomainStatsBot'
3248  name: 'DomainStatsBot'
3249  category: 'Crawler'
3250  url: 'https://domainstats.com/pages/our-bot'
3251  producer:
3252    name: 'Domainstats Ltd'
3253    url: 'https://domainstats.com/'
3254
3255- regex: 'aiHitBot'
3256  name: 'aiHitBot'
3257  category: 'Crawler'
3258  url: 'https://www.aihitdata.com/about'
3259
3260- regex: 'DomainCrawler/'
3261  name: 'DomainCrawler'
3262  category: 'Crawler'
3263  url: 'https://domaincrawler.com/about-us/'
3264
3265- regex: 'DNSResearchBot'
3266  name: 'DNSResearchBot'
3267  category: 'Crawler'
3268
3269- regex: 'GitCrawlerBot'
3270  name: 'GitCrawlerBot'
3271  category: 'Crawler'
3272
3273- regex: 'AdAuth'
3274  name: 'AdAuth'
3275  category: 'Crawler'
3276  url: 'https://www.adauth.com'
3277
3278- regex: 'faveeo\.com'
3279  name: 'Faveeo'
3280  category: 'Crawler'
3281  url: 'http://www.faveeo.com'
3282
3283- regex: 'kozmonavt\.'
3284  name: 'Kozmonavt'
3285  category: 'Crawler'
3286  url: 'https://kozmonavt.ml'
3287
3288- regex: 'CriteoBot/'
3289  name: 'CriteoBot'
3290  category: 'Crawler'
3291  url: 'https://www.criteo.com/criteo-crawler/'
3292
3293- regex: 'PayPal IPN'
3294  name: 'PayPal IPN'
3295  category: 'Service Agent'
3296  url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
3297  producer:
3298    name: 'PayPal, Inc.'
3299    url: 'https://www.paypal.com/'
3300
3301- regex: 'MaCoCu'
3302  name: 'MaCoCu'
3303  category: 'Crawler'
3304  url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
3305  producer:
3306    name: 'Jožef Stefan Institute'
3307    url: 'https://www.ijs.si/ijsw/JSI'
3308
3309- regex: 'CLASSLA'
3310  name: 'CLASSLA-web'
3311  category: 'Crawler'
3312  url: 'https://www.clarin.si/info/classla-web-crawler/'
3313  producer:
3314    name: 'Jožef Stefan Institute'
3315    url: 'https://www.ijs.si/ijsw/JSI'
3316
3317- regex: 'dnt-policy@eff\.org'
3318  name: 'EFF Do Not Track Verifier'
3319  category: 'Crawler'
3320  url: 'https://www.eff.org/issues/do-not-track'
3321  producer:
3322    name: 'Electronic Frontier Foundation'
3323    url: 'https://www.eff.org/'
3324
3325- regex: 'InfoTigerBot'
3326  name: 'InfoTigerBot'
3327  category: 'Crawler'
3328  url: 'https://infotiger.com/bot'
3329  producer:
3330    name: 'Infotiger UG'
3331    url: 'https://infotiger.com/'
3332
3333- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
3334  name: 'Birdcrawlerbot'
3335  category: 'Crawler'
3336  url: 'https://crawla.de/de/index.php'
3337  producer:
3338    name: 'Swoppen Systems GmbH'
3339    url: 'https://www.swoppen.com/de'
3340
3341- regex: 'ScamadviserExternalHit'
3342  name: 'Scamadviser External Hit'
3343  category: 'Crawler'
3344  url: 'https://www.scamadviser.com/'
3345  producer:
3346    name: 'Ecommerce Operations B.V.'
3347    url: 'https://www.scamadviser.com/'
3348
3349- regex: 'ZaldamoSearchBot'
3350  name: 'Zaldamo'
3351  category: 'Crawler'
3352  url: 'https://www.zaldamo.com/search.html'
3353  producer:
3354    name: 'Zaldamo, LLC.'
3355    url: 'https://www.zaldamo.com/'
3356
3357- regex: 'AFB'
3358  name: 'Allloadin Favicon Bot'
3359  category: 'Crawler'
3360  url: 'https://allloadin.com/'
3361
3362- regex: 'LinkWalker'
3363  name: 'LinkWalker'
3364  category: 'Crawler'
3365  url: 'https://www.phishlabs.com/'
3366  producer:
3367    name: 'PhishLabs, Inc.'
3368    url: 'https://www.phishlabs.com/'
3369
3370- regex: 'RenovateBot'
3371  name: 'RenovateBot'
3372  category: 'Security Checker'
3373  url: 'https://github.com/renovatebot/renovate'
3374  producer:
3375    name: 'White Source Ltd.'
3376    url: 'https://www.mend.io/free-developer-tools/renovate/'
3377
3378- regex: 'INETDEX-BOT'
3379  name: 'Inetdex Bot'
3380  category: 'Crawler'
3381  url: 'https://www.inetdex.com/'
3382
3383- regex: 'NETZZAPPEN'
3384  name: 'NETZZAPPEN'
3385  category: 'Crawler'
3386  url: 'https://www.netzzappen.com/'
3387  producer:
3388    name: 'Marc Huemer'
3389    url: 'https://www.netzzappen.com/'
3390
3391- regex: 'panscient\.com'
3392  name: 'Panscient'
3393  category: 'Crawler'
3394  url: 'https://www.panscient.com/faq.htm'
3395  producer:
3396    name: 'Panscient, Inc.'
3397    url: 'https://www.panscient.com/'
3398
3399- regex: 'research@pdrlabs\.net'
3400  name: 'PDR Labs'
3401  category: 'Security Checker'
3402  url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3403  producer:
3404    name: 'PDR Labs'
3405    url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3406
3407- regex: 'Nicecrawler'
3408  name: 'NiceCrawler'
3409  category: 'Crawler'
3410  url: 'https://www.nicecrawler.com/'
3411  producer:
3412    name: 'Intelium Corp.'
3413    url: 'https://www.intelium.com/'
3414
3415- regex: 't3versionsBot'
3416  name: 't3versions'
3417  category: 'Crawler'
3418  url: 'https://www.t3versions.com/bot'
3419  producer:
3420    name: 'Torben Hansen'
3421    url: 'https://www.t3versions.com/'
3422
3423- regex: 'Crawlson'
3424  name: 'Crawlson'
3425  category: 'Crawler'
3426  url: 'https://www.crawlson.com/about'
3427  producer:
3428    name: 'Crawlson'
3429    url: 'https://www.crawlson.com/'
3430
3431- regex: 'tchelebi'
3432  name: 'tchelebi'
3433  category: 'Crawler'
3434  url: 'https://tchelebi.io/'
3435  producer:
3436    name: 'NormShield, Inc.'
3437    url: 'https://blackkite.com/'
3438
3439- regex: 'JobboerseBot'
3440  name: 'JobboerseBot'
3441  category: 'Crawler'
3442  url: 'https://www.xing.com/jobs'
3443  producer:
3444    name: 'New Work SE'
3445    url: 'https://www.xing.com/'
3446
3447- regex: 'RepoLookoutBot'
3448  name: 'Repo Lookout'
3449  category: 'Security Checker'
3450  url: 'https://www.repo-lookout.org/'
3451  producer:
3452    name: 'Crissy Field GmbH'
3453    url: 'https://www.crissyfield.de/'
3454
3455- regex: 'PATHspider'
3456  name: 'PATHspider'
3457  category: 'Security Checker'
3458  url: 'https://pathspider.net/'
3459  producer:
3460    name: 'MAMI Project'
3461    url: 'https://mami-project.eu/'
3462
# Everyfeed feed fetcher; the url is a web.archive.org snapshot and the
# producer fields are empty placeholders.
- regex: 'everyfeed-spider'
  name: 'Everyfeed'
  category: 'Feed Fetcher'
  url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
  producer:
    name: ''
    url: ''
3470
3471- regex: 'Exchange check'
3472  name: 'Exchange check'
3473  category: 'Security Checker'
3474  url: 'https://github.com/GossiTheDog/scanning'
3475  producer:
3476    name: 'Kevin Beaumont'
3477    url: 'https://doublepulsar.com/'
3478
3479- regex: 'Sublinq'
3480  name: 'Sublinq'
3481  category: 'Crawler'
3482  url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3483  producer:
3484    name: ''
3485    url: ''
3486
3487- regex: 'Gregarius'
3488  name: 'Gregarius'
3489  category: 'Feed Fetcher'
3490  url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3491  producer:
3492    name: ''
3493    url: ''
3494
3495- regex: 'COMODO DCV'
3496  name: 'COMODO DCV'
3497  category: 'Service Agent'
3498  url: 'https://www.comodo.com/'
3499  producer:
3500    name: 'Comodo Security Solutions, Inc.'
3501    url: 'https://www.comodo.com/'
3502
3503- regex: 'Sectigo DCV|acme\.sectigo\.com'
3504  name: 'Sectigo DCV'
3505  category: 'Service Agent'
3506  url: 'https://sectigo.com/'
3507  producer:
3508    name: 'Sectigo Limited'
3509    url: 'https://sectigo.com/'
3510
3511- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)'
3512  name: 'KlarnaBot'
3513  category: 'Crawler'
3514  url: 'https://docs.klarna.com/klarna-bot/'
3515  producer:
3516    name: 'Klarna Bank AB'
3517    url: 'https://www.klarna.com/'
3518
3519- regex: 'Taboolabot'
3520  name: 'Taboolabot'
3521  category: 'Crawler'
3522  url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3523  producer:
3524    name: 'Taboola, Inc.'
3525    url: 'https://www.taboola.com/'
3526
3527- regex: 'Asana'
3528  name: 'Asana'
3529  category: 'Crawler'
3530  url: 'https://asana.com/'
3531  producer:
3532    name: 'Asana, Inc.'
3533    url: 'https://asana.com/'
3534
3535- regex: 'Chrome Privacy Preserving Prefetch Proxy'
3536  name: 'Chrome Privacy Preserving Prefetch Proxy'
3537  category: 'Service Agent'
3538  url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3539  producer:
3540    name: 'Google Inc.'
3541    url: 'https://www.google.com/'
3542
3543- regex: 'URLinspectorBot'
3544  name: 'URLinspector'
3545  category: 'Site Monitor'
3546  url: 'https://www.urlinspector.com/bot/'
3547  producer:
3548    name: 'LinkResearchTools GmbH'
3549    url: 'https://www.linkresearchtools.com/'
3550
3551- regex: 'EntferBot'
3552  name: 'Entfer'
3553  category: 'Crawler'
3554  url: 'https://entfer.com/'
3555  producer:
3556    name: 'Entfer Ltd.'
3557    url: 'https://entfer.com/'
3558
3559- regex: 'TagInspector'
3560  name: 'Tag Inspector'
3561  category: 'Crawler'
3562  url: 'https://taginspector.com/'
3563  producer:
3564    name: 'InfoTrust, LLC'
3565    url: 'https://infotrust.com/'
3566
3567- regex: 'pageburst'
3568  name: 'Pageburst'
3569  category: 'Crawler'
3570  url: 'https://pageburstls.elsevier.com/'
3571  producer:
3572    name: 'Elsevier Ltd'
3573    url: 'https://www.elsevier.com/'
3574
3575- regex: '.+diffbot'
3576  name: 'Diffbot'
3577  category: 'Crawler'
3578  url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3579  producer:
3580    name: 'Diffbot Technologies Corp.'
3581    url: 'https://www.diffbot.com/'
3582
3583- regex: 'DisqusAdstxtCrawler'
3584  name: 'Disqus'
3585  category: 'Crawler'
3586  url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3587  producer:
3588    name: 'Disqus, Inc.'
3589    url: 'https://disqus.com/'
3590
3591- regex: 'startmebot'
3592  name: 'start.me'
3593  category: 'Crawler'
3594  url: 'https://about.start.me/'
3595  producer:
3596    name: 'start.me BV'
3597    url: 'https://about.start.me/'
3598
3599- regex: '2ip bot'
3600  name: '2ip'
3601  category: 'Crawler'
3602  url: 'https://2ip.io/'
3603
3604- regex: 'ReqBin Curl Client'
3605  name: 'ReqBin'
3606  category: 'Crawler'
3607  url: 'https://reqbin.com/curl'
3608
3609- regex: 'XoviBot'
3610  name: 'XoviBot'
3611  category: 'Crawler'
3612  url: 'https://www.xovibot.net'
3613  producer:
3614    name: 'Xovi GmbH'
3615    url: 'http://www.xovi.de'
3616
3617- regex: 'Overcast/.+Podcast Sync'
3618  name: 'Overcast Podcast Sync'
3619  category: 'Service Agent'
3620  url: 'https://overcast.fm/podcasterinfo'
3621
3622- regex: '^Verity'
3623  name: 'GumGum Verity'
3624  category: 'Service Agent'
3625  url: 'https://gumgum.com/verity'
3626
3627- regex: 'hackermention'
3628  name: 'hackermention'
3629  category: 'Feed Reader'
3630  url: 'https://github.com/snarfed/hackermention'
3631
3632- regex: 'BitSightBot'
3633  name: 'BitSight'
3634  category: 'Security Checker'
3635  url: 'https://www.bitsight.com/'
3636  producer:
3637    name: 'BitSight Technologies, Inc.'
3638    url: 'https://www.bitsight.com/'
3639
3640- regex: 'Ezgif'
3641  name: 'Ezgif'
3642  category: 'Service Agent'
3643  url: 'https://ezgif.com/about'
3644
3645- regex: 'intelx\.io_bot'
3646  name: 'Intelligence X'
3647  category: 'Crawler'
3648  url: 'https://intelx.io/'
3649  producer:
3650    name: 'Kleissner Investments s.r.o.'
3651    url: 'https://intelx.io/'
3652
3653- regex: 'FemtosearchBot'
3654  name: 'Femtosearch'
3655  category: 'Crawler'
3656  url: 'http://femtosearch.com/'
3657  producer:
3658    name: 'Grier Forensics, LLC'
3659    url: 'https://www.grierforensics.com/'
3660
3661- regex: 'AdsTxtCrawler/'
3662  name: 'AdsTxtCrawler'
3663  category: 'Crawler'
3664  url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
3665  producer:
3666    name: 'IAB Technology Laboratory, Inc.'
3667    url: 'https://iabtechlab.com/'
3668
3669- regex: 'Morningscore'
3670  name: 'Morningscore Bot'
3671  category: 'Crawler'
3672  url: 'https://morningscore.io/'
3673  producer:
3674    name: 'Morningscore'
3675    url: 'https://morningscore.io/'
3676
3677- regex: 'Uptime-Kuma'
3678  name: 'Uptime-Kuma'
3679  category: 'Site Monitor'
3680  url: 'https://github.com/louislam/uptime-kuma'
3681
3682- regex: 'OAI-SearchBot'
3683  name: 'OAI-SearchBot'
3684  category: 'Crawler'
3685  url: 'https://platform.openai.com/docs/bots'
3686  producer:
3687    name: 'OpenAI OpCo, LLC'
3688    url: 'https://openai.com/'
3689
3690- regex: 'GPTBot'
3691  name: 'GPTBot'
3692  category: 'Crawler'
3693  url: 'https://platform.openai.com/docs/bots'
3694  producer:
3695    name: 'OpenAI OpCo, LLC'
3696    url: 'https://openai.com/'
3697
3698- regex: 'ChatGPT-User'
3699  name: 'ChatGPT-User'
3700  category: 'Crawler'
3701  url: 'https://platform.openai.com/docs/bots'
3702  producer:
3703    name: 'OpenAI OpCo, LLC'
3704    url: 'https://openai.com/'
3705
3706- regex: 'BrightEdge Crawler'
3707  name: 'BrightEdge'
3708  category: 'Crawler'
3709  url: 'https://www.brightedge.com/'
3710  producer:
3711    name: 'BrightEdge Technologies, Inc'
3712    url: 'https://www.brightedge.com/'
3713
# sfFeedReader, the feed fetcher from the sfFeed2Plugin project linked below.
- regex: 'sfFeedReader'
  name: 'sfFeedReader'
  category: 'Feed Fetcher'
  url: 'https://github.com/diem-project/sfFeed2Plugin'
3718
3719- regex: 'cyberscan\.io'
3720  name: 'Cyberscan'
3721  category: 'Security Checker'
3722  url: 'https://www.cyberscan.io/'
3723  producer:
3724    name: 'DGC Verwaltungs GmbH'
3725    url: 'https://dgc.org/'
3726
3727- regex: 'researchscan\.comsys\.rwth-aachen\.de'
3728  name: 'Research Scan'
3729  category: 'Crawler'
3730  url: 'http://researchscan.comsys.rwth-aachen.de/'
3731  producer:
3732    name: 'RWTH Aachen University'
3733    url: 'https://www.comsys.rwth-aachen.de/'
3734
# NOTE(review): the pattern is the generic token 'newspaper' rather than a
# string specific to Scraping Robot — confirm that every user agent containing
# it should be attributed to this product.
- regex: 'newspaper'
  name: 'Scraping Robot'
  category: 'Crawler'
  url: 'https://scrapingrobot.com/'
  producer:
    name: 'Sprious LLC'
    url: 'https://sprious.com/'
3742
3743- regex: 'Ant(?:\.com beta|Bot)'
3744  name: 'Ant'
3745  category: 'Crawler'
3746  url: 'https://www.ant.com/'
3747  producer:
3748    name: 'Ant.com Ltd.'
3749    url: 'https://www.ant.com/'
3750
3751- regex: 'WebwikiBot'
3752  name: 'Webwiki'
3753  category: 'Crawler'
3754  url: 'https://www.webwiki.com/'
3755  producer:
3756    name: 'webwiki GmbH'
3757    url: 'https://www.webwiki.com/'
3758
3759- regex: 'phpMyAdmin'
3760  name: 'phpMyAdmin'
3761  category: 'Service Agent'
3762  url: 'https://www.phpmyadmin.net/'
3763
3764- regex: 'Matomo/[\d.]+'
3765  name: 'Matomo'
3766  category: 'Service Agent'
3767  url: 'https://github.com/matomo-org/matomo'
3768  producer:
3769    name: 'InnoCraft Ltd'
3770    url: 'https://matomo.org/'
3771
3772- regex: 'Prometheus'
3773  name: 'Prometheus'
3774  category: 'Service Agent'
3775  url: 'https://github.com/prometheus/prometheus'
3776  producer:
3777    name: 'The Linux Foundation'
3778    url: 'https://www.cncf.io/'
3779
3780- regex: 'ArchiveTeam ArchiveBot'
3781  name: 'ArchiveBot'
3782  category: 'Crawler'
3783  url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
3784  producer:
3785    name: 'ArchiveTeam'
3786    url: 'https://wiki.archiveteam.org/'
3787
3788- regex: 'MADBbot'
3789  name: 'MADBbot'
3790  category: 'Crawler'
3791  url: 'https://madb.zapto.org/bot.html'
3792
3793- regex: 'MeltwaterNews'
3794  name: 'MeltwaterNews'
3795  category: 'Crawler'
3796  producer:
3797    name: 'Meltwater Deutschland GmbH'
3798    url: 'https://www.meltwater.com/'
3799
# NOTE(review): 'owler' is also a substring of ordinary words such as
# 'prowler' or 'growler'. Confirm the consumer applies a boundary guard
# before this pattern; otherwise it may need tightening.
- regex: 'owler'
  name: 'OWLer'
  category: 'Crawler'
  url: 'https://openwebsearch.eu/owler/'
  producer:
    name: 'Open Search Foundation e.V.'
    url: 'https://openwebsearch.eu/'
3807
3808- regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
3809  name: 'BBC Page Monitor'
3810  category: 'Site Monitor'
3811  url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
3812  producer:
3813    name: 'BBC'
3814    url: 'https://www.bbc.com/'
3815
3816- regex: 'BBC-Forge-URL-Monitor-Twisted'
3817  name: 'BBC Forge URL Monitor'
3818  category: 'Site Monitor'
3819  url: 'https://www.bbc.com/'
3820  producer:
3821    name: 'BBC'
3822    url: 'https://www.bbc.com/'
3823
# ClaudeBot is the web crawler operated by Anthropic. It is not related to the
# GitHub chat-bot project that happens to share the same name.
- regex: 'ClaudeBot'
  name: 'ClaudeBot'
  category: 'Crawler'
  url: 'https://www.anthropic.com/'
  producer:
    name: 'Anthropic, PBC'
    url: 'https://www.anthropic.com/'
3828
3829- regex: 'Imagesift'
3830  name: 'ImageSift'
3831  category: 'Crawler'
3832  url: 'https://imagesift.com/'
3833  producer:
3834    name: 'Castle Global, Inc.'
3835    url: 'https://thehive.ai/'
3836
3837- regex: 'TactiScout'
3838  name: 'TactiScout'
3839  category: 'Crawler'
3840  url: 'https://find-it.world/TempCrawl/Crawltheque.php'
3841  producer:
3842    name: 'Tactikast'
3843
3844- regex: 'Brightbot'
3845  name: 'BrightBot'
3846  category: 'Crawler'
3847  url: 'https://www.brightbot.app/'
3848  producer:
3849    name: 'Bright Interactive Ltd'
3850    url: 'https://www.builtbybright.com/'
3851
3852- regex: 'DaspeedBot'
3853  name: 'DaspeedBot'
3854  category: 'Crawler'
3855  url: 'https://daspeed.io/'
3856  producer:
3857    name: 'DAWAP SARL'
3858    url: 'https://dawap.fr/'
3859
3860- regex: 'StractBot'
3861  name: 'Stract'
3862  category: 'Crawler'
3863  url: 'https://stract.com/webmasters'
3864  producer:
3865    name: 'Stract'
3866    url: 'https://github.com/StractOrg/stract/'
3867
3868- regex: 'GeedoBot'
3869  name: 'GeedoBot'
3870  category: 'Crawler'
3871  url: 'https://geedo.com/bot/'
3872
3873- regex: 'GeedoProductSearch'
3874  name: 'GeedoProductSearch'
3875  category: 'Crawler'
3876  url: 'https://geedo.com/product-search/'
3877
3878- regex: 'BackupLand'
3879  name: 'BackupLand'
3880  category: 'Crawler'
3881  url: 'https://go.backupland.com/'
3882  producer:
3883    name: 'ООО «КВАРТА»'
3884    url: 'https://go.backupland.com/'
3885
3886- regex: 'Konturbot'
3887  name: 'Konturbot'
3888  category: 'Crawler'
3889  url: 'https://kontur.ru/'
3890  producer:
3891    name: 'АО «ПФ «СКБ Контур»'
3892    url: 'https://kontur.ru/'
3893
3894- regex: 'keys-so-bot'
3895  name: 'Keys.so'
3896  category: 'Crawler'
3897  url: 'https://www.keys.so/'
3898  producer:
3899    name: 'ООО «МОДЕСКО»'
3900    url: 'https://www.modesco.ru/'
3901
3902- regex: 'LetsearchBot'
3903  name: 'LetSearch'
3904  category: 'Crawler'
3905  url: 'https://letsearch.ru/bots'
3906
3907- regex: 'Example3'
3908  name: 'Example3'
3909  category: 'Crawler'
3910  url: 'https://www.example3.com/'
3911
3912- regex: 'StatOnlineRuBot'
3913  name: 'StatOnline.ru'
3914  category: 'Crawler'
3915  url: 'https://statonline.ru/'
3916  producer:
3917    name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
3918    url: 'https://statonline.ru/'
3919
3920- regex: 'Spawning-AI'
3921  name: 'Spawning AI'
3922  category: 'Crawler'
3923  url: 'https://spawning.ai/'
3924  producer:
3925    name: 'Spawning, Inc'
3926    url: 'https://spawning.ai/'
3927
3928- regex: 'domain research project'
3929  name: 'Domain Research Project'
3930  category: 'Crawler'
3931  url: 'https://trentwil.es/domains.html'
3932  producer:
3933    name: 'Trent Wiles'
3934    url: 'https://trentwil.es/'
3935
3936- regex: 'getodin\.com'
3937  name: 'Odin'
3938  category: 'Security Checker'
3939  url: 'https://docs.getodin.com/'
3940  producer:
3941    name: 'Cyble Inc.'
3942    url: 'https://cyble.com/'
3943
3944- regex: 'YouBot'
3945  name: 'YouBot'
3946  category: 'Crawler'
3947  url: 'https://about.you.com/youbot/'
3948  producer:
3949    name: 'SuSea, Inc.'
3950    url: 'https://you.com/'
3951
3952- regex: 'SiteScoreBot'
3953  name: 'SiteScore'
3954  category: 'Crawler'
3955  url: 'https://sitescore.ai/'
3956
3957- regex: 'MBCrawler'
3958  name: 'Monitor Backlinks'
3959  category: 'Crawler'
3960  url: 'https://www.seoptimer.com/monitor-backlinks/'
3961  producer:
3962    name: 'SEOptimer'
3963    url: 'https://www.seoptimer.com/'
3964
3965- regex: 'mariadb-mysql-kbs-bot'
3966  name: 'MariaDB/MySQL Knowledge Base'
3967  category: 'Crawler'
3968  url: 'https://github.com/williamdes/mariadb-mysql-kbs'
3969  producer:
3970    name: 'WDES SAS'
3971    url: 'https://wdes.fr/en/'
3972
3973- regex: 'GitHubCopilotChat'
3974  name: 'GitHubCopilotChat'
3975  category: 'Crawler'
3976  url: 'https://github.com/aaamoon/copilot-gpt4-service'
3977
3978- regex: '^pdrl\.fm'
3979  name: 'Podroll Analyzer'
3980  category: 'Crawler'
3981  url: 'https://podroll.fm'
3982
3983- regex: 'PodUptime/'
3984  name: 'PodUptime'
3985  category: 'Site Monitor'
3986  url: 'https://poduptime.com'
3987
3988- regex: 'anthropic-ai'
3989  name: 'Anthropic AI'
3990  category: 'Crawler'
3991  url: 'https://www.anthropic.com/'
3992  producer:
3993    name: 'Anthropic, PBC'
3994    url: 'https://www.anthropic.com/'
3995
3996- regex: 'NetpeakCheckerBot'
3997  name: 'Netpeak Checker'
3998  category: 'Crawler'
3999  url: 'https://netpeaksoftware.com/checker'
4000  producer:
4001    name: 'Netpeak LTD'
4002    url: 'https://netpeaksoftware.com/'
4003
4004- regex: 'SandobaCrawler'
4005  name: 'Sandoba//Crawler'
4006  category: 'Crawler'
4007  url: 'https://www.sandoba.com/en/crawler/'
4008  producer:
4009    name: 'SANDOBA//EBUSINESS SOLUTIONS'
4010    url: 'https://www.sandoba.com/'
4011
4012- regex: 'SirdataBot'
4013  name: 'Sirdata'
4014  category: 'Crawler'
4015  url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
4016  producer:
4017    name: 'Sirdata SAS'
4018    url: 'https://www.sirdata.com/'
4019
# CheckMark Network crawler. `url` is the spider information page; it is a
# file path (`spider.html`), so it carries no trailing slash.
- regex: 'CheckMarkNetwork'
  name: 'CheckMark Network'
  category: 'Crawler'
  url: 'https://www.checkmarknetwork.com/spider.html'
  producer:
    name: 'Exipert, Inc.'
    url: 'https://www.checkmarknetwork.com/'
4027
4028- regex: 'cohere-ai'
4029  name: 'Cohere AI'
4030  category: 'Crawler'
4031  url: 'https://cohere.com/'
4032  producer:
4033    name: 'Cohere, Inc.'
4034    url: 'https://cohere.com/'
4035
4036- regex: 'PerplexityBot'
4037  name: 'PerplexityBot'
4038  category: 'Crawler'
4039  url: 'https://docs.perplexity.ai/docs/perplexitybot'
4040  producer:
4041    name: 'Perplexity AI, Inc.'
4042    url: 'https://www.perplexity.ai/'
4043
4044- regex: 'TTD-Content'
4045  name: 'The Trade Desk Content'
4046  category: 'Crawler'
4047  url: 'https://www.thetradedesk.com/us/ttd-content'
4048  producer:
4049    name: 'The Trade Desk, Inc.'
4050    url: 'https://www.thetradedesk.com/'
4051
4052- regex: 'montastic-monitor'
4053  name: 'Montastic Monitor'
4054  category: 'Site Monitor'
4055  url: 'https://www.montastic.com/'
4056  producer:
4057    name: 'Metadot, Corp.'
4058    url: 'https://www.metadot.com/'
4059
4060- regex: 'Ruby, Twurly v'
4061  name: 'Twurly'
4062  category: 'Crawler'
4063  url: 'https://twurly.org/'
4064
4065- regex: 'Mixnode(?:Cache)?'
4066  name: 'Mixnode'
4067  category: 'Crawler'
4068  url: 'https://www.mixnode.com/'
4069  producer:
4070    name: 'Mixnode Technologies, Inc.'
4071    url: 'https://www.mixnode.com/'
4072
4073- regex: 'CSSCheck'
4074  name: 'CSSCheck'
4075  category: 'Validator'
4076
4077- regex: 'MicrosoftPreview'
4078  name: 'Microsoft Preview'
4079  category: 'Service Agent'
4080  url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
4081  producer:
4082    name: 'Microsoft Corporation'
4083    url: 'https://www.microsoft.com/'
4084
# NOTE(review): the 's~' prefix looks like a Google App Engine application-id
# prefix, i.e. this would match requests sent by an App Engine app named
# 'virustotalcloud' — confirm against sample user agents.
- regex: 's~virustotalcloud'
  name: 'VirusTotal Cloud'
  category: 'Crawler'
  url: 'https://www.virustotal.com/'
  producer:
    name: 'Chronicle Security Ireland Limited'
    url: 'https://chronicle.security/'
4092
4093- regex: 'TinEye'
4094  name: 'TinEye'
4095  category: 'Crawler'
4096  url: 'https://tineye.com/'
4097  producer:
4098    name: 'Idée, Inc.'
4099    url: 'https://tineye.com/'
4100
# NOTE(review): the 'e~' prefix looks like a Google App Engine application-id
# prefix, i.e. this would match requests sent by an App Engine app named
# 'arsnova-filter-system' — confirm against sample user agents.
- regex: 'e~arsnova-filter-system'
  name: 'ARSNova Filter System'
  category: 'Crawler'
  url: 'https://particify.de/en/'
  producer:
    name: 'Particify Gerhardt & Weingarten OHG'
    url: 'https://particify.de/en/'
4108
4109- regex: 'botify'
4110  name: 'Botify'
4111  category: 'Crawler'
4112  url: 'https://www.botify.com/'
4113  producer:
4114    name: 'BOTIFY SAS'
4115    url: 'https://www.botify.com/'
4116
4117- regex: 'adscanner'
4118  name: 'Adscanner'
4119  category: 'Crawler'
4120  url: 'https://www.alleyesonscreens.com/'
4121  producer:
4122    name: 'AdScanner d.o.o'
4123    url: 'https://www.alleyesonscreens.com/'
4124
4125- regex: 'online-webceo-bot'
4126  name: 'WebCEO'
4127  category: 'Crawler'
4128  url: 'https://www.webceo.com/'
4129  producer:
4130    name: 'WebCEO, LLC'
4131    url: 'https://www.webceo.com/'
4132
4133- regex: 'NetTrack'
4134  name: 'NetTrack'
4135  category: 'Crawler'
4136  url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
4137
4138- regex: 'htmlyse'
4139  name: 'htmlyse'
4140  category: 'Crawler'
4141  url: 'https://www.htmlyse.com/'
4142  producer:
4143    name: 'Vistex LTD'
4144    url: 'https://www.htmlyse.com/'
4145
4146- regex: 'TrendsmapResolver'
4147  name: 'Trendsmap'
4148  category: 'Crawler'
4149  url: 'https://www.trendsmap.com/'
4150  producer:
4151    name: 'Trendsmap Pty Ltd'
4152    url: 'https://www.trendsmap.com/'
4153
4154- regex: 'Shareaholic(?:bot)?'
4155  name: 'Steve Bot'
4156  category: 'Crawler'
4157  url: 'https://www.shareaholic.com/steve'
4158  producer:
4159    name: 'Shareaholic, Inc.'
4160    url: 'https://www.shareaholic.com/'
4161
# KeyCDN Tools requests whose user agent uses the 'keycdn-tools:' form.
# The producer is the same company recorded for the other KeyCDN Tools
# signature in this file.
- regex: 'keycdn-tools:'
  name: 'KeyCDN Tools'
  category: 'Service Agent'
  url: 'https://tools.keycdn.com/geo'
  producer:
    name: 'proinity LLC'
    url: 'https://www.keycdn.com/'
4166
4167- regex: 'keycdn-tools/'
4168  name: 'KeyCDN Tools'
4169  category: 'Service Agent'
4170  url: 'https://tools.keycdn.com/'
4171  producer:
4172    name: 'proinity LLC'
4173    url: 'https://www.keycdn.com/'
4174
4175- regex: 'Arquivo-web-crawler'
4176  name: 'Arquivo.pt'
4177  category: 'Crawler'
4178  url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
4179  producer:
4180    name: 'FCT|FCCN'
4181    url: 'https://www.fct.pt/'
4182
4183- regex: 'WhatsMyIP\.org'
4184  name: 'WhatsMyIP.org'
4185  category: 'Service Agent'
4186  url: 'https://www.whatsmyip.org/ua/'
4187
4188- regex: 'SenutoBot'
4189  name: 'Senuto'
4190  category: 'Crawler'
4191  url: 'https://www.senuto.com/'
4192  producer:
4193    name: 'Senuto Sp. z o.o.'
4194    url: 'https://www.senuto.com/'
4195
4196- regex: 'GozleBot'
4197  name: 'Gozle'
4198  category: 'Crawler'
4199  url: 'https://gozle.com.tm/en/blog/post/1'
4200  producer:
4201    name: 'Doly Horjun HJ'
4202    url: 'https://gozle.com.tm/'
4203
4204- regex: 'Quantcastbot'
4205  name: 'Quantcast'
4206  category: 'Crawler'
4207  url: 'https://www.quantcast.com/bot/'
4208  producer:
4209    name: 'Quantcast Corp.'
4210    url: 'https://www.quantcast.com/'
4211
4212- regex: 'FontRadar'
4213  name: 'FontRadar'
4214  category: 'Crawler'
4215  url: 'https://www.fontradar.com/'
4216  producer:
4217    name: 'EMDASH SAS'
4218    url: 'https://www.fontradar.com/'
4219
4220- regex: 'ViberUrlDownloader'
4221  name: 'Viber Url Downloader'
4222  category: 'Service Agent'
4223  url: 'https://www.viber.com/'
4224  producer:
4225    name: 'Viber Media S.à r.l.'
4226    url: 'https://www.viber.com/'
4227
4228- regex: '^Zeno$'
4229  name: 'Zeno'
4230  category: 'Crawler'
4231  url: 'https://github.com/internetarchive/Zeno'
4232  producer:
4233    name: 'The Internet Archive'
4234    url: 'https://archive.org/'
4235
4236- regex: 'Barracuda Sentinel'
4237  name: 'Barracuda Sentinel'
4238  category: 'Service Agent'
4239  url: 'https://sentinel.barracudanetworks.com/'
4240  producer:
4241    name: 'Barracuda Networks, Inc.'
4242    url: 'https://www.barracudanetworks.com/'
4243
4244- regex: 'RuxitSynthetic'
4245  name: 'RuxitSynthetic'
4246  category: 'Site Monitor'
4247  url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4248  producer:
4249    name: 'Dynatrace LLC'
4250    url: 'https://www.dynatrace.com/'
4251
4252- regex: 'DynatraceSynthetic'
4253  name: 'DynatraceSynthetic'
4254  category: 'Site Monitor'
4255  url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4256  producer:
4257    name: 'Dynatrace LLC'
4258    url: 'https://www.dynatrace.com/'
4259
4260- regex: 'sitebulb'
4261  name: 'Sitebulb'
4262  category: 'Crawler'
4263  url: 'https://sitebulb.com/'
4264  producer:
4265    name: 'Sitebulb Limited'
4266    url: 'https://sitebulb.com/'
4267
4268- regex: 'Monsidobot'
4269  name: 'Monsidobot'
4270  category: 'Crawler'
4271  url: 'https://monsido.com/bot-html'
4272  producer:
4273    name: 'Monsido LLC'
4274    url: 'https://monsido.com/'
4275
4276- regex: 'AccompanyBot'
4277  name: 'AccompanyBot'
4278  category: 'Crawler'
4279  url: 'https://www.accompany.com/'
4280  producer:
4281    name: 'Accompani, Inc'
4282    url: 'https://www.accompany.com/'
4283
4284- regex: 'Ghost Inspector'
4285  name: 'Ghost Inspector'
4286  category: 'Site Monitor'
4287  url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
4288  producer:
4289    name: 'Ghost Inspector, Inc.'
4290    url: 'https://www.ghostinspector.com/'
4291
4292- regex: 'Google-Apps-Script'
4293  name: 'Google Apps Script'
4294  category: 'Service Agent'
4295  url: 'https://www.google.com/script/start/'
4296
4297- regex: 'SiteOne-Crawler'
4298  name: 'SiteOne Crawler'
4299  category: 'Crawler'
4300  url: 'https://crawler.siteone.io/bot/'
4301  producer:
4302    name: 'SiteOne s.r.o.'
4303    url: 'https://www.siteone.io/'
4304
4305- regex: 'Detectify'
4306  name: 'Detectify'
4307  category: 'Security Checker'
4308  url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
4309  producer:
4310    name: 'Detectify AB'
4311    url: 'https://detectify.com/'
4312
4313- regex: 'DomCopBot'
4314  name: 'DomCop Bot'
4315  category: 'Crawler'
4316  url: 'https://www.domcop.com/bot'
4317  producer:
4318    name: 'Axeman Technology Solutions LLP'
4319    url: 'https://axemantech.com/'
4320
4321- regex: 'Paqlebot'
4322  name: 'Paqlebot'
4323  category: 'Crawler'
4324  url: 'https://www.paqle.dk/about/paqlebot'
4325  producer:
4326    name: 'Paqle A/S'
4327    url: 'https://www.paqle.dk/'
4328
4329- regex: 'Wibybot'
4330  name: 'Wibybot'
4331  category: 'Crawler'
4332  url: 'https://www.wiby.me/'
4333
4334- regex: 'Synapse'
4335  name: 'Synapse'
4336  category: 'Crawler'
4337  url: 'https://github.com/matrix-org/synapse'
4338
4339- regex: 'OSZKbot'
4340  name: 'OSZKbot'
4341  category: 'Crawler'
4342  url: 'http://mekosztaly.oszk.hu/mia/'
4343  producer:
4344    name: 'National Szechenyi Library'
4345    url: 'https://webarchivum.oszk.hu/'
4346
4347- regex: 'ZoomBot'
4348  name: 'ZoomBot'
4349  category: 'Crawler'
4350  url: 'https://suite.seozoom.it/bot.html'
4351  producer:
4352    name: 'SEO Cube S.r.l.'
4353    url: 'https://www.seocube.it/'
4354
4355- regex: 'RavenCrawler'
4356  name: 'RavenCrawler'
4357  category: 'Crawler'
4358  url: 'https://raventools.com/site-auditor/'
4359  producer:
4360    name: 'TapClicks, Inc.'
4361    url: 'https://www.tapclicks.com/'
4362
4363- regex: 'KadoBot'
4364  name: 'KadoBot'
4365  category: 'Crawler'
4366  url: 'https://www.kadolijst.nl/bot'
4367  producer:
4368    name: 'Kadolijst'
4369    url: 'https://www.kadolijst.nl/'
4370
4371- regex: 'Dubbotbot'
4372  name: 'Dubbotbot'
4373  category: 'Crawler'
4374  url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
4375  producer:
4376    name: 'DubBot'
4377    url: 'https://dubbot.com/'
4378
# Swiftbot, the crawler of the Swiftype site-search service. The producer is
# spelled 'Elasticsearch B.V.' (no comma), the company's registered name.
- regex: 'Swiftbot'
  name: 'Swiftbot'
  category: 'Crawler'
  url: 'https://swiftype.com/swiftbot'
  producer:
    name: 'Elasticsearch B.V.'
    url: 'https://www.elastic.co/'
4386
4387- regex: 'EyeMonIT'
4388  name: 'EyeMonit'
4389  category: 'Site Monitor'
4390  url: 'https://eyemonit.com/'
4391  producer:
4392    name: 'EyeMonit'
4393    url: 'https://eyemonit.com/'
4394
4395- regex: 'ThousandEyes'
4396  name: 'ThousandEyes'
4397  category: 'Site Monitor'
4398  url: 'https://www.thousandeyes.com/'
4399  producer:
4400    name: 'Cisco Systems, Inc.'
4401    url: 'https://www.cisco.com/'
4402
4403- regex: 'OmtrBot'
4404  name: 'OmtrBot'
4405  category: 'Site Monitor'
4406
4407- regex: 'WebMon'
4408  name: 'WebMon'
4409  category: 'Site Monitor'
4410
4411- regex: 'AdsTxtCrawlerTP'
4412  name: 'AdsTxtCrawlerTP'
4413  category: 'Crawler'
4414
4415- regex: 'fragFINN'
4416  name: 'fragFINN'
4417  category: 'Crawler'
4418  url: 'https://www.fragfinn.de/'
4419  producer:
4420    name: 'fragFINN e.V.'
4421    url: 'https://www.fragfinn.de/'
4422
4423- regex: 'Clickagy'
4424  name: 'Clickagy'
4425  category: 'Crawler'
4426  url: 'https://www.clickagy.com/'
4427  producer:
4428    name: 'Clickagy, LLC'
4429    url: 'https://www.clickagy.com/'
4430
4431- regex: 'kiwitcms-gitops'
4432  name: 'Kiwi TCMS GitOps'
4433  category: 'Service Agent'
4434  url: 'https://kiwitcms.org'
4435  producer:
4436    name: 'Open Technologies Bulgaria, Ltd.'
4437    url: 'https://kiwitcms.org'
4438
4439- regex: 'webtru_crawler'
4440  name: 'webtru'
4441  category: 'Crawler'
4442  url: 'https://webtru.io/'
4443  producer:
4444    name: 'DataSign Inc.'
4445    url: 'https://datasign.jp/'
4446
4447- regex: 'URLSuMaBot'
4448  name: 'URLSuMaBot'
4449  category: 'Crawler'
4450  url: 'https://www.urlsuma.de/'
4451
4452- regex: '360JK yunjiankong'
4453  name: '360JK'
4454  category: 'Site Monitor'
4455  url: 'http://jk.cloud.360.cn/'
4456  producer:
4457    name: '360 Security Technology Inc.'
4458    url: 'https://www.360.cn/'
4459
4460- regex: 'UCSBNetworkMeasurement'
4461  name: 'UCSB Network Measurement'
4462  category: 'Crawler'
4463  url: 'https://www.it.ucsb.edu/'
4464  producer:
4465    name: 'University of California, Santa Barbara'
4466    url: 'https://www.it.ucsb.edu/'
4467
4468- regex: 'Plesk screenshot bot'
4469  name: 'Plesk Screenshot Service'
4470  category: 'Service Agent'
4471  url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
4472  producer:
4473    name: 'Plesk International GmbH'
4474    url: 'https://www.plesk.com/'
4475
4476- regex: 'Who\.is'
4477  name: 'Who.is Bot'
4478  category: 'Crawler'
4479  url: 'https://who.is/'
4480
4481- regex: 'Probely'
4482  name: 'Probely'
4483  category: 'Security Checker'
4484  url: 'https://probely.com/sos/'
4485  producer:
4486    name: 'Probely - Soluções de Cibersegurança, S.A.'
4487    url: 'https://probely.com/'
4488
4489- regex: 'Uptimia'
4490  name: 'Uptimia'
4491  category: 'Site Monitor'
4492  url: 'https://www.uptimia.com/'
4493  producer:
4494    name: 'JJ Online GmbH'
4495    url: 'https://www.uptimia.com/'
4496
4497- regex: '2GDPR'
4498  name: '2GDPR'
4499  category: 'Service Agent'
4500  url: 'https://2gdpr.com/tos'
4501  producer:
4502    name: '2GDPR'
4503    url: 'https://2gdpr.com/'
4504
4505- regex: 'abuse\.xmco\.fr'
4506  name: 'Serenety'
4507  category: 'Security Checker'
4508  url: 'https://abuse.xmco.fr/'
4509  producer:
4510    name: 'XMCO, SASU'
4511    url: 'https://www.xmco.fr/'
4512
4513- regex: 'CheckHost'
4514  name: 'CheckHost'
4515  category: 'Site Monitor'
4516  url: 'https://check-host.net/'
4517  producer:
4518    name: 'CheckHost'
4519    url: 'https://check-host.net/'
4520
4521- regex: 'LAC_IAHarvester'
4522  name: 'LAC IA Harvester'
4523  category: 'Crawler'
4524  url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
4525  producer:
4526    name: 'Library and Archives Canada'
4527    url: 'https://library-archives.canada.ca/'
4528
4529- regex: 'InsytfulBot'
4530  name: 'InsytfulBot'
4531  category: 'Crawler'
4532  url: 'https://www.insytful.com/'
4533  producer:
4534    name: 'Zengenti Limited'
4535    url: 'https://www.zengenti.com/'
4536
4537- regex: 'statista\.com'
4538  name: 'Statista'
4539  category: 'Crawler'
4540  url: 'https://www.statista.com/'
4541  producer:
4542    name: 'Statista, Inc.'
4543    url: 'https://www.statista.com/'
4544
4545- regex: 'SubstackContentFetch'
4546  name: 'Substack Content Fetch'
4547  category: 'Crawler'
4548  url: 'https://substack.com/'
4549  producer:
4550    name: 'Substack, Inc.'
4551    url: 'https://substack.com/'
4552
4553- regex: '^ds9'
4554  name: 'Deep SEARCH 9'
4555  category: 'Crawler'
4556  url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
4557  producer:
4558    name: 'Copyright Clearance Center, Inc.'
4559    url: 'https://www.copyright.com/'
4560
# LiveJournal feed fetcher, matched on the literal 'LiveJournal.com' token
# (the dot is escaped in the pattern).
- regex: 'LiveJournal\.com'
  name: 'LiveJournal'
  category: 'Feed Fetcher'
  url: 'https://www.livejournal.com/'
  producer:
    name: 'ООО "СИМ"'
    url: 'https://www.livejournal.com/'
4568
4569- regex: 'bitdiscovery'
4570  name: 'Tenable.asm'
4571  category: 'Security Checker'
4572  url: 'https://bitdiscovery.com/'
4573  producer:
4574    name: 'Tenable, Inc.'
4575    url: 'https://www.tenable.com/'
4576
4577- regex: 'Castopod'
4578  name: 'Castopod'
4579  category: 'Crawler'
4580  url: 'https://www.castopod.org/'
4581
4582- regex: 'Elastic/Synthetics'
4583  name: 'Elastic Synthetics'
4584  category: 'Site Monitor'
4585  url: 'https://github.com/elastic/synthetics'
4586  producer:
4587    name: 'Elasticsearch B.V.'
4588    url: 'https://www.elastic.co/'
4589
4590- regex: 'WDG_Validator'
4591  name: 'WDG HTML Validator'
4592  category: 'Validator'
4593  url: 'http://www.htmlhelp.com/tools/validator/'
4594
# Aegis crawler, matched on the contact address 'scan@aegis.network'.
# The dot is escaped so that it matches a literal '.' instead of any character.
# `url` is a Wayback Machine snapshot of the project site.
- regex: 'scan@aegis\.network'
  name: 'Aegis'
  category: 'Crawler'
  url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
4599
4600- regex: 'CrawlyProjectCrawler'
4601  name: 'Crawly Project'
4602  category: 'Crawler'
4603  url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
4604
4605- regex: 'BDFetch'
4606  name: 'BDFetch'
4607  category: 'Crawler'
4608  url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
4609
4610- regex: 'PunkMap'
4611  name: 'Punk Map'
4612  category: 'Security Checker'
4613  url: 'https://github.com/openeasm/punkmap'
4614
4615- regex: 'GenomeCrawlerd'
4616  name: 'Deepfield Genome'
4617  category: 'Crawler'
4618  url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
4619  producer:
4620    name: 'Nokia Corporation'
4621    url: 'https://www.nokia.com/'
4622
4623- regex: 'Gaisbot'
4624  name: 'Gaisbot'
4625  category: 'Crawler'
4626  url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
4627
4628- regex: 'FAST-WebCrawler'
4629  name: 'AlltheWeb'
4630  category: 'Crawler'
4631  url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
4632
4633- regex: 'ducks\.party'
4634  name: 'ducks.party'
4635  category: 'Security Checker'
4636  url: 'https://ducks.party/'
4637
4638- regex: 'DepSpid'
4639  name: 'DepSpid'
4640  category: 'Crawler'
4641  url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
4642
4643- regex: 'Website-info\.net'
4644  name: 'Website-info'
4645  category: 'Crawler'
4646  url: 'https://website-info.net/robot'
4647  producer:
4648    name: 'Meins und Vogel GmbH'
4649    url: 'https://muv.com/'
4650
4651- regex: 'RedekenBot'
4652  name: 'RedekenBot'
4653  category: 'Crawler'
4654  url: 'https://www.redeken.com/en/help/bot.html'
4655  producer:
4656    name: 'Redeken'
4657    url: 'https://www.redeken.com/'
4658
4659- regex: 'semaltbot'
4660  name: 'semaltbot'
4661  category: 'Crawler'
4662  url: 'https://semalt.net/'
4663  producer:
4664    name: 'Semalt LP'
4665    url: 'https://semalt.net/'
4666
4667- regex: 'MakeMerryBot'
4668  name: 'MakeMerryBot'
4669  category: 'Crawler'
4670  url: 'https://makemerry.app/bots'
4671
4672- regex: 'Timpibot'
4673  name: 'Timpibot'
4674  category: 'Crawler'
4675  url: 'https://timpi.io/'
4676  producer:
4677    name: 'Timpi Inc.'
4678    url: 'https://timpi.io/'
4679
4680- regex: 'Validbot'
4681  name: 'ValidBot'
4682  category: 'Crawler'
4683  url: 'https://www.validbot.com/'
4684  producer:
4685    name: 'Jake Olefsky LLC'
4686    url: 'https://www.validbot.com/'
4687
4688- regex: 'NPBot'
4689  name: 'NameProtectBot'
4690  category: 'Crawler'
4691  url: 'https://www.cscglobal.com/cscglobal/home/'
4692  producer:
4693    name: 'NameProtect, Inc.'
4694    url: 'https://www.cscglobal.com/'
4695
4696- regex: 'domaincodex\.com'
4697  name: 'Domain Codex'
4698  category: 'Crawler'
4699  url: 'https://www.domaincodex.com/'
4700  producer:
4701    name: 'Erie Data Systems, LLC'
4702    url: 'https://www.eriedatasys.com/'
4703
4704- regex: 'Swisscows Favicons'
4705  name: 'Swisscows Favicons'
4706  category: 'Crawler'
4707  url: 'https://swisscows.com/'
4708  producer:
4709    name: 'Swisscows AG'
4710    url: 'https://swisscows.com/'
4711
4712- regex: 'leak\.info'
4713  name: 'leak.info'
4714  category: 'Crawler'
4715  url: 'http://www.leak.info/'
4716
4717- regex: 'workona'
4718  name: 'Workona'
4719  category: 'Crawler'
4720  url: 'https://workona.com/'
4721  producer:
4722    name: 'Workona, Inc.'
4723    url: 'https://workona.com/'
4724
4725- regex: 'Bloglines'
4726  name: 'Bloglines'
4727  category: 'Crawler'
4728  url: 'https://web.archive.org/web/20140309033202/http://www.bloglines.com/'
4729  producer:
4730    name: 'Reply!, Inc.'
4731    url: 'https://www.reply.com/'
4732
4733- regex: 'heritrix'
4734  name: 'Heritrix'
4735  category: 'Crawler'
4736  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
4737  producer:
4738    name: 'The Internet Archive'
4739    url: 'https://archive.org'
4740
4741- regex: 'search\.marginalia\.nu'
4742  name: 'Marginalia'
4743  category: 'Crawler'
4744  url: 'https://www.marginalia.nu/marginalia-search/for-webmasters/'
4745  producer:
4746    name: 'Marginalia'
4747    url: 'https://www.marginalia.nu/'
4748
4749- regex: 'vu-server-health-scanner'
4750  name: 'VU Server Health Scanner'
4751  category: 'Security Checker'
4752  url: 'https://130.37.198.75/index.html'
4753  producer:
4754    name: 'VU Amsterdam'
4755    url: 'https://vu.nl/en'
4756
4757- regex: 'Functionize'
4758  name: 'Functionize'
4759  category: 'Crawler'
4760  url: 'https://www.functionize.com/'
4761  producer:
4762    name: 'Functionize, Inc.'
4763    url: 'https://www.functionize.com/'
4764
4765- regex: 'Prerender'
4766  name: 'Prerender'
4767  category: 'Crawler'
4768  url: 'https://docs.prerender.io/docs/33-overview-of-prerender-crawlers'
4769  producer:
4770    name: 'saas.group Inc.'
4771    url: 'https://saas.group/'
4772
4773- regex: 'bl\.uk_ldfc_bot'
4774  name: 'The British Library Legal Deposit Bot'
4775  category: 'Crawler'
4776  url: 'https://www.bl.uk/'
4777  producer:
4778    name: 'The British Library'
4779    url: 'https://www.bl.uk/'
4780
4781- regex: 'Miniature\.io'
4782  name: 'Miniature.io'
4783  category: 'Service Agent'
4784  url: 'https://miniature.io/'
4785  producer:
4786    name: 'LCX Ventures Ltd'
4787    url: 'https://www.lcxventures.com/'
4788
4789- regex: 'Convertify'
4790  name: 'Convertify'
4791  category: 'Service Agent'
4792  url: 'https://www.convertify.app/'
4793  producer:
4794    name: 'Convertify'
4795    url: 'https://www.convertify.app/'
4796
4797- regex: 'ZoteroTranslationServer'
4798  name: 'Zotero Translation Server'
4799  category: 'Service Agent'
4800  url: 'https://github.com/wikimedia/mediawiki-services-zotero'
4801  producer:
4802    name: 'The Wikimedia Foundation, Inc.'
4803    url: 'https://www.wikimedia.org/'
4804
4805- regex: 'MuckRack'
4806  name: 'MuckRack'
4807  category: 'Crawler'
4808  url: 'https://muckrack.com/'
4809  producer:
4810    name: 'Muck Rack, LLC'
4811    url: 'https://muckrack.com/'
4812
4813- regex: 'Golfe'
4814  name: 'Golfe'
4815  category: 'Crawler'
4816  url: 'http://www.goo-olfe.ae/bot.html'
4817
4818- regex: 'SpiderLing'
4819  name: 'SpiderLing'
4820  category: 'Crawler'
4821  url: 'https://nlp.fi.muni.cz/projects/biwec/'
4822  producer:
4823    name: 'Natural Language Processing Centre'
4824    url: 'https://nlp.fi.muni.cz/'
4825
4826- regex: 'Bravebot'
4827  name: 'Bravebot'
4828  category: 'Search bot'
4829  url: 'https://search.brave.com/help/brave-search-crawler'
4830  producer:
4831    name: 'Brave Software, Inc.'
4832    url: 'https://brave.com/'
4833
4834- regex: '1001FirmsBot'
4835  name: '1001FirmsBot'
4836  category: 'Crawler'
4837  url: 'https://www.1001firms.com/1001firmsbot.php'
4838
4839- regex: 'SteamChatURLLookup'
4840  name: 'Steam Chat URL Lookup'
4841  category: 'Service Agent'
4842  url: 'https://help.steampowered.com/en/faqs/view/595C-42F4-3B66-E02F'
4843  producer:
4844    name: 'Valve Corporation'
4845    url: 'https://www.valvesoftware.com/'
4846
4847- regex: 'ohdear\.app'
4848  name: 'Oh Dear'
4849  category: 'Site Monitor'
4850  url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs'
4851  producer:
4852    name: 'Immutable, SNC'
4853    url: 'https://ohdear.app/'
4854
4855- regex: 'Inspici'
4856  name: 'Inspici'
4857  category: 'Crawler'
4858  url: 'https://www.inspici.com/'
4859  producer:
4860    name: 'Inspici, LLC'
4861    url: 'https://www.inspici.com/'
4862
4863- regex: 'peer39_crawler'
4864  name: 'Peer39'
4865  category: 'Crawler'
4866  url: 'https://www.peer39.com/crawler-notice'
4867  producer:
4868    name: 'Peer39 Tech, LLC'
4869    url: 'https://www.peer39.com/'
4870
4871- regex: 'Pandalytics'
4872  name: 'Pandalytics'
4873  category: 'Crawler'
4874  url: 'https://www.domainsbot.com/business-intelligence/'
4875  producer:
4876    name: 'DomainsBot, Inc.'
4877    url: 'https://www.domainsbot.com/'
4878
4879- regex: 'CloudServerMarketSpider'
4880  name: 'CloudServerMarketSpider'
4881  category: 'Crawler'
4882  url: 'https://web.archive.org/web/20151228225429/https://cloudservermarket.com/spider.html'
4883
4884- regex: 'Pigafetta'
4885  name: 'Pigafetta'
4886  category: 'Crawler'
4887  url: 'https://visual-seo.com/Pigafetta-Bot'
4888  producer:
4889    name: 'aStonish Studio Srl'
4890    url: 'http://www.astonishstudio.com/'
4891
4892- regex: 'Cotoyogi'
4893  name: 'Cotoyogi'
4894  category: 'Crawler'
4895  url: 'https://ds.rois.ac.jp/center8/crawler/'
4896  producer:
4897    name: 'Joint Support-Center for Data Science Research (ROIS-DS)'
4898    url: 'https://ds.rois.ac.jp/'
4899
4900- regex: 'SuggestBot'
4901  name: 'SuggestBot'
4902  category: 'Crawler'
4903  url: 'https://github.com/nettrom/suggestbot'
4904
4905- regex: 'cms-experiment'
4906  name: 'CMS Experiment'
4907  category: 'Security Checker'
4908  url: 'https://securitee.org/cms-experiment-fall2024/'
4909
4910- regex: 'SiteCheckerBotCrawler'
4911  name: 'SiteCheckerBotCrawler'
4912  category: 'Crawler'
4913  url: 'https://sitechecker.pro/'
4914  producer:
4915    name: 'Cyber Circus Limited'
4916    url: 'https://sitechecker.pro/'
4917
4918- regex: 'SBIder'
4919  name: 'SBIder'
4920  category: 'Crawler'
4921  url: 'https://www.sitesell.com/sbider.html'
4922  producer:
4923    name: 'SiteSell Inc.'
4924    url: 'https://www.sitesell.com/'
4925
4926- regex: 'LightspeedSystemsCrawler'
4927  name: 'LightspeedSystemsCrawler'
4928  category: 'Crawler'
4929  url: 'https://www.lightspeedsystems.com/'
4930  producer:
4931    name: 'Lightspeed Systems, Inc.'
4932    url: 'https://www.lightspeedsystems.com/'
4933
4934- regex: 'Research JLU'
4935  name: 'Research JLU'
4936  category: 'Crawler'
4937  url: 'https://www.uni-giessen.de/en/research'
4938  producer:
4939    name: 'Justus Liebig University Giessen'
4940    url: 'https://www.uni-giessen.de/en'
4941
4942- regex: '(?:hgf|OS)AlphaXCrawl'
4943  name: 'AlphaXCrawl'
4944  category: 'Crawler'
4945  url: 'https://www.fim.uni-passau.de/en/data-science/research/open-search'
4946  producer:
4947    name: 'University of Passau'
4948    url: 'https://www.uni-passau.de/en/'
4949
4950- regex: 'WPMU DEV'
4951  name: 'WPMU DEV'
4952  category: 'Crawler'
4953  url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent'
4954  producer:
4955    name: 'Incsub, LLC.'
4956    url: 'https://incsub.com/'
4957
4958- regex: 'SnoopSecInspect'
4959  name: 'SnoopSecInspect'
4960  category: 'Security Checker'
4961  url: 'https://web.archive.org/web/20241206193253/https://snoopsec.us.to/'
4962
4963- regex: 'ModatScanner'
4964  name: 'ModatScanner'
4965  category: 'Security Checker'
4966  url: 'https://www.modat.io/scanning'
4967  producer:
4968    name: 'Modat B.V.'
4969    url: 'https://www.modat.io/'
4970
4971- regex: 'researchcyber\.net'
4972  name: 'researchcyber.net'
4973  category: 'Security Checker'
4974  url: 'https://web.archive.org/web/20241219082407/https://researchcyber.net/'
4975
4976- regex: 'CrystalSemanticsBot'
4977  name: 'CrystalSemanticsBot'
4978  category: 'Crawler'
4979  url: 'https://web.archive.org/web/20121230203310/http://www.crystalsemantics.com/user-agent/'
4980  producer:
4981    name: 'Crystal Semantics Ltd.'
4982    url: 'https://web.archive.org/web/20121029062239/http://www.crystalsemantics.com/'
4983
4984- regex: 'najdu\.s\.holubem\.eu'
4985  name: 'najdu.s.holubem.eu'
4986  category: 'Crawler'
4987  url: 'https://najdu.s.holubem.eu/'
4988
4989- regex: 'VORTEX/'
4990  name: 'VORTEX'
4991  category: 'Crawler'
4992  url: 'https://marty.anstey.ca/robots/vortex'
4993
4994- regex: 'xtate/(\d+\.[.\d]+)'
4995  name: 'xtate'
4996  category: 'Crawler'
4997  url: 'https://github.com/babycoff/xtate'
4998
4999- regex: 'FediList Agent/'
5000  name: 'FediList'
5001  category: 'Social Media Agent'
5002  url: 'https://fedilist.com/'
5003
5004- regex: 'Grafana/(\d+\.[.\d]+)'
5005  name: 'Grafana'
5006  category: 'Site Monitor'
5007  url: 'https://github.com/grafana/grafana'
5008  producer:
5009    name: 'Grafana Labs'
5010    url: 'https://grafana.com/'
5011
5012- regex: 'github-camo'
5013  name: 'Github Camo'
5014  category: 'Crawler'
5015  url: 'https://github.com/atmos/camo'
5016  producer:
5017    name: 'Github'
5018    url: 'https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-anonymized-urls'
5019
5020- regex: 'Bluesky'
5021  name: 'Bluesky'
5022  category: 'Social Media Agent'
5023  url: 'https://bsky.app'
5024  producer:
5025    name: 'Bluesky Social PBC'
5026    url: 'https://bsky.app'
5027
5028- regex: 'OpenGraph\.io'
5029  name: 'OpenGraph.io'
5030  category: 'Crawler'
5031  url: 'https://www.opengraph.io'
5032  producer:
5033    name: 'OpenGraph.io'
5034    url: 'https://www.opengraph.io'
5035
# Generic bots
# One alternation of literal tokens; any match is reported as 'Generic Bot'
# (no category or url is set). Alternatives prefixed with '^' only match at the
# start of the string, and the '^(?:...)$' group only matches when the whole
# string equals one of its names. Dots meant literally are escaped.
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|\(compatible\)|John Recon|SPARK COMMIT|masjesu|Komaru_The_Cat|Jesus Christ of Nazareth is LORD|Kowai|LoliSec|LMAO|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node\.js|Report Runner|url|Zeus|ZmEu)$|OnlyScans|TheInternetSearchx'
  name: 'Generic Bot'
5039
# Generic detections
# Keyword catch-all: an optional run of [a-z0-9_-] characters followed by a
# bot-like keyword (bot, crawler, spider, fetch, monitor, scanner, ...), which
# must in turn be followed by a character outside a-z or by the end of the
# string. The lookarounds carve out specific exceptions, e.g. 'bot' preceded by
# 'cu' or followed by ' TAB', 'fetch' preceded by 'node-', 'project' followed by
# 'or', and 'spider' followed by ' 8'. No category or url is set for this entry.
# NOTE(review): presumably evaluated after the specific entries above so it only
# acts as a fallback; confirm against the consuming parser.
- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|(?<!P)research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
  name: 'Generic Bot'
5043