1############### 2# Device Detector - The Universal Device Detection library for parsing User Agents 3# 4# @link https://matomo.org 5# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later 6############### 7 8- regex: 'WireReaderBot' 9 name: 'WireReaderBot' 10 category: 'Feed Fetcher' 11 url: 'https://wirereader.app/' 12 13- regex: 'monitoring360bot' 14 name: '360 Monitoring' 15 category: 'Site Monitor' 16 url: 'https://www.360monitoring.io' 17 producer: 18 name: 'Plesk International GmbH' 19 url: 'https://www.plesk.com' 20 21- regex: 'Cloudflare-Healthchecks' 22 name: 'Cloudflare Health Checks' 23 category: 'Service Agent' 24 url: 'https://developers.cloudflare.com/health-checks/' 25 producer: 26 name: 'CloudFlare' 27 url: 'https://www.cloudflare.com/' 28 29- regex: '360Spider' 30 name: '360Spider' 31 category: 'Search bot' 32 url: 'https://www.so.com/help/help_3_2.html' 33 producer: 34 name: 'Online Media Group, Inc.' 35 url: '' 36 37- regex: 'Aboundex' 38 name: 'Aboundexbot' 39 category: 'Search bot' 40 url: 'http://www.aboundex.com/crawler/' 41 producer: 42 name: 'Aboundex.com' 43 url: 'http://www.aboundex.com' 44 45- regex: 'AcoonBot' 46 name: 'Acoon' 47 category: 'Search bot' 48 url: 'http://www.acoon.de/robot.asp' 49 producer: 50 name: 'Acoon GmbH' 51 url: 'http://www.acoon.de' 52 53- regex: 'AddThis\.com' 54 name: 'AddThis.com' 55 category: 'Social Media Agent' 56 url: '' 57 producer: 58 name: 'Clearspring Technologies, Inc.' 
59 url: 'http://www.clearspring.com' 60 61- regex: 'AhrefsBot' 62 name: 'aHrefs Bot' 63 category: 'Crawler' 64 url: 'https://ahrefs.com/robot' 65 producer: 66 name: 'Ahrefs Pte Ltd' 67 url: 'https://ahrefs.com/robot' 68 69- regex: 'AhrefsSiteAudit' 70 name: 'AhrefsSiteAudit' 71 category: 'Site Monitor' 72 url: 'https://ahrefs.com/robot/site-audit' 73 producer: 74 name: 'Ahrefs Pte Ltd' 75 url: 'https://ahrefs.com/' 76 77- regex: 'ia_archiver|alexabot|verifybot' 78 name: 'Alexa Crawler' 79 category: 'Search bot' 80 url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers' 81 producer: 82 name: 'Alexa Internet' 83 url: 'https://www.alexa.com' 84 85- regex: 'alexa site audit' 86 name: 'Alexa Site Audit' 87 category: 'Site Monitor' 88 url: 'https://support.alexa.com/hc/en-us/articles/200450194' 89 producer: 90 name: 'Alexa Internet' 91 url: 'https://www.alexa.com' 92 93- regex: 'Amazonbot' 94 name: 'Amazon Bot' 95 category: 'Crawler' 96 url: 'https://developer.amazon.com/support/amazonbot' 97 producer: 98 name: 'Amazon.com, Inc.' 99 url: 'https://www.amazon.com/' 100 101- regex: 'AmazonAdBot' 102 name: 'Amazon AdBot' 103 category: 'Crawler' 104 url: 'https://adbot.amazon.com/' 105 producer: 106 name: 'Amazon.com, Inc.' 
107 url: 'https://www.amazon.com/' 108 109- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service' 110 name: 'Amazon Route53 Health Check' 111 category: 'Service Agent' 112 producer: 113 name: 'Amazon Web Services' 114 url: 'https://aws.amazon.com/' 115 116- regex: 'AmorankSpider' 117 name: 'Amorank Spider' 118 category: 'Crawler' 119 url: 'http://amorank.com/webcrawler.html' 120 producer: 121 name: 'Amorank' 122 url: 'http://www.amorank.com' 123 124- regex: 'ApacheBench' 125 name: 'ApacheBench' 126 category: 'Benchmark' 127 url: 'https://httpd.apache.org/docs/2.4/programs/ab.html' 128 producer: 129 name: 'The Apache Software Foundation' 130 url: 'https://www.apache.org/foundation/' 131 132- regex: 'Applebot' 133 name: 'Applebot' 134 category: 'Crawler' 135 url: 'https://support.apple.com/en-us/119829' 136 producer: 137 name: 'Apple Inc' 138 url: 'https://www.apple.com/' 139 140- regex: 'iTMS' 141 name: 'iTMS' 142 category: 'Crawler' 143 url: 'https://support.apple.com/en-us/119829' 144 producer: 145 name: 'Apple Inc' 146 url: 'https://www.apple.com/' 147 148- regex: 'AppSignalBot' 149 name: 'AppSignalBot' 150 category: 'Site Monitor' 151 url: 'https://docs.appsignal.com/uptime-monitoring/' 152 producer: 153 name: 'AppSignal' 154 url: 'https://appsignal.com/' 155 156- regex: 'Arachni' 157 name: 'Arachni' 158 category: 'Security Checker' 159 url: 'https://www.arachni-scanner.com/' 160 producer: 161 name: 'Sarosys LLC' 162 url: 'https://www.sarosys.com/' 163 164- regex: 'AspiegelBot' 165 name: 'AspiegelBot' 166 category: 'Crawler' 167 url: 'https://aspiegel.com/' 168 producer: 169 name: 'Huawei' 170 url: 'https://www.huawei.com/' 171 172- regex: 'Castro 2, Episode Duration Lookup' 173 name: 'Castro 2' 174 category: 'Service Agent' 175 url: 'http://supertop.co/castro/' 176 producer: 177 name: 'Supertop' 178 url: 'http://supertop.co' 179 180- regex: 'Curious George' 181 name: 'Analytics SEO Crawler' 182 category: 'Crawler' 183 url: 
'http://www.analyticsseo.com/crawler' 184 producer: 185 name: 'Analytics SEO' 186 url: 'http://www.analyticsseo.com' 187 188- regex: 'archive\.org_bot|special_archiver' 189 name: 'archive.org bot' 190 category: 'Crawler' 191 url: 'https://archive.org/details/archive.org_bot' 192 producer: 193 name: 'The Internet Archive' 194 url: 'https://archive.org' 195 196- regex: 'Ask Jeeves/Teoma' 197 name: 'Ask Jeeves' 198 category: 'Search bot' 199 url: '' 200 producer: 201 name: 'Ask Jeeves Inc.' 202 url: 'http://www.ask.com' 203 204- regex: 'Backlink-Check\.de' 205 name: 'Backlink-Check.de' 206 category: 'Crawler' 207 url: 'http://www.backlink-check.de/bot.html' 208 producer: 209 name: 'Mediagreen Medienservice' 210 url: 'http://www.backlink-check.de' 211 212- regex: 'BacklinkCrawler' 213 name: 'BacklinkCrawler' 214 category: 'Crawler' 215 url: 'http://www.backlinktest.com/crawler.html' 216 producer: 217 name: '2.0Promotion GbR' 218 url: 'http://www.backlinktest.com' 219 220- regex: 'Baidu.*spider|baidu Transcoder' 221 name: 'Baidu Spider' 222 category: 'Search bot' 223 url: 'http://www.baidu.com/search/spider.htm' 224 producer: 225 name: 'Baidu' 226 url: 'http://www.baidu.com' 227 228- regex: 'BazQux' 229 name: 'BazQux Reader' 230 url: 'https://bazqux.com/fetcher' 231 category: 'Feed Fetcher' 232 producer: 233 name: '' 234 url: '' 235 236- regex: 'Better Uptime Bot' 237 name: 'Better Uptime Bot' 238 category: 'Site Monitor' 239 url: 'https://betteruptime.com/faq' 240 producer: 241 name: 'Better Uptime' 242 url: 'https://betteruptime.com/' 243 244- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot' 245 name: 'BingBot' 246 category: 'Search bot' 247 url: 'http://search.msn.com/msnbot.htmn' 248 producer: 249 name: 'Microsoft Corporation' 250 url: 'http://www.microsoft.com' 251 252- regex: 'Blackbox Exporter' 253 name: 'Blackbox Exporter' 254 category: 'Site Monitor' 255 url: 'https://github.com/prometheus/blackbox_exporter' 256 
producer: 257 name: 'Prometheus' 258 url: 'https://prometheus.io/' 259 260- regex: 'Blekkobot' 261 name: 'Blekkobot' 262 category: 'Search bot' 263 url: 'http://blekko.com/about/blekkobot' 264 producer: 265 name: 'Blekko' 266 url: 'http://blekko.com' 267 268- regex: 'BLEXBot' 269 name: 'BLEXBot Crawler' 270 category: 'Crawler' 271 url: 'http://webmeup-crawler.com' 272 producer: 273 name: 'WebMeUp' 274 url: 'http://webmeup.com' 275 276- regex: 'Bloglovin' 277 name: 'Bloglovin' 278 url: 'http://www.bloglovin.com' 279 category: 'Feed Fetcher' 280 producer: 281 name: '' 282 url: '' 283 284- regex: 'Blogtrottr' 285 name: 'Blogtrottr' 286 url: '' 287 category: 'Feed Fetcher' 288 producer: 289 name: 'Blogtrottr Ltd' 290 url: 'https://blogtrottr.com/' 291 292- regex: 'BoardReader Blog Indexer' 293 name: 'BoardReader Blog Indexer' 294 category: 'Crawler' 295 producer: 296 name: 'BoardReader' 297 url: 'https://boardreader.com/' 298 299- regex: 'BountiiBot' 300 name: 'Bountii Bot' 301 category: 'Search bot' 302 url: 'http://bountii.com/contact.php' 303 producer: 304 name: 'Bountii Inc.' 
305 url: 'http://bountii.com' 306 307- regex: 'Browsershots' 308 name: 'Browsershots' 309 category: 'Service Agent' 310 url: 'http://browsershots.org/faq' 311 producer: 312 name: 'Browsershots.org' 313 url: 'http://browsershots.org' 314 315- regex: 'BUbiNG' 316 name: 'BUbiNG' 317 category: 'Crawler' 318 url: 'http://law.di.unimi.it/BUbiNG.html' 319 producer: 320 name: 'The Laboratory for Web Algorithmics (LAW)' 321 url: 'http://law.di.unimi.it/software.php#buging' 322 323- regex: '(?<!HTC)[ _]Butterfly/' 324 name: 'Butterfly Robot' 325 category: 'Search bot' 326 url: 'http://labs.topsy.com/butterfly' 327 producer: 328 name: 'Topsy Labs' 329 url: 'http://labs.topsy.com' 330 331- regex: 'CareerBot' 332 name: 'CareerBot' 333 category: 'Crawler' 334 url: 'http://www.career-x.de/bot.html' 335 producer: 336 name: 'career-x GmbH' 337 url: 'http://www.career-x.de' 338 339- regex: 'CCBot' 340 name: 'ccBot crawler' 341 category: 'Crawler' 342 url: 'http://commoncrawl.org/faq/' 343 producer: 344 name: 'reddit inc.' 
345 url: 'http://www.reddit.com' 346 347- regex: 'Cliqzbot' 348 name: 'Cliqzbot' 349 category: 'Crawler' 350 url: 'http://cliqz.com/company/cliqzbot' 351 producer: 352 name: '10betterpages GmbH' 353 url: 'http://cliqz.com' 354 355- regex: 'Cloudflare-AMP' 356 name: 'CloudFlare AMP Fetcher' 357 category: 'Crawler' 358 url: 'https://amp.cloudflare.com/doc/fetcher.html' 359 producer: 360 name: 'CloudFlare' 361 url: 'http://www.cloudflare.com' 362 363- regex: 'Cloudflare-?Diagnostics' 364 name: 'Cloudflare Diagnostics' 365 category: 'Site Monitor' 366 url: 'https://www.cloudflare.com/' 367 producer: 368 name: 'Cloudflare' 369 url: 'https://www.cloudflare.com/' 370 371- regex: 'CloudFlare-AlwaysOnline' 372 name: 'CloudFlare Always Online' 373 category: 'Site Monitor' 374 url: 'https://www.cloudflare.com/always-online' 375 producer: 376 name: 'CloudFlare' 377 url: 'https://www.cloudflare.com/' 378 379- regex: 'Cloudflare-SSLDetector' 380 name: 'Cloudflare SSL Detector' 381 category: 'Site Monitor' 382 url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/' 383 producer: 384 name: 'CloudFlare' 385 url: 'https://www.cloudflare.com/' 386 387- regex: 'Cloudflare Custom Hostname Verification' 388 name: 'Cloudflare Custom Hostname Verification' 389 category: 'Service Agent' 390 url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/' 391 producer: 392 name: 'CloudFlare' 393 url: 'https://www.cloudflare.com/' 394 395- regex: 'Cloudflare-Traffic-Manager' 396 name: 'Cloudflare Traffic Manager' 397 category: 'Site Monitor' 398 url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/' 399 producer: 400 name: 'CloudFlare' 401 url: 'https://www.cloudflare.com/' 402 403- regex: 'Cloudflare-Smart-Transit' 404 name: 'Cloudflare Smart Transit' 405 category: 'Site Monitor' 406 url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/' 407 producer: 408 name: 
'CloudFlare' 409 url: 'https://www.cloudflare.com/' 410 411- regex: 'CloudflareObservatory' 412 name: 'Cloudflare Observatory' 413 category: 'Site Monitor' 414 url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test' 415 producer: 416 name: 'CloudFlare' 417 url: 'https://www.cloudflare.com/' 418 419- regex: 'https://developers\.cloudflare\.com/security-center/' 420 name: 'Cloudflare Security Insights' 421 category: 'Site Monitor' 422 url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/' 423 producer: 424 name: 'CloudFlare' 425 url: 'https://www.cloudflare.com/' 426 427- regex: 'coccoc\.com' 428 name: 'Cốc Cốc Bot' 429 url: 'https://help.coccoc.com/en/search-engine/coccoc-robots' 430 category: 'Search bot' 431 producer: 432 name: 'Cốc Cốc' 433 url: 'https://coccoc.com/' 434 435- regex: 'collectd' 436 name: 'Collectd' 437 url: 'https://collectd.org/' 438 category: 'Site Monitor' 439 producer: 440 name: 'Collectd' 441 url: 'https://collectd.org/' 442 443- regex: 'CommaFeed' 444 name: 'CommaFeed' 445 url: 'http://www.commafeed.com' 446 category: 'Feed Fetcher' 447 producer: 448 name: '' 449 url: '' 450 451- regex: 'CSS Certificate Spider' 452 name: 'CSS Certificate Spider' 453 category: 'Crawler' 454 url: 'http://www.css-security.com/certificatespider/' 455 producer: 456 name: 'Certified Security Solutions' 457 url: 'https://www.css-security.com/company/about-us/' 458 459- regex: 'Datadog Agent|Datadog/?Synthetics' 460 name: 'Datadog Agent' 461 url: 'https://github.com/DataDog/dd-agent' 462 category: 'Site Monitor' 463 producer: 464 name: 'Datadog' 465 url: 'https://www.datadoghq.com/' 466 467- regex: 'Datanyze' 468 name: 'Datanyze' 469 url: '' 470 category: 'Crawler' 471 producer: 472 name: 'Datanyze' 473 url: 'https://www.datanyze.com' 474 475- regex: 'Dataprovider' 476 name: 'Dataprovider' 477 category: 'Crawler' 478 url: '' 479 producer: 480 name: 'Dataprovider B.V.' 
481 url: 'https://www.dataprovider.com/' 482 483- regex: 'Daum(?!(?:Apps|Device))' 484 name: 'Daum' 485 category: 'Search bot' 486 url: 'http://tab.search.daum.net/aboutWebSearch_en.html' 487 producer: 488 name: 'Daum Communications Corp.' 489 url: 'http://www.kakaocorp.com/main' 490 491- regex: 'Dazoobot' 492 name: 'Dazoobot' 493 category: 'Search bot' 494 url: '' 495 producer: 496 name: 'DAZOO.FR' 497 url: 'http://dazoo.fr' 498 499- regex: 'discobot' 500 name: 'Discobot' 501 category: 'Search bot' 502 url: 'http://discoveryengine.com/discobot.html' 503 producer: 504 name: 'Discovery Engine' 505 url: 'http://discoveryengine.com' 506 507- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com' 508 name: 'Domain Re-Animator Bot' 509 category: 'Crawler' 510 url: '' 511 producer: 512 name: 'Domain Re-Animator, LLC' 513 url: 'http://domainreanimator.com' 514 515- regex: 'DotBot' 516 name: 'DotBot' 517 category: 'Crawler' 518 url: 'http://www.opensiteexplorer.org/dotbot' 519 producer: 520 name: 'SEOmoz, Inc.' 
521 url: 'http://moz.com/' 522 523- regex: 'DuckDuck(?:Go-Favicons-)?Bot' 524 name: 'DuckDuckBot' 525 category: 'Search bot' 526 url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/' 527 producer: 528 name: 'DuckDuckGo' 529 url: 'https://duckduckgo.com/' 530 531- regex: 'DuckAssistBot' 532 name: 'DuckAssistBot' 533 category: 'Search bot' 534 url: 'https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot/' 535 producer: 536 name: 'DuckDuckGo' 537 url: 'https://duckduckgo.com/' 538 539- regex: 'EasouSpider' 540 name: 'Easou Spider' 541 category: 'Search bot' 542 url: 'http://www.easou.com/search/spider.html' 543 producer: 544 name: 'easou ICP' 545 url: 'http://www.easou.com' 546 547- regex: 'eCairn-Grabber' 548 name: 'eCairn-Grabber' 549 category: 'Crawler' 550 producer: 551 name: 'eCairn' 552 url: 'https://ecairn.com' 553 554- regex: 'EMail Exractor' 555 name: 'EMail Exractor' 556 category: 'Crawler' 557 url: '' 558 producer: 559 name: '' 560 url: '' 561 562- regex: 'evc-batch' 563 name: 'evc-batch' 564 category: 'Crawler' 565 url: '' 566 producer: 567 name: 'eVenture Capital Partners II, LLC' 568 url: 'http://www.eventures.vc/' 569 570- regex: 'Exabot|ExaleadCloudview' 571 name: 'ExaBot' 572 category: 'Crawler' 573 url: 'http://www.exabot.com/go/robot' 574 producer: 575 name: 'Dassault Systèmes' 576 url: 'http://www.3ds.com' 577 578- regex: 'ExactSeek Crawler' 579 name: 'ExactSeek Crawler' 580 category: 'Search bot' 581 url: 'http://www.exactseek.com' 582 producer: 583 name: 'Jayde Online, Inc.' 584 url: 'http://www.jaydeonlineinc.com' 585 586- regex: 'Ezooms' 587 name: 'Ezooms' 588 category: 'Crawler' 589 url: '' 590 producer: 591 name: 'SEOmoz, Inc.' 
592 url: 'http://moz.com/' 593 594- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)' 595 name: 'Facebook Crawler' 596 category: 'Social Media Agent' 597 url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' 598 producer: 599 name: 'Meta Platforms, Inc.' 600 url: 'https://www.meta.com/' 601 602- regex: 'meta-externalagent' 603 name: 'Meta-ExternalAgent' 604 category: 'Crawler' 605 url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' 606 producer: 607 name: 'Meta Platforms, Inc.' 608 url: 'https://www.meta.com/' 609 610- regex: 'meta-externalfetcher' 611 name: 'Meta-ExternalFetcher' 612 category: 'Social Media Agent' 613 url: 'https://developers.facebook.com/docs/sharing/webmasters/web-crawlers' 614 producer: 615 name: 'Meta Platforms, Inc.' 616 url: 'https://www.meta.com/' 617 618- regex: 'FacebookBot' 619 name: 'FacebookBot' 620 category: 'Crawler' 621 url: 'https://developers.facebook.com/docs/sharing/bot' 622 producer: 623 name: 'Meta Platforms, Inc.' 
624 url: 'https://www.meta.com/' 625 626- regex: 'Feedbin' 627 name: 'Feedbin' 628 url: 'http://feedbin.com/' 629 category: 'Feed Fetcher' 630 producer: 631 name: '' 632 url: '' 633 634- regex: 'FeedBurner' 635 name: 'FeedBurner' 636 url: 'http://www.feedburner.com' 637 category: 'Feed Fetcher' 638 producer: 639 name: '' 640 url: '' 641 642- regex: 'Feed Wrangler' 643 name: 'Feed Wrangler' 644 url: 'https://feedwrangler.net/' 645 category: 'Feed Fetcher' 646 producer: 647 name: 'David Smith & Developing Perspective, LLC' 648 url: 'https://david-smith.org' 649 650- regex: 'Feedly' 651 name: 'Feedly' 652 url: 'http://www.feedly.com' 653 category: 'Feed Fetcher' 654 producer: 655 name: '' 656 url: '' 657 658- regex: 'Feedspot' 659 name: 'Feedspot' 660 url: 'http://www.feedspot.com' 661 category: 'Feed Fetcher' 662 producer: 663 name: '' 664 url: '' 665 666- regex: 'Fever/' 667 name: 'Fever' 668 url: 'http://feedafever.com/' 669 category: 'Feed Fetcher' 670 producer: 671 name: '' 672 url: '' 673 674- regex: 'FlipboardProxy|FlipboardRSS' 675 name: 'Flipboard' 676 url: 'http://flipboard.com/browserproxy' 677 category: 'Feed Fetcher' 678 producer: 679 name: 'Flipboard' 680 url: 'http://flipboard.com/' 681 682- regex: 'Findxbot' 683 name: 'Findxbot' 684 category: 'Crawler' 685 url: 'http://www.findxbot.com' 686 687- regex: 'FreshRSS' 688 name: 'FreshRSS' 689 category: 'Feed Fetcher' 690 url: 'https://freshrss.org/' 691 692- regex: 'Genieo' 693 name: 'Genieo Web filter' 694 category: '' 695 url: 'http://www.genieo.com/webfilter.html' 696 producer: 697 name: 'Genieo' 698 url: 'http://www.genieo.com' 699 700- regex: 'GigablastOpenSource' 701 name: 'Gigablast' 702 category: 'Search bot' 703 url: 'https://github.com/gigablast/open-source-search-engine' 704 producer: 705 name: 'Matt Wells' 706 url: 'http://www.gigablast.com/faq.html' 707 708- regex: 'Gluten Free Crawler' 709 name: 'Gluten Free Crawler' 710 category: 'Crawler' 711 url: 'http://glutenfreepleasure.com/' 712 
producer: 713 name: '' 714 url: '' 715 716- regex: 'gobuster' 717 name: 'Gobuster' 718 url: 'https://github.com/OJ/gobuster' 719 720- regex: 'ichiro/mobile goo' 721 name: 'Goo' 722 category: 'Search bot' 723 url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1' 724 producer: 725 name: 'NTT Resonant' 726 url: 'http://goo.ne.jp' 727 728- regex: 'Storebot-Google' 729 name: 'Google StoreBot' 730 category: 'Crawler' 731 732- regex: 'Google Favicon' 733 name: 'Google Favicon' 734 category: 'Crawler' 735 736- regex: 'Google Search Console' 737 name: 'Google Search Console' 738 category: 'Crawler' 739 url: 'https://search.google.com/search-console/about' 740 producer: 741 name: 'Google Inc.' 742 url: 'https://www.google.com/' 743 744- regex: 'Google Page Speed Insights' 745 name: 'Google PageSpeed Insights' 746 category: 'Site Monitor' 747 url: 'http://developers.google.com/speed/pagespeed/insights/' 748 producer: 749 name: 'Google Inc.' 750 url: 'https://www.google.com/' 751 752- regex: 'google_partner_monitoring' 753 name: 'Google Partner Monitoring' 754 category: 'Site Monitor' 755 url: '' 756 producer: 757 name: 'Google Inc.' 758 url: 'https://www.google.com/' 759 760- regex: 'Google-Cloud-Scheduler' 761 name: 'Google Cloud Scheduler' 762 category: 'Crawler' 763 url: 'https://cloud.google.com/scheduler' 764 producer: 765 name: 'Google Inc.' 766 url: 'https://www.google.com' 767 768- regex: 'Google-Structured-Data-Testing-Tool' 769 name: 'Google Structured Data Testing Tool' 770 category: 'Validator' 771 url: 'https://search.google.com/structured-data/testing-tool' 772 producer: 773 name: 'Google Inc.' 774 url: 'https://www.google.com/' 775 776- regex: 'GoogleStackdriverMonitoring' 777 name: 'Google Stackdriver Monitoring' 778 category: 'Site Monitor' 779 url: 'https://cloud.google.com/monitoring' 780 producer: 781 name: 'Google Inc.' 
782 url: 'https://www.google.com' 783 784- regex: 'Google-Transparency-Report' 785 name: 'Google Transparency Report' 786 category: 'Site Monitor' 787 url: 'https://transparencyreport.google.com/' 788 producer: 789 name: 'Google Inc.' 790 url: 'https://www.google.com/' 791 792- regex: 'Google-CloudVertexBot' 793 name: 'Google-CloudVertexBot' 794 category: 'Crawler' 795 url: 'https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot' 796 producer: 797 name: 'Google Inc.' 798 url: 'https://www.google.com/' 799 800- regex: 'via ggpht\.com GoogleImageProxy' 801 name: 'Gmail Image Proxy' 802 category: 'Crawler' 803 url: '' 804 producer: 805 name: 'Google Inc.' 806 url: 'https://www.google.com/' 807 808- regex: 'Google-Document-Conversion' 809 name: 'Google-Document-Conversion' 810 category: 'Service Agent' 811 url: 'https://support.google.com/drive/answer/176692?hl=en' 812 producer: 813 name: 'Google Inc.' 814 url: 'https://www.google.com/' 815 816- regex: 'GoogleDocs; apps-spreadsheets' 817 name: 'Google Sheets' 818 category: 'Service Agent' 819 url: 'https://workspace.google.com/products/sheets/' 820 producer: 821 name: 'Google Inc.' 822 url: 'https://www.google.com/' 823 824- regex: 'GoogleDocs; apps-presentations' 825 name: 'Google Slides' 826 category: 'Service Agent' 827 url: 'https://workspace.google.com/products/slides/' 828 producer: 829 name: 'Google Inc.' 830 url: 'https://www.google.com/' 831 832- regex: 'GoogleDocs;' 833 name: 'Google Docs' 834 category: 'Service Agent' 835 url: 'https://docs.google.com/' 836 producer: 837 name: 'Google Inc.' 838 url: 'https://www.google.com/' 839 840- regex: 'SeznamEmailProxy' 841 name: 'Seznam Email Proxy' 842 category: 'Crawler' 843 url: '' 844 producer: 845 name: 'Seznam.cz, a.s.' 846 url: 'http://www.seznam.cz/' 847 848- regex: 'Seznam-Zbozi-robot' 849 name: 'Seznam Zbozi.cz' 850 category: 'Crawler' 851 url: '' 852 producer: 853 name: 'Seznam.cz, a.s.' 
854 url: 'https://www.zbozi.cz/' 855 856- regex: 'Heurekabot-Feed' 857 name: 'Heureka Feed' 858 category: 'Crawler' 859 url: 'https://sluzby.heureka.cz/napoveda/heurekabot/' 860 producer: 861 name: 'Heureka.cz, a.s.' 862 url: 'https://www.heureka.cz/' 863 864- regex: 'ShopAlike' 865 name: 'ShopAlike' 866 category: 'Crawler' 867 url: '' 868 producer: 869 name: 'Visual Meta' 870 url: 'https://www.shopalike.cz/' 871 872- regex: 'deepcrawl\.com' 873 name: 'Lumar' 874 category: 'Crawler' 875 url: 'https://deepcrawl.com/bot' 876 producer: 877 name: 'Lumar' 878 url: 'https://www.lumar.io/' 879 880- regex: 'Googlebot-News' 881 name: 'Googlebot News' 882 category: 'Search bot' 883 url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers' 884 producer: 885 name: 'Google Inc.' 886 url: 'https://www.google.com/' 887 888- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet' 889 name: 'Googlebot' 890 category: 'Search bot' 891 url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers' 892 producer: 893 name: 'Google Inc.' 894 url: 'https://www.google.com/' 895 896- regex: '^Google$' 897 name: 'Googlebot' 898 category: 'Search bot' 899 url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers' 900 producer: 901 name: 'Google Inc.' 
902 url: 'https://www.google.com/' 903 904- regex: 'Google-Safety' 905 name: 'Google-Safety' 906 category: 'Crawler' 907 url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers' 908 producer: 909 name: 'Google Inc.' 910 url: 'https://www.google.com/' 911 912- regex: 'DuplexWeb-Google' 913 name: 'DuplexWeb-Google' 914 category: 'Crawler' 915 url: 'https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers' 916 producer: 917 name: 'Google Inc.' 918 url: 'https://www.google.com/' 919 920- regex: 'Google-Area120-PrivacyPolicyFetcher' 921 name: 'Google Area 120 Privacy Policy Fetcher' 922 category: 'Crawler' 923 url: 'https://area120.google.com/' 924 producer: 925 name: 'Google Inc.' 926 url: 'https://www.google.com/' 927 928- regex: 'HubSpot ' 929 name: 'HubSpot' 930 category: 'Crawler' 931 producer: 932 name: 'HubSpot Inc.' 933 url: 'https://www.hubspot.com' 934 935- regex: 'vuhuv(?:Bot|RBT)' 936 name: 'vuhuvBot' 937 category: 'Search bot' 938 url: 'https://vuhuv.com/bot.html' 939 940- regex: 'HTTPMon' 941 name: 'HTTPMon' 942 category: 'Site Monitor' 943 url: 'http://www.httpmon.com' 944 producer: 945 name: 'towards GmbH' 946 url: 'http://www.towards.ch/' 947 948- regex: 'ICC-Crawler' 949 name: 'ICC-Crawler' 950 category: 'Crawler' 951 url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html' 952 producer: 953 name: '' 954 url: '' 955 956- regex: 'inoreader\.com' 957 name: 'inoreader' 958 category: 'Feed Reader' 959 url: 'https://www.inoreader.com' 960 961- regex: 'iisbot' 962 name: 'IIS Site Analysis' 963 category: 'Crawler' 964 url: 'http://www.iis.net/iisbot.html' 965 producer: 966 name: 'Microsoft Corporation' 967 url: 'http://www.microsoft.com' 968 969- regex: 'ips-agent' 970 name: 'IPS Agent' 971 category: 'Crawler' 972 producer: 973 name: 'VeriSign, Inc' 974 url: 'http://www.verisign.com/' 975 976- regex: 'IP-Guide\.com' 977 name: 'IP-Guide Crawler' 978 category: 'Crawler' 979 url: '' 980 
producer: 981 name: '' 982 url: 'https://ip-guide.com' 983 984- regex: 'k6/' 985 name: 'K6' 986 url: 'https://k6.io/' 987 988- regex: 'kouio' 989 name: 'Kouio' 990 url: 'http://kouio.com/' 991 category: 'Feed Fetcher' 992 producer: 993 name: '' 994 url: '' 995 996- regex: 'larbin' 997 name: 'Larbin web crawler' 998 category: 'Crawler' 999 url: 'http://larbin.sourceforge.net' 1000 producer: 1001 name: '' 1002 url: '' 1003 1004- regex: '[A-z0-9]*-Lighthouse' 1005 name: 'Lighthouse' 1006 category: 'Site Monitor' 1007 url: 'https://developers.google.com/web/tools/lighthouse' 1008 producer: 1009 name: 'Lighthouse' 1010 url: 'https://developers.google.com/web/tools/lighthouse' 1011 1012- regex: 'last-modified\.com' 1013 name: 'LastMod Bot' 1014 category: 'Site Monitor' 1015 url: 'https://last-modified.com/en/about' 1016 producer: 1017 name: '' 1018 url: 'https://last-modified.com/en' 1019 1020- regex: 'linkdexbot|linkdex\.com' 1021 name: 'Linkdex Bot' 1022 category: 'Search bot' 1023 url: 'http://www.linkdex.com/bots' 1024 producer: 1025 name: 'Mojeek Ltd.' 
1026 url: 'http://www.mojeek.com' 1027 1028- regex: 'LinkedInBot' 1029 name: 'LinkedIn Bot' 1030 category: 'Social Media Agent' 1031 url: 'http://www.linkedin.com' 1032 producer: 1033 name: 'LinkedIn' 1034 url: 'http://www.linkedin.com' 1035 1036- regex: 'ltx71' 1037 name: 'LTX71' 1038 category: 'Security Checker' 1039 url: 'https://ltx71.com/' 1040 producer: 1041 name: '' 1042 url: '' 1043 1044- regex: 'Mail\.RU' 1045 name: 'Mail.Ru Bot' 1046 category: 'Search bot' 1047 url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots' 1048 producer: 1049 name: 'Mail.Ru Group' 1050 url: 'http://corp.mail.ru' 1051 1052- regex: 'magpie-crawler' 1053 name: 'Magpie-Crawler' 1054 category: 'Social Media Agent' 1055 url: 'http://www.brandwatch.com/magpie-crawler/' 1056 producer: 1057 name: 'Brandwatch' 1058 url: 'http://www.brandwatch.com' 1059 1060- regex: 'MagpieRSS' 1061 name: 'MagpieRSS' 1062 url: 'http://magpierss.sourceforge.net/' 1063 category: 'Feed Parser' 1064 producer: 1065 name: '' 1066 url: '' 1067 1068- regex: 'masscan-ng' 1069 name: 'masscan-ng' 1070 url: 'https://github.com/bi-zone/masscan-ng' 1071 category: 'Crawler' 1072 producer: 1073 name: 'BIZON, OOO' 1074 url: 'https://bi.zone/' 1075 1076- regex: '.*masscan' 1077 name: 'masscan' 1078 url: 'https://github.com/robertdavidgraham/masscan' 1079 category: 'Crawler' 1080 producer: 1081 name: 'Robert Graham' 1082 url: 'https://github.com/robertdavidgraham' 1083 1084- regex: 'Mastodon/' 1085 name: 'Mastodon Bot' 1086 category: 'Social Media Agent' 1087 1088- regex: 'meanpathbot' 1089 name: 'Meanpath Bot' 1090 category: 'Search bot' 1091 url: 'http://www.meanpath.com/meanpathbot.html' 1092 producer: 1093 name: 'Meanpath' 1094 url: 'http://www.meanpath.com' 1095 1096- regex: 'MetaJobBot' 1097 name: 'MetaJobBot' 1098 category: 'Crawler' 1099 url: 'http://www.metajob.at/the/crawler' 1100 producer: 1101 name: 'MetaJob' 1102 url: 'http://www.metajob.at' 1103 1104- regex: 'MetaInspector' 1105 name: 'MetaInspector' 
1106 category: 'Crawler' 1107 url: 'https://github.com/jaimeiniesta/metainspector' 1108 1109- regex: 'MixrankBot' 1110 name: 'Mixrank Bot' 1111 category: 'Crawler' 1112 url: 'http://mixrank.com' 1113 producer: 1114 name: 'Online Media Group, Inc.' 1115 url: '' 1116 1117- regex: 'MJ12bot' 1118 name: 'MJ12 Bot' 1119 category: 'Search bot' 1120 url: 'http://majestic12.co.uk/bot.php' 1121 producer: 1122 name: 'Majestic-12' 1123 url: 'http://majestic12.co.uk' 1124 1125- regex: 'Mnogosearch' 1126 name: 'Mnogosearch' 1127 category: 'Search bot' 1128 url: 'http://www.mnogosearch.org/' 1129 producer: 1130 name: 'Lavtech.Com Corp.' 1131 url: '' 1132- regex: 'MojeekBot' 1133 name: 'MojeekBot' 1134 category: 'Search bot' 1135 url: 'http://www.mojeek.com/bot.html' 1136 producer: 1137 name: 'Mojeek Ltd.' 1138 url: 'http://www.mojeek.com' 1139 1140- regex: 'munin' 1141 name: 'Munin' 1142 category: 'Site Monitor' 1143 url: 'http://munin-monitoring.org/' 1144 producer: 1145 name: 'Munin' 1146 url: 'http://munin-monitoring.org/' 1147 1148- regex: 'NalezenCzBot' 1149 name: 'NalezenCzBot' 1150 category: 'Crawler' 1151 url: 'http://www.nalezen.cz/about-crawler' 1152 producer: 1153 name: 'Jaroslav Kuboš' 1154 url: '' 1155 1156- regex: 'check_http/v' 1157 name: 'Nagios check_http' 1158 category: 'Site Monitor' 1159 url: 'https://nagios.org' 1160 producer: 1161 name: 'Nagios Plugins Development Team' 1162 url: 'https://nagios.org' 1163 1164- regex: 'nbertaupete95\(at\)gmail\.com' 1165 name: 'nbertaupete95' 1166 category: 'Crawler' 1167 1168- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)' 1169 name: 'Netcraft Survey Bot' 1170 category: 'Search bot' 1171 url: '' 1172 producer: 1173 name: 'Netcraft' 1174 url: 'http://www.netcraft.com' 1175 1176- regex: 'netEstate NE Crawler' 1177 name: 'netEstate' 1178 category: 'Crawler' 1179 url: 'http://www.website-datenbank.de/Impressum' 1180 producer: 1181 name: 'netEstate GmbH' 1182 url: 'https://www.netestate.de/en/' 1183 1184- 
regex: 'Netvibes' 1185 name: 'Netvibes' 1186 url: 'http://www.netvibes.com/' 1187 category: 'Feed Fetcher' 1188 producer: 1189 name: '' 1190 url: '' 1191 1192- regex: 'NewsBlur .*(?:Fetcher|Finder)' 1193 name: 'NewsBlur' 1194 url: 'http://www.newsblur.com' 1195 category: 'Feed Fetcher' 1196 producer: 1197 name: '' 1198 url: '' 1199 1200- regex: 'NewsGatorOnline' 1201 name: 'NewsGator' 1202 url: 'http://www.newsgator.com' 1203 category: 'Feed Fetcher' 1204 producer: 1205 name: '' 1206 url: '' 1207 1208- regex: 'nlcrawler' 1209 name: 'NLCrawler' 1210 category: 'Crawler' 1211 url: '' 1212 producer: 1213 name: 'Northern Light' 1214 url: 'http://northernlight.com' 1215 1216- regex: 'Nmap Scripting Engine' 1217 name: 'Nmap' 1218 category: 'Security Checker' 1219 url: 'https://nmap.org/book/nse.html' 1220 producer: 1221 name: 'Nmap' 1222 url: 'https://nmap.org/' 1223 1224- regex: 'Nuzzel' 1225 name: 'Nuzzel' 1226 category: 'Crawler' 1227 producer: 1228 name: 'Nuzzel' 1229 url: 'https://www.nuzzel.com/' 1230 1231- regex: 'NodePing' 1232 name: 'NodePing' 1233 category: 'Site Monitor' 1234 url: 'https://nodeping.com' 1235 producer: 1236 name: 'NodePing' 1237 url: 'https://nodeping.com' 1238 1239- regex: 'Octopus [\d.]+' 1240 name: 'Octopus' 1241 1242- regex: 'OnlineOrNot\.com_bot' 1243 name: 'OnlineOrNot Bot' 1244 category: 'Site Monitor' 1245 url: 'https://onlineornot.com/website-monitoring' 1246 producer: 1247 name: 'OnlineOrNot' 1248 url: 'https://onlineornot.com' 1249 1250- regex: 'omgili' 1251 name: 'Omgili bot' 1252 category: 'Search bot' 1253 url: 'http://www.omgili.com/Crawler.html' 1254 producer: 1255 name: 'Omgili' 1256 url: 'http://www.omgili.com' 1257 1258- regex: 'OpenindexSpider' 1259 name: 'Openindex Spider' 1260 category: 'Search bot' 1261 url: 'http://www.openindex.io/en/webmasters/spider.html' 1262 producer: 1263 name: 'Openindex B.V.' 
1264 url: 'http://www.openindex.io' 1265 1266- regex: 'spbot' 1267 name: 'OpenLinkProfiler' 1268 category: 'Crawler' 1269 url: 'http://openlinkprofiler.org/bot' 1270 producer: 1271 name: 'Axandra GmbH' 1272 url: 'http://www.axandra.com' 1273 1274- regex: 'OpenWebSpider' 1275 name: 'OpenWebSpider' 1276 category: 'Crawler' 1277 url: 'http://www.openwebspider.org' 1278 producer: 1279 name: 'OpenWebSpider Lab' 1280 url: 'http://lab.openwebspider.org' 1281 1282- regex: 'OrangeBot|VoilaBot' 1283 name: 'Orange Bot' 1284 category: 'Search bot' 1285 url: 'http://lemoteur.orange.fr' 1286 producer: 1287 name: 'Orange' 1288 url: 'http://www.orange.fr' 1289 1290- regex: 'PaperLiBot' 1291 name: 'PaperLiBot' 1292 category: 'Search bot' 1293 url: 'http://support.paper.li/entries/20023257-what-is-paper-li' 1294 producer: 1295 name: 'Smallrivers SA' 1296 url: 'http://www.paper.li' 1297 1298- regex: 'phantomas/' 1299 name: 'Phantomas' 1300 category: 'Site Monitor' 1301 url: 'https://github.com/macbre/phantomas' 1302 1303- regex: 'phpservermon' 1304 name: 'PHP Server Monitor' 1305 category: 'Site Monitor' 1306 url: 'https://github.com/phpservermon/phpservermon' 1307 producer: 1308 name: 'PHP Server Monitor' 1309 url: 'http://www.phpservermonitor.org/' 1310 1311- regex: 'Pocket(?:ImageCache|Parser)' 1312 name: 'Pocket' 1313 category: 'Read-it-later Service' 1314 url: 'https://getpocket.com/pocketparser_ua' 1315 producer: 1316 name: 'Read It Later, Inc.' 
1317 url: 'https://getpocket.com/' 1318 1319- regex: 'PritTorrent' 1320 name: 'PritTorrent' 1321 category: 'Crawler' 1322 url: 'https://github.com/astro/prittorrent' 1323 producer: 1324 name: 'Bitlove' 1325 url: 'http://bitlove.org/' 1326 1327- regex: 'PRTG Network Monitor' 1328 name: 'PRTG Network Monitor' 1329 category: 'Network Monitor' 1330 url: 'https://www.paessler.com/prtg' 1331 producer: 1332 name: 'Paessler AG' 1333 url: 'https://www.paessler.com' 1334 1335- regex: 'psbot' 1336 name: 'Picsearch bot' 1337 category: 'Search bot' 1338 url: 'http://www.picsearch.com/bot.html' 1339 producer: 1340 name: 'Picsearch' 1341 url: 'http://www.picsearch.com' 1342 1343- regex: 'Pingdom(?:\.com|TMS)' 1344 name: 'Pingdom Bot' 1345 category: 'Site Monitor' 1346 url: '' 1347 producer: 1348 name: 'Pingdom AB' 1349 url: 'https://www.pingdom.com' 1350 1351- regex: 'Quora Link Preview' 1352 name: 'Quora Link Preview' 1353 category: 'Crawler' 1354 url: '' 1355 producer: 1356 name: 'Quora' 1357 url: 'http://www.quora.com' 1358 1359- regex: 'Quora-Bot' 1360 name: 'Quora Bot' 1361 category: 'Crawler' 1362 url: '' 1363 producer: 1364 name: 'Quora' 1365 url: 'https://www.quora.com/' 1366 1367- regex: 'RamblerMail' 1368 name: 'RamblerMail Image Proxy' 1369 category: 'Crawler' 1370 url: '' 1371 producer: 1372 name: 'Rambler&Co' 1373 url: 'https://rambler-co.ru/' 1374 1375- regex: 'QuerySeekerSpider' 1376 name: 'QuerySeekerSpider' 1377 category: 'Crawler' 1378 url: 'http://queryseeker.com/bot.html' 1379 producer: 1380 name: 'QueryEye Inc.' 
1381 url: 'http://queryeye.com' 1382 1383- regex: 'Qwantify|Qwantbot' 1384 name: 'Qwantbot' 1385 category: 'Crawler' 1386 url: 'https://help.qwant.com/bot/' 1387 producer: 1388 name: 'Qwant Corporation' 1389 url: 'https://www.qwant.com/' 1390 1391- regex: 'Rainmeter' 1392 name: 'Rainmeter' 1393 category: 'Crawler' 1394 url: 'https://www.rainmeter.net' 1395 1396- regex: 'redditbot' 1397 name: 'Reddit Bot' 1398 category: 'Social Media Agent' 1399 url: 'http://www.reddit.com/feedback' 1400 producer: 1401 name: 'reddit inc.' 1402 url: 'http://www.reddit.com' 1403 1404- regex: 'Riddler' 1405 name: 'Riddler' 1406 category: 'Security search bot' 1407 url: 'https://riddler.io/about' 1408 producer: 1409 name: 'F-Secure' 1410 url: 'https://www.f-secure.com' 1411 1412- regex: 'rogerbot' 1413 name: 'Rogerbot' 1414 category: 'Crawler' 1415 url: 'http://moz.com/help/pro/what-is-rogerbot-' 1416 producer: 1417 name: 'SEOmoz, Inc.' 1418 url: 'http://moz.com/' 1419 1420- regex: 'ROI Hunter' 1421 name: 'ROI Hunter' 1422 category: 'Crawler' 1423 url: '' 1424 producer: 1425 name: 'Roihunter a.s.' 1426 url: 'http://roihunter.com/' 1427 1428- regex: 'SafeDNSBot' 1429 name: 'SafeDNSBot' 1430 category: 'Crawler' 1431 url: 'https://www.safedns.com/searchbot' 1432 producer: 1433 name: 'SafeDNS, Inc.' 
1434 url: 'https://www.safedns.com/' 1435 1436- regex: 'Scrapy' 1437 name: 'Scrapy' 1438 category: 'Crawler' 1439 url: 'http://scrapy.org' 1440 1441- regex: 'Screaming Frog SEO Spider' 1442 name: 'Screaming Frog SEO Spider' 1443 category: 'Crawler' 1444 url: 'http://www.screamingfrog.co.uk/seo-spider' 1445 producer: 1446 name: 'Screaming Frog Ltd' 1447 url: 'http://www.screamingfrog.co.uk' 1448 1449- regex: 'ScreenerBot' 1450 name: 'ScreenerBot' 1451 category: 'Crawler' 1452 url: 'http://www.screenerbot.com' 1453 producer: 1454 name: '' 1455 url: '' 1456 1457- regex: 'SemrushBot' 1458 name: 'SemrushBot' 1459 category: 'Crawler' 1460 url: 'https://www.semrush.com/bot/' 1461 producer: 1462 name: 'Semrush Inc.' 1463 url: 'https://www.semrush.com/' 1464 1465- regex: 'BacklinksExtendedBot' 1466 name: 'BacklinksExtendedBot' 1467 category: 'Crawler' 1468 url: 'https://www.semrush.com/bot/' 1469 producer: 1470 name: 'Semrush Inc.' 1471 url: 'https://www.semrush.com/' 1472 1473- regex: 'SerpReputationManagementAgent' 1474 name: 'Semrush Reputation Management' 1475 category: 'Service Agent' 1476 url: 'https://www.semrush.com/bot/' 1477 producer: 1478 name: 'Semrush Inc.' 1479 url: 'https://www.semrush.com/' 1480 1481- regex: 'SplitSignalBot' 1482 name: 'SplitSignalBot' 1483 category: 'Crawler' 1484 url: 'https://www.semrush.com/bot/' 1485 producer: 1486 name: 'Semrush Inc.' 1487 url: 'https://www.semrush.com/' 1488 1489- regex: 'SiteAuditBot' 1490 name: 'SiteAuditBot' 1491 category: 'Crawler' 1492 url: 'https://www.semrush.com/bot/' 1493 producer: 1494 name: 'Semrush Inc.' 
1495 url: 'https://www.semrush.com/' 1496 1497- regex: 'SensikaBot' 1498 name: 'Sensika Bot' 1499 category: '' 1500 url: '' 1501 producer: 1502 name: 'Sensika' 1503 url: 'http://sensika.com' 1504 1505- regex: 'SEOENG(?:World)?Bot' 1506 name: 'SEOENGBot' 1507 category: 'Crawler' 1508 url: 'http://www.seoengine.com/seoengbot.htm' 1509 producer: 1510 name: 'SEO Engine' 1511 url: 'http://www.seoengine.com' 1512 1513- regex: 'seoscanners\.net' 1514 name: 'Seoscanners.net' 1515 category: 'Crawler' 1516 url: '' 1517 1518- regex: 'SkypeUriPreview' 1519 name: 'Skype URI Preview' 1520 category: 'Service Agent' 1521 url: '' 1522 producer: 1523 name: 'Skype Communications S.à.r.l.' 1524 url: 'https://www.skype.com' 1525 1526- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator' 1527 name: 'Seznam Bot' 1528 category: 'Search bot' 1529 url: 'http://www.mapy.cz/cz/seznambot.html' 1530 producer: 1531 name: 'Seznam.cz, a.s.' 1532 url: 'http://www.seznam.cz/' 1533 1534- regex: 'shopify-partner-homepage-scraper' 1535 name: 'Shopify Partner' 1536 category: 'Crawler' 1537 url: 'https://www.shopify.com/partners' 1538 producer: 1539 name: 'Shopify' 1540 url: 'https://www.shopify.com/' 1541 1542- regex: 'ShopWiki' 1543 name: 'ShopWiki' 1544 category: 'Search tools' 1545 url: 'http://www.shopwiki.com/wiki/Help:Bot' 1546 producer: 1547 name: 'ShopWiki Corp.' 
1548 url: 'http://www.shopwiki.com' 1549 1550- regex: 'SilverReader' 1551 name: 'SilverReader' 1552 url: 'http://silverreader.com' 1553 category: 'Feed Fetcher' 1554 producer: 1555 name: '' 1556 url: '' 1557 1558- regex: 'SimplePie' 1559 name: 'SimplePie' 1560 url: 'http://www.simplepie.org' 1561 category: 'Feed Parser' 1562 producer: 1563 name: '' 1564 url: '' 1565 1566- regex: 'SISTRIX Crawler' 1567 name: 'SISTRIX Crawler' 1568 category: 'Crawler' 1569 url: 'http://crawler.sistrix.net' 1570 producer: 1571 name: 'SISTRIX GmbH' 1572 url: 'http://www.sistrix.de' 1573 1574- regex: 'compatible; (?:SISTRIX )?Optimizer' 1575 name: 'SISTRIX Optimizer' 1576 category: 'Crawler' 1577 url: 'https://optimizer.sistrix.com' 1578 producer: 1579 name: 'SISTRIX GmbH' 1580 url: 'http://www.sistrix.de' 1581 1582- regex: 'SiteSucker' 1583 name: 'SiteSucker' 1584 category: 'Crawler' 1585 url: 'http://ricks-apps.com/osx/sitesucker/' 1586 1587- regex: 'sixy\.ch' 1588 name: 'Sixy.ch' 1589 category: 'Site Monitor' 1590 url: 'http://sixy.ch' 1591 producer: 1592 name: 'Manuel Kasper' 1593 url: 'https://neon1.net/' 1594 1595- regex: 'Slackbot|Slack-ImgProxy' 1596 name: 'Slackbot' 1597 category: 'Crawler' 1598 url: 'https://api.slack.com/robots' 1599 producer: 1600 name: 'Slack Technologies' 1601 url: 'http://slack.com' 1602 1603- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider' 1604 name: 'Sogou Spider' 1605 category: 'Search bot' 1606 url: 'http://www.sogou.com/docs/help/webmasters.htm' 1607 producer: 1608 name: 'Sohu, Inc.' 1609 url: 'http://www.sogou.com' 1610 1611- regex: 'Sosospider|Sosoimagespider' 1612 name: 'Soso Spider' 1613 category: 'Search bot' 1614 url: 'http://help.soso.com/webspider.htm' 1615 producer: 1616 name: 'Tencent Holdings' 1617 url: 'http://www.soso.com' 1618 1619- regex: 'Sprinklr' 1620 name: 'Sprinklr' 1621 category: 'Crawler' 1622 url: '' 1623 producer: 1624 name: 'Sprinklr, Inc.' 
1625 url: 'https://www.sprinklr.com/' 1626 1627- regex: 'SSL Labs' 1628 name: 'SSL Labs' 1629 category: 'Validator' 1630 url: 'https://www.ssllabs.com/about/assessment.html' 1631 producer: 1632 name: 'SSL Labs' 1633 url: 'https://www.ssllabs.com/about/assessment.html' 1634 1635- regex: 'StatusCake' 1636 name: 'StatusCake' 1637 category: 'Site Monitor' 1638 url: 'https://www.statuscake.com' 1639 producer: 1640 name: 'StatusCake' 1641 url: 'https://www.statuscake.com' 1642 1643- regex: 'Superfeedr bot' 1644 name: 'Superfeedr Bot' 1645 category: 'Feed Fetcher' 1646 url: '' 1647 producer: 1648 name: 'Superfeedr' 1649 url: 'https://superfeedr.com/' 1650 1651- regex: 'Sparkler' 1652 name: 'Sparkler' 1653 category: 'Crawler' 1654 url: 'https://github.com/USCDataScience/sparkler' 1655 1656- regex: 'Spinn3r' 1657 name: 'Spinn3r' 1658 category: 'Crawler' 1659 url: 'http://spinn3r.com/robot' 1660 producer: 1661 name: 'Tailrank Inc' 1662 url: 'http://spinn3r.com' 1663 1664- regex: 'SputnikBot' 1665 name: 'Sputnik Bot' 1666 category: 'Crawler' 1667 url: '' 1668 1669- regex: 'SputnikFaviconBot' 1670 name: 'Sputnik Favicon Bot' 1671 category: 'Crawler' 1672 url: '' 1673 1674- regex: 'SputnikImageBot' 1675 name: 'Sputnik Image Bot' 1676 category: 'Crawler' 1677 url: '' 1678 1679- regex: 'SurveyBot' 1680 name: 'Survey Bot' 1681 category: 'Search bot' 1682 url: 'http://www.domaintools.com/webmasters/surveybot.php' 1683 producer: 1684 name: 'Domain Tools' 1685 url: 'http://www.domaintools.com' 1686 1687- regex: 'TarmotGezgin' 1688 name: 'Tarmot Gezgin' 1689 url: 'http://www.tarmot.com/gezgin/' 1690 category: 'Search bot' 1691 1692- regex: 'TelegramBot' 1693 name: 'TelegramBot' 1694 url: 'https://telegram.org/blog/bot-revolution' 1695 1696- regex: 'TLSProbe' 1697 name: 'TLSProbe' 1698 url: 'https://scan.trustnet.venafi.com/' 1699 category: 'Security search bot' 1700 producer: 1701 name: 'Venafi TrustNet' 1702 url: 'https://www.venafi.com' 1703 1704- regex: 'TinEye-bot' 1705 name: 
'TinEye Crawler' 1706 category: 'Search bot' 1707 url: 'http://www.tineye.com/crawler.html' 1708 producer: 1709 name: 'Idée Inc.' 1710 url: 'http://ideeinc.com' 1711 1712- regex: 'Tiny Tiny RSS' 1713 name: 'Tiny Tiny RSS' 1714 url: 'http://tt-rss.org' 1715 category: 'Feed Fetcher' 1716 producer: 1717 name: '' 1718 url: '' 1719 1720- regex: 'theoldreader\.com' 1721 name: 'theoldreader' 1722 category: 'Feed Reader' 1723 url: 'https://theoldreader.com' 1724 1725- regex: 'Trackable/0\.1' 1726 name: 'Chartable' 1727 category: 'Site Monitor' 1728 url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix' 1729 producer: 1730 name: 'Chartable' 1731 url: 'https://chartable.com' 1732 1733- regex: 'trendictionbot' 1734 name: 'Trendiction Bot' 1735 category: 'Crawler' 1736 url: 'http://www.trendiction.de/bot' 1737 producer: 1738 name: 'Talkwalker Inc.' 1739 url: 'http://www.talkwalker.com' 1740 1741- regex: 'TurnitinBot' 1742 name: 'TurnitinBot' 1743 category: 'Crawler' 1744 url: 'http://www.turnitin.com/robot/crawlerinfo.html' 1745 producer: 1746 name: 'iParadigms, LLC.' 
1747 url: 'http://www.turnitin.com' 1748 1749- regex: 'TweetedTimes' 1750 name: 'TweetedTimes Bot' 1751 category: 'Crawler' 1752 url: 'https://tweetedtimes.com/' 1753 producer: 1754 name: 'TweetedTimes' 1755 url: 'https://tweetedtimes.com/' 1756 1757- regex: 'TweetmemeBot' 1758 name: 'Tweetmeme Bot' 1759 category: 'Crawler' 1760 url: 'http://tweetmeme.com/' 1761 producer: 1762 name: 'Mediasift' 1763 url: '' 1764 1765- regex: 'Twingly Recon' 1766 name: 'Twingly Recon' 1767 category: 'Crawler' 1768 producer: 1769 name: 'Twingly' 1770 url: 'https://www.twingly.com' 1771 1772- regex: 'Twitterbot' 1773 name: 'Twitterbot' 1774 category: 'Social Media Agent' 1775 url: 'https://dev.twitter.com/docs/cards/getting-started' 1776 producer: 1777 name: 'Twitter' 1778 url: 'http://www.twitter.com' 1779 1780- regex: 'UniversalFeedParser' 1781 name: 'UniversalFeedParser' 1782 category: 'Feed Fetcher' 1783 url: 'https://github.com/kurtmckee/feedparser' 1784 producer: 1785 name: 'Kurt McKee' 1786 url: 'https://github.com/kurtmckee' 1787 1788- regex: 'via secureurl\.fwdcdn\.com' 1789 name: 'UkrNet Mail Proxy' 1790 category: 'Crawler' 1791 url: '' 1792 producer: 1793 name: 'UkrNet Ltd' 1794 url: 'https://www.ukr.net/' 1795 1796- regex: 'Uptime(?:bot)?/' 1797 name: 'Uptimebot' 1798 category: 'Site Monitor' 1799 url: 'https://uptime.com/uptime-bot' 1800 producer: 1801 name: 'Uptime' 1802 url: 'https://uptime.com/' 1803 1804- regex: 'UptimeRobot' 1805 name: 'UptimeRobot' 1806 category: 'Site Monitor' 1807 url: 'https://uptimerobot.com/' 1808 producer: 1809 name: 'Uptime Robot' 1810 url: 'https://uptimerobot.com/' 1811 1812- regex: 'URLAppendBot' 1813 name: 'URLAppendBot' 1814 category: 'Crawler' 1815 url: 'http://www.profound.net/urlappendbot.html' 1816 producer: 1817 name: 'Profound Networks' 1818 url: 'http://www.profound.net' 1819 1820- regex: 'Vagabondo' 1821 name: 'Vagabondo' 1822 category: 'Crawler' 1823 url: '' 1824 producer: 1825 name: 'WiseGuys' 1826 url: 
'http://www.wise-guys.nl/' 1827 1828- regex: 'vkShare; ' 1829 name: 'VK Share Button' 1830 category: 'Crawler' 1831 url: 'https://dev.vk.com/en/widgets/share' 1832 producer: 1833 name: 'VK' 1834 url: 'https://vk.com/' 1835 1836- regex: 'VKRobot' 1837 name: 'VK Robot' 1838 category: 'Crawler' 1839 url: 'https://dev.vk.com/en/' 1840 producer: 1841 name: 'VK' 1842 url: 'https://vk.com/' 1843 1844- regex: 'VSMCrawler' 1845 name: 'Visual Site Mapper Crawler' 1846 category: 'Crawler' 1847 url: 'http://www.visualsitemapper.com/crawler' 1848 producer: 1849 name: 'Alentum Software Ltd.' 1850 url: 'http://www.alentum.com' 1851 1852- regex: 'Jigsaw' 1853 name: 'W3C CSS Validator' 1854 category: 'Validator' 1855 url: 'http://jigsaw.w3.org/css-validator' 1856 producer: 1857 name: 'W3C' 1858 url: 'http://www.w3.org' 1859 1860- regex: 'W3C_I18n-Checker' 1861 name: 'W3C I18N Checker' 1862 category: 'Validator' 1863 url: 'http://validator.w3.org/i18n-checker' 1864 producer: 1865 name: 'W3C' 1866 url: 'http://www.w3.org' 1867 1868- regex: 'W3C-checklink' 1869 name: 'W3C Link Checker' 1870 category: 'Validator' 1871 url: 'http://validator.w3.org/checklink' 1872 producer: 1873 name: 'W3C' 1874 url: 'http://www.w3.org' 1875 1876- regex: 'W3C_Validator|Validator\.nu' 1877 name: 'W3C Markup Validation Service' 1878 category: 'Validator' 1879 url: 'http://validator.w3.org/services' 1880 producer: 1881 name: 'W3C' 1882 url: 'http://www.w3.org' 1883 1884- regex: 'W3C-mobileOK' 1885 name: 'W3C MobileOK Checker' 1886 category: 'Validator' 1887 url: 'http://validator.w3.org/mobile' 1888 producer: 1889 name: 'W3C' 1890 url: 'http://www.w3.org' 1891 1892- regex: 'W3C_Unicorn' 1893 name: 'W3C Unified Validator' 1894 category: 'Validator' 1895 url: 'http://validator.w3.org/unicorn' 1896 producer: 1897 name: 'W3C' 1898 url: 'http://www.w3.org' 1899 1900- regex: 'P3P Validator' 1901 name: 'W3C P3P Validator' 1902 category: 'Validator' 1903 url: 'https://www.w3.org/P3P/validator.html' 1904 producer: 
1905 name: 'W3C' 1906 url: 'https://www.w3.org' 1907 1908- regex: 'Wappalyzer' 1909 name: 'Wappalyzer' 1910 url: 'https://github.com/AliasIO/Wappalyzer' 1911 producer: 1912 name: 'AliasIO' 1913 url: 'https://github.com/AliasIO' 1914 1915- regex: 'PTST/' 1916 name: 'WebPageTest' 1917 category: 'Site Monitor' 1918 url: 'https://www.webpagetest.org' 1919 1920- regex: 'WeSEE' 1921 name: 'WeSEE:Search' 1922 category: 'Search bot' 1923 url: 'http://www.wesee.com/bot' 1924 producer: 1925 name: 'WeSEE Ltd' 1926 url: 'http://www.wesee.com' 1927 1928- regex: 'WebbCrawler' 1929 name: 'WebbCrawler' 1930 category: 'Crawler' 1931 url: 'http://badcheese.com/crawler.html' 1932 producer: 1933 name: 'Steve Webb' 1934 url: 'http://badcheese.com' 1935 1936- regex: 'websitepulse[+ ]checker' 1937 name: 'WebSitePulse' 1938 category: 'Site Monitor' 1939 url: 'http://www.websitepulse.com/' 1940 producer: 1941 name: 'WebSitePulse' 1942 url: 'http://www.websitepulse.com/' 1943 1944- regex: 'WordPress.+isitwp\.com' 1945 name: 'IsItWP' 1946 category: 'Crawler' 1947 url: 'https://www.isitwp.com/' 1948 producer: 1949 name: 'WPBeginner, LLC' 1950 url: 'https://www.wpbeginner.com/' 1951 1952- regex: 'Automattic Analytics Crawler' 1953 name: 'Automattic Analytics' 1954 category: 'Crawler' 1955 url: 'https://wordpress.com/crawler/' 1956 producer: 1957 name: 'Wordpress.org' 1958 url: 'https://wordpress.org/' 1959 1960- regex: 'WordPress\.com mShots' 1961 name: 'WordPress.com mShots' 1962 category: 'Service Agent' 1963 url: 'https://wordpress.org/' 1964 producer: 1965 name: 'Wordpress.org' 1966 url: 'https://wordpress.org/' 1967 1968- regex: 'wp\.com feedbot' 1969 name: 'wp.com feedbot' 1970 category: 'Feed Fetcher' 1971 url: 'https://wordpress.com/' 1972 producer: 1973 name: 'Automattic, Inc.' 
1974 url: 'https://automattic.com/' 1975 1976- regex: 'WordPress' 1977 name: 'WordPress' 1978 category: 'Service Agent' 1979 url: 'https://wordpress.org/' 1980 producer: 1981 name: 'Wordpress.org' 1982 url: 'https://wordpress.org/' 1983 1984- regex: 'Wotbox' 1985 name: 'Wotbox' 1986 category: 'Search bot' 1987 url: 'http://www.wotbox.com/bot/' 1988 producer: 1989 name: 'Wotbox' 1990 url: 'http://www.wotbox.com' 1991 1992- regex: 'XenForo' 1993 name: 'XenForo' 1994 category: 'Service Agent' 1995 url: 'https://xenforo.com/' 1996 producer: 1997 name: 'XenForo Ltd.' 1998 url: 'https://xenforo.com/' 1999 2000- regex: 'yacybot' 2001 name: 'YaCy' 2002 category: 'Search bot' 2003 url: 'http://yacy.net/bot.html' 2004 producer: 2005 name: 'YaCy' 2006 url: 'http://yacy.net' 2007 2008- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler' 2009 name: 'Yahoo! Slurp' 2010 category: 'Search bot' 2011 url: 'http://help.yahoo.com/ysearch/slurp' 2012 producer: 2013 name: 'Yahoo! Inc.' 2014 url: 'http://www.yahoo.com' 2015 2016- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone' 2017 name: 'Yahoo! Link Preview' 2018 category: 'Crawler' 2019 url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html' 2020 producer: 2021 name: 'Yahoo! Inc.' 2022 url: 'http://www.yahoo.com' 2023 2024- regex: 'YahooMailProxy' 2025 name: 'Yahoo! Mail Proxy' 2026 category: 'Service Agent' 2027 url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html' 2028 producer: 2029 name: 'Yahoo! Inc.' 2030 url: 'http://www.yahoo.com' 2031 2032- regex: 'YahooCacheSystem' 2033 name: 'Yahoo! Cache System' 2034 category: 'Crawler' 2035 url: '' 2036 producer: 2037 name: 'Yahoo! Inc.' 2038 url: 'http://www.yahoo.com' 2039 2040- regex: 'Y!J-BRW' 2041 name: 'Yahoo! Japan BRW' 2042 category: 'Crawler' 2043 url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' 2044 producer: 2045 name: 'Yahoo! Japan Corp.' 2046 url: 'https://www.yahoo.co.jp/' 2047 2048- regex: 'Y!J-WSC' 2049 name: 'Yahoo! 
Japan WSC' 2050 category: 'Crawler' 2051 url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' 2052 producer: 2053 name: 'Yahoo! Japan Corp.' 2054 url: 'https://www.yahoo.co.jp/' 2055 2056- regex: 'Y!J-ASR' 2057 name: 'Yahoo! Japan ASR' 2058 category: 'Crawler' 2059 url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' 2060 producer: 2061 name: 'Yahoo! Japan Corp.' 2062 url: 'https://www.yahoo.co.jp/' 2063 2064- regex: '^Y!J' 2065 name: 'Yahoo! Japan' 2066 category: 'Crawler' 2067 url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955' 2068 producer: 2069 name: 'Yahoo! Japan Corp.' 2070 url: 'https://www.yahoo.co.jp/' 2071 2072- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher' 2073 name: 'Yandex Bot' 2074 category: 'Search bot' 2075 url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html' 2076 producer: 2077 name: 'Yandex LLC' 2078 url: 'https://yandex.com/company/' 2079 2080- regex: 'Yeti|NaverJapan|AdsBot-Naver' 2081 name: 'Yeti/Naverbot' 2082 category: 'Search bot' 2083 url: 'http://help.naver.com/robots/' 2084 producer: 2085 name: 'Naver' 2086 url: 'http://www.naver.com' 2087 2088- regex: 'YoudaoBot' 2089 name: 'Youdao Bot' 2090 category: 'Search bot' 2091 url: 'http://www.youdao.com/help/webmaster/spider' 2092 producer: 2093 name: 'NetEase, Inc.' 
2094 url: 'http://corp.163.com' 2095 2096- regex: 'YOURLS' 2097 name: 'Yourls' 2098 category: 'Crawler' 2099 url: 'http://yourls.org' 2100 2101- regex: 'YRSpider|YYSpider' 2102 name: 'Yunyun Bot' 2103 category: 'Search bot' 2104 url: 'http://www.yunyun.com/SiteInfo.php?r=about' 2105 producer: 2106 name: 'YunYun' 2107 url: 'http://www.yunyun.com' 2108 2109- regex: 'zgrab' 2110 name: 'zgrab' 2111 category: 'Security Checker' 2112 url: 'https://github.com/zmap/zgrab' 2113 2114- regex: 'Zookabot' 2115 name: 'Zookabot' 2116 category: 'Crawler' 2117 url: 'http://zookabot.com' 2118 producer: 2119 name: 'Hwacha ApS' 2120 url: 'http://hwacha.dk' 2121 2122- regex: 'ZumBot' 2123 name: 'ZumBot' 2124 category: 'Search bot' 2125 url: 'http://help.zum.com/inquiry' 2126 producer: 2127 name: 'ZUM internet' 2128 url: 'http://www.zuminternet.com/' 2129 2130- regex: 'YottaaMonitor' 2131 name: 'Yottaa Site Monitor' 2132 category: 'Site Monitor' 2133 url: 'http://www.yottaa.com/products/site-monitor' 2134 producer: 2135 name: 'Yottaa' 2136 url: 'http://www.yottaa.com/' 2137 2138- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857' 2139 name: 'Yahoo Gemini' 2140 category: 'Crawler' 2141 url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html' 2142 producer: 2143 name: 'Yahoo! Inc.' 2144 url: 'http://www.yahoo.com' 2145 2146- regex: '.*Java.*outbrain' 2147 name: 'Outbrain' 2148 category: 'Crawler' 2149 url: '' 2150 producer: 2151 name: 'Outbrain' 2152 url: 'http://www.outbrain.com/' 2153 2154- regex: 'HubPages.*crawlingpolicy' 2155 name: 'HubPages' 2156 category: 'Crawler' 2157 url: 'https://hubpages.com/help/crawlingpolicy' 2158 producer: 2159 name: 'HubPages, Inc.' 
2160 url: 'https://discover.hubpages.com/' 2161 2162- regex: 'Pinterest(?:bot)?/.*www\.pinterest\.com' 2163 name: 'Pinterest' 2164 url: 'https://help.pinterest.com/en/business/article/pinterest-crawler' 2165 category: 'Crawler' 2166 producer: 2167 name: 'Pinterest' 2168 url: 'https://www.pinterest.com/' 2169 2170- regex: '.*Site24x7' 2171 name: 'Site24x7 Website Monitoring' 2172 category: 'Site Monitor' 2173 url: 'https://www.site24x7.com/site24x7-faq.html' 2174 producer: 2175 name: 'Site24x7' 2176 url: 'https://www.site24x7.com' 2177 2178- regex: '.* HLB' 2179 name: 'Site24x7 Defacement Monitor' 2180 category: 'Site Monitor' 2181 url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor' 2182 producer: 2183 name: 'Site24x7' 2184 url: 'https://www.site24x7.com/' 2185 2186- regex: 's~snapchat-proxy' 2187 name: 'Snapchat Proxy' 2188 category: 'Crawler' 2189 url: 'https://www.snapchat.com' 2190 producer: 2191 name: 'Snapchat Inc.' 2192 url: 'https://www.snapchat.com' 2193 2194- regex: 'Snap URL Preview Service' 2195 name: 'Snap URL Preview Service' 2196 category: 'Service Agent' 2197 url: 'https://developers.snap.com/robots' 2198 producer: 2199 name: 'Snapchat Inc.' 2200 url: 'https://www.snapchat.com/' 2201 2202- regex: 'SnapchatAds' 2203 name: 'Snapchat Ads' 2204 category: 'Crawler' 2205 url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US' 2206 producer: 2207 name: 'Snapchat Inc.' 
2208 url: 'https://www.snapchat.com/' 2209 2210- regex: "Let's Encrypt validation server" 2211 name: "Let's Encrypt Validation" 2212 category: 'Service Agent' 2213 url: 'https://letsencrypt.org/how-it-works/' 2214 producer: 2215 name: "Let's Encrypt" 2216 url: 'https://letsencrypt.org' 2217 2218- regex: 'GrapeshotCrawler' 2219 name: 'Grapeshot' 2220 category: 'Crawler' 2221 url: 'https://www.grapeshot.com/crawler' 2222 producer: 2223 name: 'Grapeshot' 2224 url: 'https://www.grapeshot.com' 2225 2226- regex: 'www\.monitor\.us' 2227 name: 'Monitor.Us' 2228 category: 'Site Monitor' 2229 url: 'http://www.monitor.us' 2230 producer: 2231 name: 'Monitor.Us' 2232 url: 'http://www.monitor.us' 2233 2234- regex: 'Catchpoint' 2235 name: 'Catchpoint' 2236 category: 'Site Monitor' 2237 url: 'https://www.catchpoint.com/' 2238 producer: 2239 name: 'Catchpoint Systems' 2240 url: 'https://www.catchpoint.com/' 2241 2242- regex: 'bitlybot' 2243 name: 'BitlyBot' 2244 category: 'Crawler' 2245 url: 'https://bitly.com' 2246 producer: 2247 name: 'Bitly, Inc.' 
2248 url: 'https://bitly.com' 2249 2250- regex: 'Zao/' 2251 name: 'Zao' 2252 category: 'Crawler' 2253 2254- regex: 'lycos' 2255 name: 'Lycos' 2256 2257- regex: 'Slurp' 2258 name: 'Inktomi Slurp' 2259 2260- regex: 'Speedy Spider' 2261 name: 'Speedy' 2262 2263- regex: 'ScoutJet' 2264 name: 'ScoutJet' 2265 2266- regex: 'nrsbot|netresearch' 2267 name: 'NetResearchServer' 2268 2269- regex: 'scooter' 2270 name: 'Scooter' 2271 2272- regex: 'gigabot' 2273 name: 'Gigabot' 2274 2275- regex: 'charlotte' 2276 name: 'Charlotte' 2277 2278- regex: 'Pompos' 2279 name: 'Pompos' 2280 2281- regex: 'ichiro' 2282 name: 'ichiro' 2283 2284- regex: 'PagePeeker' 2285 name: 'PagePeeker' 2286 category: 'Crawler' 2287 url: 'https://pagepeeker.com/robots/' 2288 producer: 2289 name: 'PAGEPEEKER SRL' 2290 url: 'https://pagepeeker.com/' 2291 2292- regex: 'WebThumbnail' 2293 name: 'WebThumbnail' 2294 2295- regex: 'Willow Internet Crawler' 2296 name: 'Willow Internet Crawler' 2297 2298- regex: 'EmailWolf' 2299 name: 'EmailWolf' 2300 2301- regex: 'NetLyzer FastProbe' 2302 name: 'NetLyzer FastProbe' 2303 2304- regex: 'AdMantX.*admantx\.com' 2305 name: 'ADMantX' 2306 2307- regex: 'Server Density Service Monitoring' 2308 name: 'Server Density' 2309 2310- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)' 2311 name: 'RSSRadio Bot' 2312 2313- regex: '^sentry' 2314 name: 'Sentry Bot' 2315 producer: 2316 name: 'Sentry' 2317 url: 'https://sentry.io' 2318 2319- regex: '^Spotify/[\d.]+$' 2320 name: 'Spotify' 2321 producer: 2322 name: 'Spotify' 2323 url: 'https://www.spotify.com' 2324 2325- regex: 'The Knowledge AI' 2326 name: 'The Knowledge AI' 2327 category: 'Crawler' 2328 2329- regex: 'Embedly' 2330 name: 'Embedly' 2331 category: 'Crawler' 2332 url: 'https://support.embed.ly/hc/en-us' 2333 producer: 2334 name: 'A Medium, Corp.' 
2335 url: 'https://medium.com/' 2336 2337- regex: 'BrandVerity' 2338 name: 'BrandVerity' 2339 category: 'Crawler' 2340 url: 'https://www.brandverity.com/why-is-brandverity-visiting-me' 2341 producer: 2342 name: 'BrandVerity, Inc.' 2343 url: 'https://www.brandverity.com/' 2344 2345- regex: 'Kaspersky Lab CFR link resolver' 2346 name: 'Kaspersky' 2347 category: 'Security Checker' 2348 url: 'https://www.kaspersky.com/' 2349 producer: 2350 name: 'AO Kaspersky Lab' 2351 url: 'https://www.kaspersky.com/' 2352 2353- regex: 'eZ Publish Link Validator' 2354 name: 'eZ Publish Link Validator' 2355 category: 'Crawler' 2356 url: 'https://ez.no/' 2357 producer: 2358 name: 'eZ Systems AS' 2359 url: 'https://ez.no/' 2360 2361- regex: 'woorankreview' 2362 name: 'WooRank' 2363 category: 'Search bot' 2364 url: 'https://www.woorank.com/' 2365 producer: 2366 name: 'WooRank sprl' 2367 url: 'https://www.woorank.com/' 2368 2369- regex: 'Siteimprove' 2370 name: 'Siteimprove' 2371 category: 'Search bot' 2372 url: 'https://siteimprove.com/' 2373 producer: 2374 name: 'Siteimprove GmbH' 2375 url: 'https://siteimprove.com/' 2376 2377- regex: 'CATExplorador' 2378 name: 'CATExplorador' 2379 category: 'Search bot' 2380 url: 'https://fundacio.cat/ca/domini/' 2381 producer: 2382 name: 'Fundació puntCAT' 2383 url: 'https://fundacio.cat/ca/domini/' 2384 2385- regex: 'Buck' 2386 name: 'Buck' 2387 category: 'Search bot' 2388 url: 'https://hypefactors.com/' 2389 producer: 2390 name: 'Hypefactors A/S' 2391 url: 'https://hypefactors.com/' 2392 2393- regex: 'tracemyfile' 2394 name: 'TraceMyFile' 2395 category: 'Search bot' 2396 url: 'https://www.tracemyfile.com/' 2397 producer: 2398 name: 'Idee Inc.' 
2399 url: 'http://ideeinc.com/' 2400 2401- regex: 'zelist\.ro feed parser' 2402 name: 'Ze List' 2403 url: 'https://www.zelist.ro/' 2404 category: 'Feed Fetcher' 2405 producer: 2406 name: 'Treeworks SRL' 2407 url: 'https://www.tree.ro/' 2408 2409- regex: 'weborama-fetcher' 2410 name: 'Weborama' 2411 category: 'Search bot' 2412 url: 'https://weborama.com/' 2413 producer: 2414 name: 'Weborama SA' 2415 url: 'https://weborama.com/' 2416 2417- regex: 'BoardReader Favicon Fetcher' 2418 name: 'BoardReader' 2419 category: 'Search bot' 2420 url: 'https://boardreader.com/' 2421 producer: 2422 name: 'Effyis Inc' 2423 url: 'https://boardreader.com/' 2424 2425- regex: 'IDG/(?:EU|IT|RU|UK)' 2426 name: 'IDG' 2427 category: 'Crawler' 2428 url: 'https://www.spaziodati.eu/' 2429 producer: 2430 name: 'SpazioDati S.r.l.' 2431 url: 'https://www.spaziodati.eu/' 2432 2433- regex: 'Bytespider' 2434 name: 'Bytespider' 2435 category: 'Search bot' 2436 url: 'https://bytedance.com/' 2437 producer: 2438 name: 'ByteDance Ltd.' 2439 url: 'https://bytedance.com/' 2440 2441- regex: 'WikiDo' 2442 name: 'WikiDo' 2443 category: 'Search bot' 2444 url: 'https://www.wikido.com/' 2445 producer: 2446 name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.' 2447 url: 'https://www.wikido.com/' 2448 2449- regex: 'Awario(?:Smart)?Bot' 2450 name: 'Awario' 2451 category: 'Search bot' 2452 url: 'https://awario.com/bots.html' 2453 producer: 2454 name: 'TechFusion Ltd.' 2455 url: 'https://www.techfusion.com.cy/' 2456 2457- regex: 'AwarioRssBot' 2458 name: 'Awario' 2459 category: 'Feed Fetcher' 2460 url: 'https://awario.com/bots.html' 2461 producer: 2462 name: 'TechFusion Ltd.' 
2463 url: 'https://www.techfusion.com.cy/' 2464 2465- regex: 'oBot' 2466 name: 'oBot' 2467 category: 'Search bot' 2468 url: 'https://www.xforce-security.com/crawler/' 2469 producer: 2470 name: 'IBM Germany Research & Development GmbH' 2471 url: 'https://exchange.xforce.ibmcloud.com/' 2472 2473- regex: 'SMTBot' 2474 name: 'SMTBot' 2475 category: 'Search bot' 2476 url: 'https://www.similartech.com/smtbot' 2477 producer: 2478 name: 'SimilarTech Ltd.' 2479 url: 'https://www.similartech.com/' 2480 2481- regex: 'LCC' 2482 name: 'LCC' 2483 category: 'Search bot' 2484 url: 'https://corpora.uni-leipzig.de/crawler_faq.html' 2485 producer: 2486 name: 'Universität Leipzig' 2487 url: 'https://www.uni-leipzig.de/' 2488 2489- regex: 'Startpagina-Linkchecker' 2490 name: 'Startpagina Linkchecker' 2491 category: 'Search bot' 2492 url: 'https://www.startpagina.nl/linkchecker' 2493 producer: 2494 name: 'Startpagina B.V.' 2495 url: 'https://www.startpagina.nl/' 2496 2497- regex: 'MoodleBot-Linkchecker' 2498 name: 'MoodleBot Linkchecker' 2499 category: 'Search bot' 2500 url: 'https://docs.moodle.org/en/Usage' 2501 producer: 2502 name: 'Moodle Pty Ltd' 2503 url: 'https://moodle.org/' 2504 2505- regex: 'GTmetrix' 2506 name: 'GTmetrix' 2507 category: 'Crawler' 2508 url: 'https://gtmetrix.com/' 2509 producer: 2510 name: 'Carbon60 Operating Co. Ltd.' 
2511 url: 'https://www.carbon60.com/' 2512 2513- regex: 'CyberFind ?Crawler' 2514 name: 'CyberFind Crawler' 2515 category: 'Crawler' 2516 url: 'https://www.cyberfind.net/bot.html' 2517 producer: 2518 name: 'Find.tf' 2519 url: 'https://find.tf/' 2520 2521- regex: 'Nutch' 2522 name: 'Nutch-based Bot' 2523 category: 'Crawler' 2524 url: 'https://nutch.apache.org' 2525 producer: 2526 name: 'The Apache Software Foundation' 2527 url: 'https://www.apache.org/foundation/' 2528 2529- regex: 'Seobility' 2530 name: 'Seobility' 2531 category: 'Crawler' 2532 url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot' 2533 2534- regex: 'Vercelbot' 2535 name: 'Vercel Bot' 2536 category: 'Service bot' 2537 url: 'https://vercel.com' 2538 2539- regex: 'Grammarly' 2540 name: 'Grammarly' 2541 category: 'Service bot' 2542 url: 'https://www.grammarly.com' 2543 2544- regex: 'Robozilla' 2545 name: 'Robozilla' 2546 category: 'Crawler' 2547 2548- regex: 'Domains Project' 2549 name: 'Domains Project' 2550 category: 'Crawler' 2551 url: 'https://domainsproject.org' 2552 2553- regex: 'PetalBot' 2554 name: 'Petal Bot' 2555 category: 'Crawler' 2556 url: 'https://aspiegel.com/petalbot' 2557 2558- regex: 'SerendeputyBot' 2559 name: 'Serendeputy Bot' 2560 category: 'Crawler' 2561 url: 'https://serendeputy.com/about/serendeputy-bot' 2562 2563- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher' 2564 name: 'ADmantX Service Fetcher' 2565 category: 'Service bot' 2566 url: 'https://www.admantx.com/service-fetcher.html' 2567 2568- regex: 'SemanticScholarBot' 2569 name: 'Semantic Scholar Bot' 2570 category: 'Crawler' 2571 url: 'https://www.semanticscholar.org/crawler' 2572 2573- regex: 'VelenPublicWebCrawler' 2574 name: 'Velen Public Web Crawler' 2575 category: 'Crawler' 2576 url: 'https://hunter.io/robot' 2577 2578- regex: 'Barkrowler' 2579 name: 'Barkrowler' 2580 category: 'Crawler' 2581 url: 'http://www.exensa.com/crawl' 2582 2583- regex: 'BDCbot' 2584 name: 
'BDCbot' 2585 category: 'Crawler' 2586 url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx' 2587 producer: 2588 name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA' 2589 url: 'https://bigdatacorp.com.br/' 2590 2591- regex: 'adbeat' 2592 name: 'Adbeat' 2593 category: 'Crawler' 2594 url: 'https://www.adbeat.com/operation_policy' 2595 producer: 2596 name: 'PPC Labs LLC' 2597 url: 'https://www.adbeat.com/' 2598 2599- regex: '(?:BuiltWith|BW/)' 2600 name: 'BuiltWith' 2601 category: 'Crawler' 2602 url: 'https://builtwith.com/biup' 2603 producer: 2604 name: 'BuiltWith Pty Ltd' 2605 url: 'https://builtwith.com/' 2606 2607- regex: 'https://whatis\.contentkingapp\.com' 2608 name: 'ContentKing' 2609 category: 'Site Monitor' 2610 url: 'https://whatis.contentkingapp.com/' 2611 producer: 2612 name: 'ContentKing BV' 2613 url: 'https://www.contentkingapp.com/' 2614 2615- regex: 'MicroAdBot' 2616 name: 'MicroAdBot' 2617 category: 'Crawler' 2618 url: 'https://www.microad.co.jp/' 2619 producer: 2620 name: 'MicroAd, Inc.' 2621 url: 'https://www.microad.co.jp/' 2622 2623- regex: 'PingAdmin\.Ru' 2624 name: 'PingAdmin.Ru' 2625 category: 'Site Monitor' 2626 url: 'https://ping-admin.ru/' 2627 2628- regex: 'notifyninja.+monitoring' 2629 name: 'Notify Ninja' 2630 category: 'Site Monitor' 2631 url: 'http://notifyninja.com' 2632 2633- regex: 'WebDataStats' 2634 name: 'WebDataStats' 2635 category: 'Crawler' 2636 url: 'https://webdatastats.com/policy.html' 2637 producer: 2638 name: 'WebTehRazrabotka LLC' 2639 url: 'https://webdatastats.com/' 2640 2641- regex: 'parse\.ly scraper' 2642 name: 'parse.ly' 2643 category: 'Crawler' 2644 url: 'https://www.parse.ly/help/integration/crawler' 2645 producer: 2646 name: 'Parsely, Inc.' 
2647 url: 'https://www.parse.ly/' 2648 2649- regex: 'Nimbostratus-Bot' 2650 name: 'Nimbostratus Bot' 2651 category: 'Site Monitor' 2652 url: 'http://cloudsystemnetworks.com' 2653 2654- regex: 'HeartRails_Capture' 2655 name: 'Heart Rails Capture' 2656 category: 'Service Agent' 2657 url: 'http://capture.heartrails.com' 2658 2659- regex: 'Project-Resonance' 2660 name: 'Project Resonance' 2661 category: 'Crawler' 2662 url: 'https://project-resonance.com/' 2663 producer: 2664 name: 'RedHunt Labs Limited' 2665 url: 'https://redhuntlabs.com/' 2666 2667- regex: 'DataXu' 2668 name: 'DataXu' 2669 category: 'Service Agent' 2670 url: 'https://advertising.roku.com/dataxu' 2671 producer: 2672 name: 'Roku, Inc.' 2673 url: 'https://roku.com' 2674 2675- regex: 'Cocolyzebot' 2676 name: 'Cocolyzebot' 2677 category: 'Crawler' 2678 url: 'https://cocolyze.com/en/cocolyzebot' 2679 producer: 2680 name: 'VSI INNOVATION SAS' 2681 url: 'https://vsi-innovation.com/' 2682 2683- regex: 'veryhip' 2684 name: 'VeryHip' 2685 category: 'Crawler' 2686 url: 'https://veryhip.com/' 2687 producer: 2688 name: 'VeryHip' 2689 url: 'https://veryhip.com/' 2690 2691- regex: 'LinkpadBot' 2692 name: 'LinkpadBot' 2693 category: 'Crawler' 2694 url: 'https://www.linkpad.org/' 2695 producer: 2696 name: 'Solomono LLC' 2697 url: 'https://www.linkpad.org/' 2698 2699- regex: 'MuscatFerret' 2700 name: 'MuscatFerret' 2701 category: 'Crawler' 2702 url: 'http://www.webtop.com/' 2703 2704- regex: 'PageThing\.com' 2705 name: 'PageThing' 2706 category: 'Crawler' 2707 url: 'https://www.pagething.com/' 2708 producer: 2709 name: 'SPECIALNOISE LTD' 2710 url: 'https://www.specialnoise.com/' 2711 2712- regex: 'ArchiveBox' 2713 name: 'ArchiveBox' 2714 url: 'https://archivebox.io/' 2715 category: 'Crawler' 2716 producer: 2717 name: '' 2718 url: '' 2719 2720- regex: 'Choosito' 2721 name: 'Choosito' 2722 url: 'https://www.choosito.com/' 2723 category: 'Crawler' 2724 producer: 2725 name: 'Choosito! Inc.' 
2726 url: 'https://www.choosito.com/' 2727 2728- regex: 'datagnionbot' 2729 name: 'datagnionbot' 2730 url: 'https://www.datagnion.com/bot.html' 2731 category: 'Crawler' 2732 producer: 2733 name: 'DATAGNION GMBH' 2734 url: 'https://www.datagnion.com/' 2735 2736- regex: 'WhatCMS' 2737 name: 'WhatCMS' 2738 url: 'https://whatcms.org/' 2739 category: 'Crawler' 2740 producer: 2741 name: 'Nineteen Ten LLC' 2742 url: 'https://whatcms.org/' 2743 2744- regex: 'httpx' 2745 name: 'httpx' 2746 url: 'https://github.com/projectdiscovery/httpx' 2747 category: 'Crawler' 2748 producer: 2749 name: 'ProjectDiscovery, Inc.' 2750 url: 'https://projectdiscovery.io/' 2751 2752- regex: '.*\.oast\.' 2753 name: 'Interactsh' 2754 category: 'Security Checker' 2755 url: 'https://github.com/projectdiscovery/interactsh' 2756 producer: 2757 name: 'ProjectDiscovery, Inc.' 2758 url: 'https://projectdiscovery.io/' 2759 2760- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com' 2761 name: 'Expanse' 2762 category: 'Security Checker' 2763 url: 'https://expanse.co/' 2764 producer: 2765 name: 'Expanse Inc.' 2766 url: 'https://expanse.co/' 2767 2768- regex: 'HuaweiWebCatBot' 2769 name: 'HuaweiWebCatBot' 2770 category: 'Crawler' 2771 url: 'https://isecurity.huawei.com' 2772 producer: 2773 name: 'Huawei Technologies Co., Ltd.' 2774 url: 'https://huawei.com' 2775 2776- regex: 'Hatena-Favicon' 2777 name: 'Hatena Favicon' 2778 category: 'Crawler' 2779 url: 'https://www.hatena.ne.jp/faq/' 2780 producer: 2781 name: 'Hatena Co., Ltd.' 2782 url: 'https://www.hatena.ne.jp' 2783- regex: 'Hatena-?Bookmark' 2784 name: 'Hatena Bookmark' 2785 category: 'Crawler' 2786 url: 'https://www.hatena.ne.jp/faq/' 2787 producer: 2788 name: 'Hatena Co., Ltd.' 
2789 url: 'https://www.hatena.ne.jp' 2790 2791- regex: 'RyowlEngine' 2792 name: 'Ryowl' 2793 category: 'Crawler' 2794 url: 'https://ryowl.org' 2795 2796- regex: 'OdklBot' 2797 name: 'Odnoklassniki Bot' 2798 category: 'Crawler' 2799 url: 'https://odnoklassniki.ru' 2800 2801- regex: 'Mediatoolkitbot' 2802 name: 'Mediatoolkit Bot' 2803 category: 'Crawler' 2804 url: 'https://mediatoolkit.com' 2805 2806- regex: 'ZoominfoBot' 2807 name: 'ZoominfoBot' 2808 category: 'Crawler' 2809 url: 'https://www.zoominfo.com' 2810 2811- regex: 'WeViKaBot' 2812 name: 'WeViKaBot' 2813 category: 'Crawler' 2814 url: 'http://www.wevika.de' 2815 2816- regex: 'SEOkicks' 2817 name: 'SEOkicks' 2818 category: 'Crawler' 2819 url: 'https://www.seokicks.de/robot.html' 2820 producer: 2821 name: 'SEOkicks' 2822 url: 'https://www.seokicks.de/' 2823 2824- regex: 'Plukkie' 2825 name: 'Plukkie' 2826 category: 'Crawler' 2827 url: 'http://www.botje.com/plukkie.htm' 2828 2829- regex: 'proximic;' 2830 name: 'Comscore' 2831 category: 'Crawler' 2832 url: 'https://www.comscore.com/Web-Crawler' 2833 2834- regex: 'SurdotlyBot' 2835 name: 'SurdotlyBot' 2836 category: 'Crawler' 2837 url: 'http://sur.ly/bot.html' 2838 2839- regex: 'Gowikibot' 2840 name: 'Gowikibot' 2841 category: 'Crawler' 2842 url: 'http://www.gowikibot.com' 2843 2844- regex: 'SabsimBot' 2845 name: 'SabsimBot' 2846 category: 'Crawler' 2847 url: 'https://sabsim.com' 2848 2849- regex: 'LumtelBot' 2850 name: 'LumtelBot' 2851 category: 'Crawler' 2852 url: 'https://umtel.com' 2853 2854- regex: 'PiplBot' 2855 name: 'PiplBot' 2856 category: 'Crawler' 2857 url: 'http://www.pipl.com/bot' 2858 2859- regex: 'woobot' 2860 name: 'WooRank' 2861 category: 'Crawler' 2862 url: 'https://www.woorank.com/bot' 2863 2864- regex: 'Cookiebot' 2865 name: 'Cookiebot' 2866 category: 'Crawler' 2867 url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent' 2868 producer: 2869 name: 'Cybot A/S' 2870 url: 'https://www.cybot.com/' 2871 2872- regex: 
'NetSystemsResearch' 2873 name: 'NetSystemsResearch' 2874 category: 'Security Checker' 2875 url: 'https://www.netsystemsresearch.com/' 2876 producer: 2877 name: 'NET SYSTEMS RESEARCH LLC' 2878 url: 'https://www.netsystemsresearch.com/' 2879 2880- regex: 'CensysInspect' 2881 name: 'CensysInspect' 2882 category: 'Security Checker' 2883 url: 'https://about.censys.io/' 2884 producer: 2885 name: 'Censys, Inc.' 2886 url: 'https://censys.io/' 2887 2888- regex: 'gdnplus\.com' 2889 name: 'GDNP' 2890 category: 'Crawler' 2891 url: 'https://gdnplus.com/' 2892 producer: 2893 name: 'Global Digital Network Plus, LLC' 2894 url: 'https://gdnplus.com/' 2895 2896- regex: 'WellKnownBot' 2897 name: 'WellKnownBot' 2898 category: 'Crawler' 2899 url: 'https://well-known.dev' 2900 2901- regex: 'Adsbot' 2902 name: 'Adsbot' 2903 category: 'Crawler' 2904 url: 'https://seostar.co/robot/' 2905 2906- regex: 'MTRobot' 2907 name: 'MTRobot' 2908 category: 'Crawler' 2909 url: 'https://metrics-tools.de/robot.html' 2910 producer: 2911 name: 'Metrics Tools' 2912 url: 'https://metrics-tools.de/' 2913 2914- regex: 'serpstatbot' 2915 name: 'serpstatbot' 2916 category: 'Crawler' 2917 url: 'http://serpstatbot.com/' 2918 producer: 2919 name: 'Netpeak Ltd' 2920 url: 'https://netpeak.net/' 2921 2922- regex: 'colly' 2923 name: 'colly' 2924 category: 'Crawler' 2925 url: 'https://github.com/gocolly/colly/' 2926 2927- regex: 'l9tcpid' 2928 name: 'l9tcpid' 2929 category: 'Security Checker' 2930 url: 'https://github.com/LeakIX/l9tcpid' 2931 2932- regex: 'l9explore' 2933 name: 'l9explore' 2934 category: 'Security Checker' 2935 url: 'https://github.com/LeakIX/l9explore' 2936 2937- regex: 'l9scan/|^Lkx-.*/' 2938 name: 'LeakIX' 2939 category: 'Security Checker' 2940 url: 'https://leakix.net/' 2941 producer: 2942 name: 'BaDaaS SRL' 2943 url: 'https://leakix.net/' 2944 2945- regex: 'MegaIndex\.ru' 2946 name: 'MegaIndex' 2947 category: 'Crawler' 2948 url: 'https://megaindex.com/crawler' 2949 2950- regex: 'Seekport' 2951 
name: 'Seekport' 2952 category: 'Crawler' 2953 url: 'https://bot.seekport.com/' 2954 producer: 2955 name: 'SISTRIX GmbH' 2956 url: 'https://www.sistrix.de/' 2957 2958- regex: 'Seolyt(?:Bot)?' 2959 name: 'SeolytBot' 2960 category: 'Crawler' 2961 url: 'https://seolyt.com/' 2962 2963- regex: 'YaK/' 2964 name: 'YaK' 2965 category: 'Crawler' 2966 url: 'https://www.linkfluence.com/' 2967 producer: 2968 name: 'Linkfluence SAS' 2969 url: 'https://www.linkfluence.com/' 2970 2971- regex: 'KomodiaBot' 2972 name: 'KomodiaBot' 2973 category: 'Crawler' 2974 url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler' 2975 producer: 2976 name: 'Komodia Inc.' 2977 url: 'https://www.komodia.com/' 2978 2979- regex: 'KStandBot' 2980 name: 'KStandBot' 2981 category: 'Crawler' 2982 url: 'https://url-classification.io/wiki/index.php?title=URL_server_crawler' 2983 producer: 2984 name: 'Komodia Inc.' 2985 url: 'https://www.komodia.com/' 2986 2987- regex: 'Neevabot' 2988 name: 'Neevabot' 2989 category: 'Search bot' 2990 url: 'https://neeva.com/neevabot' 2991 producer: 2992 name: 'Neeva Inc.' 2993 url: 'https://neeva.com/' 2994 2995- regex: 'Chatwork LinkPreview' 2996 name: 'Chatwork LinkPreview' 2997 category: 'Service Agent' 2998 url: 'https://go.chatwork.com/en/' 2999 producer: 3000 name: 'kubell Co., Ltd.' 3001 url: 'https://www.kubell.com/en/' 3002 3003- regex: 'LinkPreview' 3004 name: 'LinkPreview' 3005 category: 'Service Agent' 3006 url: 'https://www.linkpreview.net/' 3007 3008- regex: 'JungleKeyThumbnail' 3009 name: 'JungleKeyThumbnail' 3010 category: 'Crawler' 3011 url: 'https://junglekey.com/' 3012 3013- regex: 'rocketmonitor(?:bot)?' 3014 name: 'RocketMonitorBot' 3015 category: 'Site Monitor' 3016 url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html' 3017 producer: 3018 name: 'Radio Mast, Inc.' 
3019 url: 'https://www.radiomast.io/' 3020 3021- regex: 'SitemapParser-VIPnytt' 3022 name: 'SitemapParser-VIPnytt' 3023 category: 'Crawler' 3024 url: 'https://github.com/VIPnytt/SitemapParser/' 3025 3026- regex: '^Turnitin' 3027 name: 'Turnitin' 3028 category: 'Crawler' 3029 url: 'https://turnitin.com/robot/crawlerinfo.html' 3030 3031- regex: 'DMBrowser|DMBrowser-[UB]V' 3032 name: 'Dotcom Monitor' 3033 category: 'Site Monitor' 3034 url: 'https://www.dotcom-monitor.com' 3035 3036- regex: 'ThinkChaos/' 3037 name: 'ThinkChaos' 3038 category: 'Crawler' 3039 3040- regex: 'DataForSeoBot' 3041 name: 'DataForSeoBot' 3042 category: 'Crawler' 3043 url: 'https://dataforseo.com/dataforseo-bot' 3044 3045- regex: 'Discordbot' 3046 name: 'Discord Bot' 3047 category: 'Service Agent' 3048 url: 'https://discordapp.com' 3049 3050- regex: 'Linespider' 3051 name: 'Linespider' 3052 category: 'Crawler' 3053 url: 'https://lin.ee/4dwXkTH' 3054 3055- regex: 'Cincraw' 3056 name: 'Cincraw' 3057 category: 'Crawler' 3058 url: 'http://cincrawdata.net/bot/' 3059 3060- regex: 'CISPA Web Analyzer' 3061 name: 'CISPA Web Analyzer' 3062 category: 'Crawler' 3063 url: 'https://notify.cispa.de/' 3064 producer: 3065 name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH' 3066 url: 'https://cispa.de/en' 3067 3068- regex: 'IonCrawl' 3069 name: 'IONOS Crawler' 3070 category: 'Crawler' 3071 url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/' 3072 producer: 3073 name: 'IONOS SE' 3074 url: 'https://www.ionos.de/' 3075 3076- regex: 'Crawldad' 3077 name: 'Crawldad' 3078 category: 'Crawler' 3079 url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972' 3080 3081- regex: 'https://securitytxt-scan\.cs\.hm\.edu/' 3082 name: 'security.txt scanserver' 3083 category: 'Security Checker' 3084 url: 'https://securitytxt-scan.cs.hm.edu/' 3085 producer: 3086 name: 'Hochschule für angewandte Wissenschaften München' 3087 url: 'https://www.hm.edu/' 3088 3089- regex: 'TigerBot' 3090 name: 'TigerBot' 
3091 category: 'Crawler' 3092 url: 'https://tiger.ch/' 3093 3094- regex: 'TestCrawler' 3095 name: 'TestCrawler' 3096 category: 'Crawler' 3097 url: 'https://www.comcepta.com/' 3098 3099- regex: 'CrowdTanglebot' 3100 name: 'CrowdTangle' 3101 category: 'Crawler' 3102 url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot' 3103 producer: 3104 name: 'CrowdTangle, Inc.' 3105 url: 'https://www.crowdtangle.com/' 3106 3107- regex: 'Sellers\.Guide Crawler by Primis' 3108 name: 'Sellers.Guide' 3109 category: 'Crawler' 3110 url: 'https://sellers.guide/' 3111 producer: 3112 name: 'McCann Disciplines, Ltd.' 3113 url: 'https://www.primis.tech/' 3114 3115- regex: 'OnalyticaBot' 3116 name: 'Onalytica' 3117 category: 'Crawler' 3118 url: 'https://www.airslate.com/bot/explore/onalytica-bot' 3119 producer: 3120 name: 'airSlate, Inc.' 3121 url: 'https://www.airslate.com/' 3122 3123- regex: 'deepnoc' 3124 name: 'deepnoc' 3125 category: 'Crawler' 3126 url: 'https://deepnoc.com/bot' 3127 producer: 3128 name: 'deepnoc, GmbH' 3129 url: 'https://deepnoc.com/' 3130 3131- regex: 'Newslitbot' 3132 name: 'Newslitbot' 3133 category: 'Crawler' 3134 url: 'https://www.newslit.co/' 3135 producer: 3136 name: 'Newslit, LLC.' 3137 url: 'https://www.newslit.co/' 3138 3139- regex: 'um-(?:ANS|CC|FC|IC|LN)' 3140 name: 'uMBot' 3141 category: 'Crawler' 3142 url: 'https://www.ubermetrics-technologies.com/' 3143 producer: 3144 name: 'Ubermetrics Technologies GmbH' 3145 url: 'https://www.ubermetrics-technologies.com/' 3146 3147- regex: 'Abonti' 3148 name: 'Abonti' 3149 category: 'Crawler' 3150 url: 'http://abonti.com/' 3151 3152- regex: 'collection@infegy\.com' 3153 name: 'Infegy' 3154 category: 'Crawler' 3155 url: 'https://infegy.com/' 3156 producer: 3157 name: 'Infegy, Inc.' 
3158 url: 'https://infegy.com/' 3159 3160- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)' 3161 name: 'IPIP' 3162 category: 'Security Checker' 3163 url: 'https://security.ipip.net/' 3164 producer: 3165 name: 'Beijing Tiantexin Tech. Co., Ltd.' 3166 url: 'https://en.ipip.net/' 3167 3168- regex: 'ev-crawler' 3169 name: 'Headline' 3170 category: 'Crawler' 3171 url: 'https://headline.com/legal/crawler' 3172 producer: 3173 name: 'e.ventures Managementgesellschaft mbH' 3174 url: 'https://headline.com/' 3175 3176- regex: 'webprosbot' 3177 name: 'WebPros' 3178 category: 'Crawler' 3179 url: 'https://webpros.com/' 3180 producer: 3181 name: 'WebPros Holdco B.V.' 3182 url: 'https://webpros.com/' 3183 3184- regex: 'ELB-HealthChecker' 3185 name: 'Amazon ELB' 3186 category: 'Site Monitor' 3187 url: 'https://aws.amazon.com/elasticloadbalancing/' 3188 producer: 3189 name: 'Amazon.com, Inc.' 3190 url: 'https://www.amazon.com/' 3191 3192- regex: 'Wheregoes\.com Redirect Checker' 3193 name: 'WhereGoes' 3194 category: 'Crawler' 3195 url: 'https://wheregoes.com/' 3196 3197- regex: 'project_patchwatch' 3198 name: 'Project Patchwatch' 3199 category: 'Crawler' 3200 url: 'http://66.240.192.82/' 3201 3202- regex: 'InternetMeasurement' 3203 name: 'InternetMeasurement' 3204 category: 'Crawler' 3205 url: 'https://internet-measurement.com/' 3206 3207- regex: 'DomainAppender' 3208 name: 'DomainAppender' 3209 category: 'Crawler' 3210 url: 'https://www.profound.net/product/domain_append/' 3211 producer: 3212 name: 'Profound Networks, LLC' 3213 url: 'https://www.profound.net/' 3214 3215- regex: 'FreeWebMonitoring SiteChecker' 3216 name: 'FreeWebMonitoring' 3217 category: 'Site Monitor' 3218 url: 'https://www.freewebmonitoring.com/bot.html' 3219 producer: 3220 name: 'GreenWave Online, Inc.' 
3221 url: 'http://www.greenwaveonline.com/' 3222 3223- regex: 'Page Modified Pinger' 3224 name: 'Page Modified Pinger' 3225 category: 'Site Monitor' 3226 url: 'https://www.pagemodified.com/' 3227 producer: 3228 name: 'Valley Hosting, LLC' 3229 url: 'https://www.pagemodified.com/' 3230 3231- regex: 'adstxtlab\.com' 3232 name: 'adstxtlab.com' 3233 category: 'Crawler' 3234 url: 'https://adstxtlab.com/validator.php' 3235 producer: 3236 name: 'Jaohawi AB' 3237 url: 'https://adstxtlab.com/' 3238 3239- regex: 'Iframely' 3240 name: 'Iframely' 3241 category: 'Crawler' 3242 url: 'https://iframely.com/' 3243 producer: 3244 name: 'Itteco Software, Corp.' 3245 url: 'https://iframely.com/' 3246 3247- regex: 'DomainStatsBot' 3248 name: 'DomainStatsBot' 3249 category: 'Crawler' 3250 url: 'https://domainstats.com/pages/our-bot' 3251 producer: 3252 name: 'Domainstats Ltd' 3253 url: 'https://domainstats.com/' 3254 3255- regex: 'aiHitBot' 3256 name: 'aiHitBot' 3257 category: 'Crawler' 3258 url: 'https://www.aihitdata.com/about' 3259 3260- regex: 'DomainCrawler/' 3261 name: 'DomainCrawler' 3262 category: 'Crawler' 3263 url: 'https://domaincrawler.com/about-us/' 3264 3265- regex: 'DNSResearchBot' 3266 name: 'DNSResearchBot' 3267 category: 'Crawler' 3268 3269- regex: 'GitCrawlerBot' 3270 name: 'GitCrawlerBot' 3271 category: 'Crawler' 3272 3273- regex: 'AdAuth' 3274 name: 'AdAuth' 3275 category: 'Crawler' 3276 url: 'https://www.adauth.com' 3277 3278- regex: 'faveeo\.com' 3279 name: 'Faveeo' 3280 category: 'Crawler' 3281 url: 'http://www.faveeo.com' 3282 3283- regex: 'kozmonavt\.' 3284 name: 'Kozmonavt' 3285 category: 'Crawler' 3286 url: 'https://kozmonavt.ml' 3287 3288- regex: 'CriteoBot/' 3289 name: 'CriteoBot' 3290 category: 'Crawler' 3291 url: 'https://www.criteo.com/criteo-crawler/' 3292 3293- regex: 'PayPal IPN' 3294 name: 'PayPal IPN' 3295 category: 'Service Agent' 3296 url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/' 3297 producer: 3298 name: 'PayPal, Inc.' 
3299 url: 'https://www.paypal.com/' 3300 3301- regex: 'MaCoCu' 3302 name: 'MaCoCu' 3303 category: 'Crawler' 3304 url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/' 3305 producer: 3306 name: 'Jožef Stefan Institute' 3307 url: 'https://www.ijs.si/ijsw/JSI' 3308 3309- regex: 'CLASSLA' 3310 name: 'CLASSLA-web' 3311 category: 'Crawler' 3312 url: 'https://www.clarin.si/info/classla-web-crawler/' 3313 producer: 3314 name: 'Jožef Stefan Institute' 3315 url: 'https://www.ijs.si/ijsw/JSI' 3316 3317- regex: 'dnt-policy@eff\.org' 3318 name: 'EFF Do Not Track Verifier' 3319 category: 'Crawler' 3320 url: 'https://www.eff.org/issues/do-not-track' 3321 producer: 3322 name: 'Electronic Frontier Foundation' 3323 url: 'https://www.eff.org/' 3324 3325- regex: 'InfoTigerBot' 3326 name: 'InfoTigerBot' 3327 category: 'Crawler' 3328 url: 'https://infotiger.com/bot' 3329 producer: 3330 name: 'Infotiger UG' 3331 url: 'https://infotiger.com/' 3332 3333- regex: '(?:Birdcrawlerbot|CrawlaDeBot)' 3334 name: 'Birdcrawlerbot' 3335 category: 'Crawler' 3336 url: 'https://crawla.de/de/index.php' 3337 producer: 3338 name: 'Swoppen Systems GmbH' 3339 url: 'https://www.swoppen.com/de' 3340 3341- regex: 'ScamadviserExternalHit' 3342 name: 'Scamadviser External Hit' 3343 category: 'Crawler' 3344 url: 'https://www.scamadviser.com/' 3345 producer: 3346 name: 'Ecommerce Operations B.V.' 3347 url: 'https://www.scamadviser.com/' 3348 3349- regex: 'ZaldamoSearchBot' 3350 name: 'Zaldamo' 3351 category: 'Crawler' 3352 url: 'https://www.zaldamo.com/search.html' 3353 producer: 3354 name: 'Zaldamo, LLC.' 3355 url: 'https://www.zaldamo.com/' 3356 3357- regex: 'AFB' 3358 name: 'Allloadin Favicon Bot' 3359 category: 'Crawler' 3360 url: 'https://allloadin.com/' 3361 3362- regex: 'LinkWalker' 3363 name: 'LinkWalker' 3364 category: 'Crawler' 3365 url: 'https://www.phishlabs.com/' 3366 producer: 3367 name: 'PhishLabs, Inc.' 
3368 url: 'https://www.phishlabs.com/' 3369 3370- regex: 'RenovateBot' 3371 name: 'RenovateBot' 3372 category: 'Security Checker' 3373 url: 'https://github.com/renovatebot/renovate' 3374 producer: 3375 name: 'White Source Ltd.' 3376 url: 'https://www.mend.io/free-developer-tools/renovate/' 3377 3378- regex: 'INETDEX-BOT' 3379 name: 'Inetdex Bot' 3380 category: 'Crawler' 3381 url: 'https://www.inetdex.com/' 3382 3383- regex: 'NETZZAPPEN' 3384 name: 'NETZZAPPEN' 3385 category: 'Crawler' 3386 url: 'https://www.netzzappen.com/' 3387 producer: 3388 name: 'Marc Huemer' 3389 url: 'https://www.netzzappen.com/' 3390 3391- regex: 'panscient\.com' 3392 name: 'Panscient' 3393 category: 'Crawler' 3394 url: 'https://www.panscient.com/faq.htm' 3395 producer: 3396 name: 'Panscient, Inc.' 3397 url: 'https://www.panscient.com/' 3398 3399- regex: 'research@pdrlabs\.net' 3400 name: 'PDR Labs' 3401 category: 'Security Checker' 3402 url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/' 3403 producer: 3404 name: 'PDR Labs' 3405 url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/' 3406 3407- regex: 'Nicecrawler' 3408 name: 'NiceCrawler' 3409 category: 'Crawler' 3410 url: 'https://www.nicecrawler.com/' 3411 producer: 3412 name: 'Intelium Corp.' 3413 url: 'https://www.intelium.com/' 3414 3415- regex: 't3versionsBot' 3416 name: 't3versions' 3417 category: 'Crawler' 3418 url: 'https://www.t3versions.com/bot' 3419 producer: 3420 name: 'Torben Hansen' 3421 url: 'https://www.t3versions.com/' 3422 3423- regex: 'Crawlson' 3424 name: 'Crawlson' 3425 category: 'Crawler' 3426 url: 'https://www.crawlson.com/about' 3427 producer: 3428 name: 'Crawlson' 3429 url: 'https://www.crawlson.com/' 3430 3431- regex: 'tchelebi' 3432 name: 'tchelebi' 3433 category: 'Crawler' 3434 url: 'https://tchelebi.io/' 3435 producer: 3436 name: 'NormShield, Inc.' 
3437 url: 'https://blackkite.com/' 3438 3439- regex: 'JobboerseBot' 3440 name: 'JobboerseBot' 3441 category: 'Crawler' 3442 url: 'https://www.xing.com/jobs' 3443 producer: 3444 name: 'New Work SE' 3445 url: 'https://www.xing.com/' 3446 3447- regex: 'RepoLookoutBot' 3448 name: 'Repo Lookout' 3449 category: 'Security Checker' 3450 url: 'https://www.repo-lookout.org/' 3451 producer: 3452 name: 'Crissy Field GmbH' 3453 url: 'https://www.crissyfield.de/' 3454 3455- regex: 'PATHspider' 3456 name: 'PATHspider' 3457 category: 'Security Checker' 3458 url: 'https://pathspider.net/' 3459 producer: 3460 name: 'MAMI Project' 3461 url: 'https://mami-project.eu/' 3462 3463- regex: 'everyfeed-spider' 3464 name: 'Everyfeed' 3465 url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/' 3466 category: 'Feed Fetcher' 3467 producer: 3468 name: '' 3469 url: '' 3470 3471- regex: 'Exchange check' 3472 name: 'Exchange check' 3473 category: 'Security Checker' 3474 url: 'https://github.com/GossiTheDog/scanning' 3475 producer: 3476 name: 'Kevin Beaumont' 3477 url: 'https://doublepulsar.com/' 3478 3479- regex: 'Sublinq' 3480 name: 'Sublinq' 3481 category: 'Crawler' 3482 url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/' 3483 producer: 3484 name: '' 3485 url: '' 3486 3487- regex: 'Gregarius' 3488 name: 'Gregarius' 3489 category: 'Feed Fetcher' 3490 url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/' 3491 producer: 3492 name: '' 3493 url: '' 3494 3495- regex: 'COMODO DCV' 3496 name: 'COMODO DCV' 3497 category: 'Service Agent' 3498 url: 'https://www.comodo.com/' 3499 producer: 3500 name: 'Comodo Security Solutions, Inc.' 
3501 url: 'https://www.comodo.com/' 3502 3503- regex: 'Sectigo DCV|acme\.sectigo\.com' 3504 name: 'Sectigo DCV' 3505 category: 'Service Agent' 3506 url: 'https://sectigo.com/' 3507 producer: 3508 name: 'Sectigo Limited' 3509 url: 'https://sectigo.com/' 3510 3511- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)' 3512 name: 'KlarnaBot' 3513 category: 'Crawler' 3514 url: 'https://docs.klarna.com/klarna-bot/' 3515 producer: 3516 name: 'Klarna Bank AB' 3517 url: 'https://www.klarna.com/' 3518 3519- regex: 'Taboolabot' 3520 name: 'Taboolabot' 3521 category: 'Crawler' 3522 url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler' 3523 producer: 3524 name: 'Taboola, Inc.' 3525 url: 'https://www.taboola.com/' 3526 3527- regex: 'Asana' 3528 name: 'Asana' 3529 category: 'Crawler' 3530 url: 'https://asana.com/' 3531 producer: 3532 name: 'Asana, Inc.' 3533 url: 'https://asana.com/' 3534 3535- regex: 'Chrome Privacy Preserving Prefetch Proxy' 3536 name: 'Chrome Privacy Preserving Prefetch Proxy' 3537 category: 'Service Agent' 3538 url: 'https://developer.chrome.com/blog/private-prefetch-proxy/' 3539 producer: 3540 name: 'Google Inc.' 3541 url: 'https://www.google.com/' 3542 3543- regex: 'URLinspectorBot' 3544 name: 'URLinspector' 3545 category: 'Site Monitor' 3546 url: 'https://www.urlinspector.com/bot/' 3547 producer: 3548 name: 'LinkResearchTools GmbH' 3549 url: 'https://www.linkresearchtools.com/' 3550 3551- regex: 'EntferBot' 3552 name: 'Entfer' 3553 category: 'Crawler' 3554 url: 'https://entfer.com/' 3555 producer: 3556 name: 'Entfer Ltd.' 
3557 url: 'https://entfer.com/' 3558 3559- regex: 'TagInspector' 3560 name: 'Tag Inspector' 3561 category: 'Crawler' 3562 url: 'https://taginspector.com/' 3563 producer: 3564 name: 'InfoTrust, LLC' 3565 url: 'https://infotrust.com/' 3566 3567- regex: 'pageburst' 3568 name: 'Pageburst' 3569 category: 'Crawler' 3570 url: 'https://pageburstls.elsevier.com/' 3571 producer: 3572 name: 'Elsevier Ltd' 3573 url: 'https://www.elsevier.com/' 3574 3575- regex: '.+diffbot' 3576 name: 'Diffbot' 3577 category: 'Crawler' 3578 url: 'https://docs.diffbot.com/docs/getting-started-with-crawl' 3579 producer: 3580 name: 'Diffbot Technologies Corp.' 3581 url: 'https://www.diffbot.com/' 3582 3583- regex: 'DisqusAdstxtCrawler' 3584 name: 'Disqus' 3585 category: 'Crawler' 3586 url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide' 3587 producer: 3588 name: 'Disqus, Inc.' 3589 url: 'https://disqus.com/' 3590 3591- regex: 'startmebot' 3592 name: 'start.me' 3593 category: 'Crawler' 3594 url: 'https://about.start.me/' 3595 producer: 3596 name: 'start.me BV' 3597 url: 'https://about.start.me/' 3598 3599- regex: '2ip bot' 3600 name: '2ip' 3601 category: 'Crawler' 3602 url: 'https://2ip.io/' 3603 3604- regex: 'ReqBin Curl Client' 3605 name: 'ReqBin' 3606 category: 'Crawler' 3607 url: 'https://reqbin.com/curl' 3608 3609- regex: 'XoviBot' 3610 name: 'XoviBot' 3611 category: 'Crawler' 3612 url: 'https://www.xovibot.net' 3613 producer: 3614 name: 'Xovi GmbH' 3615 url: 'http://www.xovi.de' 3616 3617- regex: 'Overcast/.+Podcast Sync' 3618 name: 'Overcast Podcast Sync' 3619 category: 'Service Agent' 3620 url: 'https://overcast.fm/podcasterinfo' 3621 3622- regex: '^Verity' 3623 name: 'GumGum Verity' 3624 category: 'Service Agent' 3625 url: 'https://gumgum.com/verity' 3626 3627- regex: 'hackermention' 3628 name: 'hackermention' 3629 category: 'Feed Reader' 3630 url: 'https://github.com/snarfed/hackermention' 3631 3632- regex: 'BitSightBot' 3633 name: 'BitSight' 3634 category: 
'Security Checker' 3635 url: 'https://www.bitsight.com/' 3636 producer: 3637 name: 'BitSight Technologies, Inc.' 3638 url: 'https://www.bitsight.com/' 3639 3640- regex: 'Ezgif' 3641 name: 'Ezgif' 3642 category: 'Service Agent' 3643 url: 'https://ezgif.com/about' 3644 3645- regex: 'intelx\.io_bot' 3646 name: 'Intelligence X' 3647 category: 'Crawler' 3648 url: 'https://intelx.io/' 3649 producer: 3650 name: 'Kleissner Investments s.r.o.' 3651 url: 'https://intelx.io/' 3652 3653- regex: 'FemtosearchBot' 3654 name: 'Femtosearch' 3655 category: 'Crawler' 3656 url: 'http://femtosearch.com/' 3657 producer: 3658 name: 'Grier Forensics, LLC' 3659 url: 'https://www.grierforensics.com/' 3660 3661- regex: 'AdsTxtCrawler/' 3662 name: 'AdsTxtCrawler' 3663 category: 'Crawler' 3664 url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler' 3665 producer: 3666 name: 'IAB Technology Laboratory, Inc.' 3667 url: 'https://iabtechlab.com/' 3668 3669- regex: 'Morningscore' 3670 name: 'Morningscore Bot' 3671 category: 'Crawler' 3672 url: 'https://morningscore.io/' 3673 producer: 3674 name: 'Morningscore' 3675 url: 'https://morningscore.io/' 3676 3677- regex: 'Uptime-Kuma' 3678 name: 'Uptime-Kuma' 3679 category: 'Site Monitor' 3680 url: 'https://github.com/louislam/uptime-kuma' 3681 3682- regex: 'OAI-SearchBot' 3683 name: 'OAI-SearchBot' 3684 category: 'Crawler' 3685 url: 'https://platform.openai.com/docs/bots' 3686 producer: 3687 name: 'OpenAI OpCo, LLC' 3688 url: 'https://openai.com/' 3689 3690- regex: 'GPTBot' 3691 name: 'GPTBot' 3692 category: 'Crawler' 3693 url: 'https://platform.openai.com/docs/bots' 3694 producer: 3695 name: 'OpenAI OpCo, LLC' 3696 url: 'https://openai.com/' 3697 3698- regex: 'ChatGPT-User' 3699 name: 'ChatGPT-User' 3700 category: 'Crawler' 3701 url: 'https://platform.openai.com/docs/bots' 3702 producer: 3703 name: 'OpenAI OpCo, LLC' 3704 url: 'https://openai.com/' 3705 3706- regex: 'BrightEdge Crawler' 3707 name: 'BrightEdge' 3708 category: 'Crawler' 3709 
url: 'https://www.brightedge.com/' 3710 producer: 3711 name: 'BrightEdge Technologies, Inc' 3712 url: 'https://www.brightedge.com/' 3713 3714- regex: 'sfFeedReader' 3715 name: 'sfFeedReader' 3716 url: 'https://github.com/diem-project/sfFeed2Plugin' 3717 category: 'Feed Fetcher' 3718 3719- regex: 'cyberscan\.io' 3720 name: 'Cyberscan' 3721 category: 'Security Checker' 3722 url: 'https://www.cyberscan.io/' 3723 producer: 3724 name: 'DGC Verwaltungs GmbH' 3725 url: 'https://dgc.org/' 3726 3727- regex: 'researchscan\.comsys\.rwth-aachen\.de' 3728 name: 'Research Scan' 3729 category: 'Crawler' 3730 url: 'http://researchscan.comsys.rwth-aachen.de/' 3731 producer: 3732 name: 'RWTH Aachen University' 3733 url: 'https://www.comsys.rwth-aachen.de/' 3734 3735- regex: 'newspaper' 3736 name: 'Scraping Robot' 3737 category: 'Crawler' 3738 url: 'https://scrapingrobot.com/' 3739 producer: 3740 name: 'Sprious LLC' 3741 url: 'https://sprious.com/' 3742 3743- regex: 'Ant(?:\.com beta|Bot)' 3744 name: 'Ant' 3745 category: 'Crawler' 3746 url: 'https://www.ant.com/' 3747 producer: 3748 name: 'Ant.com Ltd.' 
3749 url: 'https://www.ant.com/' 3750 3751- regex: 'WebwikiBot' 3752 name: 'Webwiki' 3753 category: 'Crawler' 3754 url: 'https://www.webwiki.com/' 3755 producer: 3756 name: 'webwiki GmbH' 3757 url: 'https://www.webwiki.com/' 3758 3759- regex: 'phpMyAdmin' 3760 name: 'phpMyAdmin' 3761 category: 'Service Agent' 3762 url: 'https://www.phpmyadmin.net/' 3763 3764- regex: 'Matomo/[\d.]+' 3765 name: 'Matomo' 3766 category: 'Service Agent' 3767 url: 'https://github.com/matomo-org/matomo' 3768 producer: 3769 name: 'InnoCraft Ltd' 3770 url: 'https://matomo.org/' 3771 3772- regex: 'Prometheus' 3773 name: 'Prometheus' 3774 category: 'Service Agent' 3775 url: 'https://github.com/prometheus/prometheus' 3776 producer: 3777 name: 'The Linux Foundation' 3778 url: 'https://www.cncf.io/' 3779 3780- regex: 'ArchiveTeam ArchiveBot' 3781 name: 'ArchiveBot' 3782 category: 'Crawler' 3783 url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot' 3784 producer: 3785 name: 'ArchiveTeam' 3786 url: 'https://wiki.archiveteam.org/' 3787 3788- regex: 'MADBbot' 3789 name: 'MADBbot' 3790 category: 'Crawler' 3791 url: 'https://madb.zapto.org/bot.html' 3792 3793- regex: 'MeltwaterNews' 3794 name: 'MeltwaterNews' 3795 category: 'Crawler' 3796 producer: 3797 name: 'Meltwater Deutschland GmbH' 3798 url: 'https://www.meltwater.com/' 3799 3800- regex: 'owler' 3801 name: 'OWLer' 3802 category: 'Crawler' 3803 url: 'https://openwebsearch.eu/owler/' 3804 producer: 3805 name: 'Open Search Foundation e.V.' 
3806 url: 'https://openwebsearch.eu/' 3807 3808- regex: 'bbc\.co\.uk/display/men/Page\+Monitor' 3809 name: 'BBC Page Monitor' 3810 category: 'Site Monitor' 3811 url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor' 3812 producer: 3813 name: 'BBC' 3814 url: 'https://www.bbc.com/' 3815 3816- regex: 'BBC-Forge-URL-Monitor-Twisted' 3817 name: 'BBC Forge URL Monitor' 3818 category: 'Site Monitor' 3819 url: 'https://www.bbc.com/' 3820 producer: 3821 name: 'BBC' 3822 url: 'https://www.bbc.com/' 3823 3824- regex: 'ClaudeBot' 3825 name: 'ClaudeBot' 3826 category: 'Crawler' 3827 url: 'https://github.com/ClaudeBot/ClaudeBot' 3828 3829- regex: 'Imagesift' 3830 name: 'ImageSift' 3831 category: 'Crawler' 3832 url: 'https://imagesift.com/' 3833 producer: 3834 name: 'Castle Global, Inc.' 3835 url: 'https://thehive.ai/' 3836 3837- regex: 'TactiScout' 3838 name: 'TactiScout' 3839 category: 'Crawler' 3840 url: 'https://find-it.world/TempCrawl/Crawltheque.php' 3841 producer: 3842 name: 'Tactikast' 3843 3844- regex: 'Brightbot' 3845 name: 'BrightBot' 3846 category: 'Crawler' 3847 url: 'https://www.brightbot.app/' 3848 producer: 3849 name: 'Bright Interactive Ltd' 3850 url: 'https://www.builtbybright.com/' 3851 3852- regex: 'DaspeedBot' 3853 name: 'DaspeedBot' 3854 category: 'Crawler' 3855 url: 'https://daspeed.io/' 3856 producer: 3857 name: 'DAWAP SARL' 3858 url: 'https://dawap.fr/' 3859 3860- regex: 'StractBot' 3861 name: 'Stract' 3862 category: 'Crawler' 3863 url: 'https://stract.com/webmasters' 3864 producer: 3865 name: 'Stract' 3866 url: 'https://github.com/StractOrg/stract/' 3867 3868- regex: 'GeedoBot' 3869 name: 'GeedoBot' 3870 category: 'Crawler' 3871 url: 'https://geedo.com/bot/' 3872 3873- regex: 'GeedoProductSearch' 3874 name: 'GeedoProductSearch' 3875 category: 'Crawler' 3876 url: 'https://geedo.com/product-search/' 3877 3878- regex: 'BackupLand' 3879 name: 'BackupLand' 3880 category: 'Crawler' 3881 url: 'https://go.backupland.com/' 3882 producer: 3883 name: 'ООО 
«КВАРТА»' 3884 url: 'https://go.backupland.com/' 3885 3886- regex: 'Konturbot' 3887 name: 'Konturbot' 3888 category: 'Crawler' 3889 url: 'https://kontur.ru/' 3890 producer: 3891 name: 'АО «ПФ «СКБ Контур»' 3892 url: 'https://kontur.ru/' 3893 3894- regex: 'keys-so-bot' 3895 name: 'Keys.so' 3896 category: 'Crawler' 3897 url: 'https://www.keys.so/' 3898 producer: 3899 name: 'ООО «МОДЕСКО»' 3900 url: 'https://www.modesco.ru/' 3901 3902- regex: 'LetsearchBot' 3903 name: 'LetSearch' 3904 category: 'Crawler' 3905 url: 'https://letsearch.ru/bots' 3906 3907- regex: 'Example3' 3908 name: 'Example3' 3909 category: 'Crawler' 3910 url: 'https://www.example3.com/' 3911 3912- regex: 'StatOnlineRuBot' 3913 name: 'StatOnline.ru' 3914 category: 'Crawler' 3915 url: 'https://statonline.ru/' 3916 producer: 3917 name: 'ООО «Регистратор доменных имен РЕГ.РУ»' 3918 url: 'https://statonline.ru/' 3919 3920- regex: 'Spawning-AI' 3921 name: 'Spawning AI' 3922 category: 'Crawler' 3923 url: 'https://spawning.ai/' 3924 producer: 3925 name: 'Spawning, Inc' 3926 url: 'https://spawning.ai/' 3927 3928- regex: 'domain research project' 3929 name: 'Domain Research Project' 3930 category: 'Crawler' 3931 url: 'https://trentwil.es/domains.html' 3932 producer: 3933 name: 'Trent Wiles' 3934 url: 'https://trentwil.es/' 3935 3936- regex: 'getodin\.com' 3937 name: 'Odin' 3938 category: 'Security Checker' 3939 url: 'https://docs.getodin.com/' 3940 producer: 3941 name: 'Cyble Inc.' 3942 url: 'https://cyble.com/' 3943 3944- regex: 'YouBot' 3945 name: 'YouBot' 3946 category: 'Crawler' 3947 url: 'https://about.you.com/youbot/' 3948 producer: 3949 name: 'SuSea, Inc.' 
3950 url: 'https://you.com/' 3951 3952- regex: 'SiteScoreBot' 3953 name: 'SiteScore' 3954 category: 'Crawler' 3955 url: 'https://sitescore.ai/' 3956 3957- regex: 'MBCrawler' 3958 name: 'Monitor Backlinks' 3959 category: 'Crawler' 3960 url: 'https://www.seoptimer.com/monitor-backlinks/' 3961 producer: 3962 name: 'SEOptimer' 3963 url: 'https://www.seoptimer.com/' 3964 3965- regex: 'mariadb-mysql-kbs-bot' 3966 name: 'MariaDB/MySQL Knowledge Base' 3967 category: 'Crawler' 3968 url: 'https://github.com/williamdes/mariadb-mysql-kbs' 3969 producer: 3970 name: 'WDES SAS' 3971 url: 'https://wdes.fr/en/' 3972 3973- regex: 'GitHubCopilotChat' 3974 name: 'GitHubCopilotChat' 3975 category: 'Crawler' 3976 url: 'https://github.com/aaamoon/copilot-gpt4-service' 3977 3978- regex: '^pdrl\.fm' 3979 name: 'Podroll Analyzer' 3980 category: 'Crawler' 3981 url: 'https://podroll.fm' 3982 3983- regex: 'PodUptime/' 3984 name: 'PodUptime' 3985 category: 'Site Monitor' 3986 url: 'https://poduptime.com' 3987 3988- regex: 'anthropic-ai' 3989 name: 'Anthropic AI' 3990 category: 'Crawler' 3991 url: 'https://www.anthropic.com/' 3992 producer: 3993 name: 'Anthropic, PBC' 3994 url: 'https://www.anthropic.com/' 3995 3996- regex: 'NetpeakCheckerBot' 3997 name: 'Netpeak Checker' 3998 category: 'Crawler' 3999 url: 'https://netpeaksoftware.com/checker' 4000 producer: 4001 name: 'Netpeak LTD' 4002 url: 'https://netpeaksoftware.com/' 4003 4004- regex: 'SandobaCrawler' 4005 name: 'Sandoba//Crawler' 4006 category: 'Crawler' 4007 url: 'https://www.sandoba.com/en/crawler/' 4008 producer: 4009 name: 'SANDOBA//EBUSINESS SOLUTIONS' 4010 url: 'https://www.sandoba.com/' 4011 4012- regex: 'SirdataBot' 4013 name: 'Sirdata' 4014 category: 'Crawler' 4015 url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction' 4016 producer: 4017 name: 'Sirdata SAS' 4018 url: 'https://www.sirdata.com/' 4019 4020- regex: 'CheckMarkNetwork' 4021 name: 'CheckMark Network' 4022 category: 'Crawler' 4023 url: 
'https://www.checkmarknetwork.com/spider.html/' 4024 producer: 4025 name: 'Exipert, Inc.' 4026 url: 'https://www.checkmarknetwork.com/' 4027 4028- regex: 'cohere-ai' 4029 name: 'Cohere AI' 4030 category: 'Crawler' 4031 url: 'https://cohere.com/' 4032 producer: 4033 name: 'Cohere, Inc.' 4034 url: 'https://cohere.com/' 4035 4036- regex: 'PerplexityBot' 4037 name: 'PerplexityBot' 4038 category: 'Crawler' 4039 url: 'https://docs.perplexity.ai/docs/perplexitybot' 4040 producer: 4041 name: 'Perplexity AI, Inc.' 4042 url: 'https://www.perplexity.ai/' 4043 4044- regex: 'TTD-Content' 4045 name: 'The Trade Desk Content' 4046 category: 'Crawler' 4047 url: 'https://www.thetradedesk.com/us/ttd-content' 4048 producer: 4049 name: 'The Trade Desk, Inc.' 4050 url: 'https://www.thetradedesk.com/' 4051 4052- regex: 'montastic-monitor' 4053 name: 'Montastic Monitor' 4054 category: 'Site Monitor' 4055 url: 'https://www.montastic.com/' 4056 producer: 4057 name: 'Metadot, Corp.' 4058 url: 'https://www.metadot.com/' 4059 4060- regex: 'Ruby, Twurly v' 4061 name: 'Twurly' 4062 category: 'Crawler' 4063 url: 'https://twurly.org/' 4064 4065- regex: 'Mixnode(?:Cache)?' 4066 name: 'Mixnode' 4067 category: 'Crawler' 4068 url: 'https://www.mixnode.com/' 4069 producer: 4070 name: 'Mixnode Technologies, Inc.' 
4071 url: 'https://www.mixnode.com/' 4072 4073- regex: 'CSSCheck' 4074 name: 'CSSCheck' 4075 category: 'Validator' 4076 4077- regex: 'MicrosoftPreview' 4078 name: 'Microsoft Preview' 4079 category: 'Service Agent' 4080 url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0' 4081 producer: 4082 name: 'Microsoft Corporation' 4083 url: 'https://www.microsoft.com/' 4084 4085- regex: 's~virustotalcloud' 4086 name: 'VirusTotal Cloud' 4087 category: 'Crawler' 4088 url: 'https://www.virustotal.com/' 4089 producer: 4090 name: 'Chronicle Security Ireland Limited' 4091 url: 'https://chronicle.security/' 4092 4093- regex: 'TinEye' 4094 name: 'TinEye' 4095 category: 'Crawler' 4096 url: 'https://tineye.com/' 4097 producer: 4098 name: 'Idée, Inc.' 4099 url: 'https://tineye.com/' 4100 4101- regex: 'e~arsnova-filter-system' 4102 name: 'ARSNova Filter System' 4103 category: 'Crawler' 4104 url: 'https://particify.de/en/' 4105 producer: 4106 name: 'Particify Gerhardt & Weingarten OHG' 4107 url: 'https://particify.de/en/' 4108 4109- regex: 'botify' 4110 name: 'Botify' 4111 category: 'Crawler' 4112 url: 'https://www.botify.com/' 4113 producer: 4114 name: 'BOTIFY SAS' 4115 url: 'https://www.botify.com/' 4116 4117- regex: 'adscanner' 4118 name: 'Adscanner' 4119 category: 'Crawler' 4120 url: 'https://www.alleyesonscreens.com/' 4121 producer: 4122 name: 'AdScanner d.o.o' 4123 url: 'https://www.alleyesonscreens.com/' 4124 4125- regex: 'online-webceo-bot' 4126 name: 'WebCEO' 4127 category: 'Crawler' 4128 url: 'https://www.webceo.com/' 4129 producer: 4130 name: 'WebCEO, LLC' 4131 url: 'https://www.webceo.com/' 4132 4133- regex: 'NetTrack' 4134 name: 'NetTrack' 4135 category: 'Crawler' 4136 url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/' 4137 4138- regex: 'htmlyse' 4139 name: 'htmlyse' 4140 category: 'Crawler' 4141 url: 'https://www.htmlyse.com/' 4142 producer: 4143 name: 'Vistex LTD' 4144 url: 'https://www.htmlyse.com/' 4145 4146- regex: 
'TrendsmapResolver' 4147 name: 'Trendsmap' 4148 category: 'Crawler' 4149 url: 'https://www.trendsmap.com/' 4150 producer: 4151 name: 'Trendsmap Pty Ltd' 4152 url: 'https://www.trendsmap.com/' 4153 4154- regex: 'Shareaholic(?:bot)?' 4155 name: 'Steve Bot' 4156 category: 'Crawler' 4157 url: 'https://www.shareaholic.com/steve' 4158 producer: 4159 name: 'Shareaholic, Inc.' 4160 url: 'https://www.shareaholic.com/' 4161 4162- regex: 'keycdn-tools:' 4163 name: 'KeyCDN Tools' 4164 category: 'Service Agent' 4165 url: 'https://tools.keycdn.com/geo' 4166 4167- regex: 'keycdn-tools/' 4168 name: 'KeyCDN Tools' 4169 category: 'Service Agent' 4170 url: 'https://tools.keycdn.com/' 4171 producer: 4172 name: 'proinity LLC' 4173 url: 'https://www.keycdn.com/' 4174 4175- regex: 'Arquivo-web-crawler' 4176 name: 'Arquivo.pt' 4177 category: 'Crawler' 4178 url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/' 4179 producer: 4180 name: 'FCT|FCCN' 4181 url: 'https://www.fct.pt/' 4182 4183- regex: 'WhatsMyIP\.org' 4184 name: 'WhatsMyIP.org' 4185 category: 'Service Agent' 4186 url: 'https://www.whatsmyip.org/ua/' 4187 4188- regex: 'SenutoBot' 4189 name: 'Senuto' 4190 category: 'Crawler' 4191 url: 'https://www.senuto.com/' 4192 producer: 4193 name: 'Senuto Sp. z o.o.' 4194 url: 'https://www.senuto.com/' 4195 4196- regex: 'GozleBot' 4197 name: 'Gozle' 4198 category: 'Crawler' 4199 url: 'https://gozle.com.tm/en/blog/post/1' 4200 producer: 4201 name: 'Doly Horjun HJ' 4202 url: 'https://gozle.com.tm/' 4203 4204- regex: 'Quantcastbot' 4205 name: 'Quantcast' 4206 category: 'Crawler' 4207 url: 'https://www.quantcast.com/bot/' 4208 producer: 4209 name: 'Quantcast Corp.' 
4210 url: 'https://www.quantcast.com/' 4211 4212- regex: 'FontRadar' 4213 name: 'FontRadar' 4214 category: 'Crawler' 4215 url: 'https://www.fontradar.com/' 4216 producer: 4217 name: 'EMDASH SAS' 4218 url: 'https://www.fontradar.com/' 4219 4220- regex: 'ViberUrlDownloader' 4221 name: 'Viber Url Downloader' 4222 category: 'Service Agent' 4223 url: 'https://www.viber.com/' 4224 producer: 4225 name: 'Viber Media S.à r.l.' 4226 url: 'https://www.viber.com/' 4227 4228- regex: '^Zeno$' 4229 name: 'Zeno' 4230 category: 'Crawler' 4231 url: 'https://github.com/internetarchive/Zeno' 4232 producer: 4233 name: 'The Internet Archive' 4234 url: 'https://archive.org/' 4235 4236- regex: 'Barracuda Sentinel' 4237 name: 'Barracuda Sentinel' 4238 category: 'Service Agent' 4239 url: 'https://sentinel.barracudanetworks.com/' 4240 producer: 4241 name: 'Barracuda Networks, Inc.' 4242 url: 'https://www.barracudanetworks.com/' 4243 4244- regex: 'RuxitSynthetic' 4245 name: 'RuxitSynthetic' 4246 category: 'Site Monitor' 4247 url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164' 4248 producer: 4249 name: 'Dynatrace LLC' 4250 url: 'https://www.dynatrace.com/' 4251 4252- regex: 'DynatraceSynthetic' 4253 name: 'DynatraceSynthetic' 4254 category: 'Site Monitor' 4255 url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164' 4256 producer: 4257 name: 'Dynatrace LLC' 4258 url: 'https://www.dynatrace.com/' 4259 4260- regex: 'sitebulb' 4261 name: 'Sitebulb' 4262 category: 'Crawler' 4263 url: 'https://sitebulb.com/' 4264 producer: 4265 name: 'Sitebulb Limited' 4266 url: 'https://sitebulb.com/' 4267 4268- regex: 'Monsidobot' 4269 name: 'Monsidobot' 4270 category: 'Crawler' 4271 url: 'https://monsido.com/bot-html' 4272 producer: 4273 name: 'Monsido LLC' 4274 url: 'https://monsido.com/' 4275 4276- regex: 'AccompanyBot' 4277 name: 'AccompanyBot' 4278 category: 'Crawler' 4279 url: 'https://www.accompany.com/' 4280 producer: 
4281 name: 'Accompani, Inc' 4282 url: 'https://www.accompany.com/' 4283 4284- regex: 'Ghost Inspector' 4285 name: 'Ghost Inspector' 4286 category: 'Site Monitor' 4287 url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site' 4288 producer: 4289 name: 'Ghost Inspector, Inc.' 4290 url: 'https://www.ghostinspector.com/' 4291 4292- regex: 'Google-Apps-Script' 4293 name: 'Google Apps Script' 4294 category: 'Service Agent' 4295 url: 'https://www.google.com/script/start/' 4296 4297- regex: 'SiteOne-Crawler' 4298 name: 'SiteOne Crawler' 4299 category: 'Crawler' 4300 url: 'https://crawler.siteone.io/bot/' 4301 producer: 4302 name: 'SiteOne s.r.o.' 4303 url: 'https://www.siteone.io/' 4304 4305- regex: 'Detectify' 4306 name: 'Detectify' 4307 category: 'Security Checker' 4308 url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site' 4309 producer: 4310 name: 'Detectify AB' 4311 url: 'https://detectify.com/' 4312 4313- regex: 'DomCopBot' 4314 name: 'DomCop Bot' 4315 category: 'Crawler' 4316 url: 'https://www.domcop.com/bot' 4317 producer: 4318 name: 'Axeman Technology Solutions LLP' 4319 url: 'https://axemantech.com/' 4320 4321- regex: 'Paqlebot' 4322 name: 'Paqlebot' 4323 category: 'Crawler' 4324 url: 'https://www.paqle.dk/about/paqlebot' 4325 producer: 4326 name: 'Paqle A/S' 4327 url: 'https://www.paqle.dk/' 4328 4329- regex: 'Wibybot' 4330 name: 'Wibybot' 4331 category: 'Crawler' 4332 url: 'https://www.wiby.me/' 4333 4334- regex: 'Synapse' 4335 name: 'Synapse' 4336 category: 'Crawler' 4337 url: 'https://github.com/matrix-org/synapse' 4338 4339- regex: 'OSZKbot' 4340 name: 'OSZKbot' 4341 category: 'Crawler' 4342 url: 'http://mekosztaly.oszk.hu/mia/' 4343 producer: 4344 name: 'National Szechenyi Library' 4345 url: 'https://webarchivum.oszk.hu/' 4346 4347- regex: 'ZoomBot' 4348 name: 'ZoomBot' 4349 category: 'Crawler' 4350 url: 'https://suite.seozoom.it/bot.html' 4351 
producer: 4352 name: 'SEO Cube S.r.l.' 4353 url: 'https://www.seocube.it/' 4354 4355- regex: 'RavenCrawler' 4356 name: 'RavenCrawler' 4357 category: 'Crawler' 4358 url: 'https://raventools.com/site-auditor/' 4359 producer: 4360 name: 'TapClicks, Inc.' 4361 url: 'https://www.tapclicks.com/' 4362 4363- regex: 'KadoBot' 4364 name: 'KadoBot' 4365 category: 'Crawler' 4366 url: 'https://www.kadolijst.nl/bot' 4367 producer: 4368 name: 'Kadolijst' 4369 url: 'https://www.kadolijst.nl/' 4370 4371- regex: 'Dubbotbot' 4372 name: 'Dubbotbot' 4373 category: 'Crawler' 4374 url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent' 4375 producer: 4376 name: 'DubBot' 4377 url: 'https://dubbot.com/' 4378 4379- regex: 'Swiftbot' 4380 name: 'Swiftbot' 4381 category: 'Crawler' 4382 url: 'https://swiftype.com/swiftbot' 4383 producer: 4384 name: 'Elasticsearch, B.V.' 4385 url: 'https://www.elastic.co/' 4386 4387- regex: 'EyeMonIT' 4388 name: 'EyeMonit' 4389 category: 'Site Monitor' 4390 url: 'https://eyemonit.com/' 4391 producer: 4392 name: 'EyeMonit' 4393 url: 'https://eyemonit.com/' 4394 4395- regex: 'ThousandEyes' 4396 name: 'ThousandEyes' 4397 category: 'Site Monitor' 4398 url: 'https://www.thousandeyes.com/' 4399 producer: 4400 name: 'Cisco Systems, Inc.' 4401 url: 'https://www.cisco.com/' 4402 4403- regex: 'OmtrBot' 4404 name: 'OmtrBot' 4405 category: 'Site Monitor' 4406 4407- regex: 'WebMon' 4408 name: 'WebMon' 4409 category: 'Site Monitor' 4410 4411- regex: 'AdsTxtCrawlerTP' 4412 name: 'AdsTxtCrawlerTP' 4413 category: 'Crawler' 4414 4415- regex: 'fragFINN' 4416 name: 'fragFINN' 4417 category: 'Crawler' 4418 url: 'https://www.fragfinn.de/' 4419 producer: 4420 name: 'fragFINN e.V.' 
4421 url: 'https://www.fragfinn.de/' 4422 4423- regex: 'Clickagy' 4424 name: 'Clickagy' 4425 category: 'Crawler' 4426 url: 'https://www.clickagy.com/' 4427 producer: 4428 name: 'Clickagy, LLC' 4429 url: 'https://www.clickagy.com/' 4430 4431- regex: 'kiwitcms-gitops' 4432 name: 'Kiwi TCMS GitOps' 4433 category: 'Service Agent' 4434 url: 'https://kiwitcms.org' 4435 producer: 4436 name: 'Open Technologies Bulgaria, Ltd.' 4437 url: 'https://kiwitcms.org' 4438 4439- regex: 'webtru_crawler' 4440 name: 'webtru' 4441 category: 'Crawler' 4442 url: 'https://webtru.io/' 4443 producer: 4444 name: 'DataSign Inc.' 4445 url: 'https://datasign.jp/' 4446 4447- regex: 'URLSuMaBot' 4448 name: 'URLSuMaBot' 4449 category: 'Crawler' 4450 url: 'https://www.urlsuma.de/' 4451 4452- regex: '360JK yunjiankong' 4453 name: '360JK' 4454 category: 'Site Monitor' 4455 url: 'http://jk.cloud.360.cn/' 4456 producer: 4457 name: '360 Security Technology Inc.' 4458 url: 'https://www.360.cn/' 4459 4460- regex: 'UCSBNetworkMeasurement' 4461 name: 'UCSB Network Measurement' 4462 category: 'Crawler' 4463 url: 'https://www.it.ucsb.edu/' 4464 producer: 4465 name: 'University of California, Santa Barbara' 4466 url: 'https://www.it.ucsb.edu/' 4467 4468- regex: 'Plesk screenshot bot' 4469 name: 'Plesk Screenshot Service' 4470 category: 'Service Agent' 4471 url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service' 4472 producer: 4473 name: 'Plesk International GmbH' 4474 url: 'https://www.plesk.com/' 4475 4476- regex: 'Who\.is' 4477 name: 'Who.is Bot' 4478 category: 'Crawler' 4479 url: 'https://who.is/' 4480 4481- regex: 'Probely' 4482 name: 'Probely' 4483 category: 'Security Checker' 4484 url: 'https://probely.com/sos/' 4485 producer: 4486 name: 'Probely - Soluções de Cibersegurança, S.A.' 
4487 url: 'https://probely.com/' 4488 4489- regex: 'Uptimia' 4490 name: 'Uptimia' 4491 category: 'Site Monitor' 4492 url: 'https://www.uptimia.com/' 4493 producer: 4494 name: 'JJ Online GmbH' 4495 url: 'https://www.uptimia.com/' 4496 4497- regex: '2GDPR' 4498 name: '2GDPR' 4499 category: 'Service Agent' 4500 url: 'https://2gdpr.com/tos' 4501 producer: 4502 name: '2GDPR' 4503 url: 'https://2gdpr.com/' 4504 4505- regex: 'abuse\.xmco\.fr' 4506 name: 'Serenety' 4507 category: 'Security Checker' 4508 url: 'https://abuse.xmco.fr/' 4509 producer: 4510 name: 'XMCO, SASU' 4511 url: 'https://www.xmco.fr/' 4512 4513- regex: 'CheckHost' 4514 name: 'CheckHost' 4515 category: 'Site Monitor' 4516 url: 'https://check-host.net/' 4517 producer: 4518 name: 'CheckHost' 4519 url: 'https://check-host.net/' 4520 4521- regex: 'LAC_IAHarvester' 4522 name: 'LAC IA Harvester' 4523 category: 'Crawler' 4524 url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx' 4525 producer: 4526 name: 'Library and Archives Canada' 4527 url: 'https://library-archives.canada.ca/' 4528 4529- regex: 'InsytfulBot' 4530 name: 'InsytfulBot' 4531 category: 'Crawler' 4532 url: 'https://www.insytful.com/' 4533 producer: 4534 name: 'Zengenti Limited' 4535 url: 'https://www.zengenti.com/' 4536 4537- regex: 'statista\.com' 4538 name: 'Statista' 4539 category: 'Crawler' 4540 url: 'https://www.statista.com/' 4541 producer: 4542 name: 'Statista, Inc.' 4543 url: 'https://www.statista.com/' 4544 4545- regex: 'SubstackContentFetch' 4546 name: 'Substack Content Fetch' 4547 category: 'Crawler' 4548 url: 'https://substack.com/' 4549 producer: 4550 name: 'Substack, Inc.' 4551 url: 'https://substack.com/' 4552 4553- regex: '^ds9' 4554 name: 'Deep SEARCH 9' 4555 category: 'Crawler' 4556 url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/' 4557 producer: 4558 name: 'Copyright Clearance Center, Inc.' 
4559 url: 'https://www.copyright.com/' 4560 4561- regex: 'LiveJournal\.com' 4562 name: 'LiveJournal' 4563 url: 'https://www.livejournal.com/' 4564 category: 'Feed Fetcher' 4565 producer: 4566 name: 'ООО "СИМ"' 4567 url: 'https://www.livejournal.com/' 4568 4569- regex: 'bitdiscovery' 4570 name: 'Tenable.asm' 4571 category: 'Security Checker' 4572 url: 'https://bitdiscovery.com/' 4573 producer: 4574 name: 'Tenable, Inc.' 4575 url: 'https://www.tenable.com/' 4576 4577- regex: 'Castopod' 4578 name: 'Castopod' 4579 category: 'Crawler' 4580 url: 'https://www.castopod.org/' 4581 4582- regex: 'Elastic/Synthetics' 4583 name: 'Elastic Synthetics' 4584 category: 'Site Monitor' 4585 url: 'https://github.com/elastic/synthetics' 4586 producer: 4587 name: 'Elasticsearch B.V.' 4588 url: 'https://www.elastic.co/' 4589 4590- regex: 'WDG_Validator' 4591 name: 'WDG HTML Validator' 4592 category: 'Validator' 4593 url: 'http://www.htmlhelp.com/tools/validator/' 4594 4595- regex: 'scan@aegis.network' 4596 name: 'Aegis' 4597 category: 'Crawler' 4598 url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/' 4599 4600- regex: 'CrawlyProjectCrawler' 4601 name: 'Crawly Project' 4602 category: 'Crawler' 4603 url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/' 4604 4605- regex: 'BDFetch' 4606 name: 'BDFetch' 4607 category: 'Crawler' 4608 url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/' 4609 4610- regex: 'PunkMap' 4611 name: 'Punk Map' 4612 category: 'Security Checker' 4613 url: 'https://github.com/openeasm/punkmap' 4614 4615- regex: 'GenomeCrawlerd' 4616 name: 'Deepfield Genome' 4617 category: 'Crawler' 4618 url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/' 4619 producer: 4620 name: 'Nokia Corporation' 4621 url: 'https://www.nokia.com/' 4622 4623- regex: 'Gaisbot' 4624 name: 'Gaisbot' 4625 category: 'Crawler' 4626 url: 
'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php' 4627 4628- regex: 'FAST-WebCrawler' 4629 name: 'AlltheWeb' 4630 category: 'Crawler' 4631 url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler' 4632 4633- regex: 'ducks\.party' 4634 name: 'ducks.party' 4635 category: 'Security Checker' 4636 url: 'https://ducks.party/' 4637 4638- regex: 'DepSpid' 4639 name: 'DepSpid' 4640 category: 'Crawler' 4641 url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/' 4642 4643- regex: 'Website-info\.net' 4644 name: 'Website-info' 4645 category: 'Crawler' 4646 url: 'https://website-info.net/robot' 4647 producer: 4648 name: 'Meins und Vogel GmbH' 4649 url: 'https://muv.com/' 4650 4651- regex: 'RedekenBot' 4652 name: 'RedekenBot' 4653 category: 'Crawler' 4654 url: 'https://www.redeken.com/en/help/bot.html' 4655 producer: 4656 name: 'Redeken' 4657 url: 'https://www.redeken.com/' 4658 4659- regex: 'semaltbot' 4660 name: 'semaltbot' 4661 category: 'Crawler' 4662 url: 'https://semalt.net/' 4663 producer: 4664 name: 'Semalt LP' 4665 url: 'https://semalt.net/' 4666 4667- regex: 'MakeMerryBot' 4668 name: 'MakeMerryBot' 4669 category: 'Crawler' 4670 url: 'https://makemerry.app/bots' 4671 4672- regex: 'Timpibot' 4673 name: 'Timpibot' 4674 category: 'Crawler' 4675 url: 'https://timpi.io/' 4676 producer: 4677 name: 'Timpi Inc.' 4678 url: 'https://timpi.io/' 4679 4680- regex: 'Validbot' 4681 name: 'ValidBot' 4682 category: 'Crawler' 4683 url: 'https://www.validbot.com/' 4684 producer: 4685 name: 'Jake Olefsky LLC' 4686 url: 'https://www.validbot.com/' 4687 4688- regex: 'NPBot' 4689 name: 'NameProtectBot' 4690 category: 'Crawler' 4691 url: 'https://www.cscglobal.com/cscglobal/home/' 4692 producer: 4693 name: 'NameProtect, Inc.' 
4694 url: 'https://www.cscglobal.com/' 4695 4696- regex: 'domaincodex\.com' 4697 name: 'Domain Codex' 4698 category: 'Crawler' 4699 url: 'https://www.domaincodex.com/' 4700 producer: 4701 name: 'Erie Data Systems, LLC' 4702 url: 'https://www.eriedatasys.com/' 4703 4704- regex: 'Swisscows Favicons' 4705 name: 'Swisscows Favicons' 4706 category: 'Crawler' 4707 url: 'https://swisscows.com/' 4708 producer: 4709 name: 'Swisscows AG' 4710 url: 'https://swisscows.com/' 4711 4712- regex: 'leak\.info' 4713 name: 'leak.info' 4714 category: 'Crawler' 4715 url: 'http://www.leak.info/' 4716 4717- regex: 'workona' 4718 name: 'Workona' 4719 category: 'Crawler' 4720 url: 'https://workona.com/' 4721 producer: 4722 name: 'Workona, Inc.' 4723 url: 'https://workona.com/' 4724 4725- regex: 'Bloglines' 4726 name: 'Bloglines' 4727 category: 'Crawler' 4728 url: 'https://web.archive.org/web/20140309033202/http://www.bloglines.com/' 4729 producer: 4730 name: 'Reply!, Inc.' 4731 url: 'https://www.reply.com/' 4732 4733- regex: 'heritrix' 4734 name: 'Heritrix' 4735 category: 'Crawler' 4736 url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix' 4737 producer: 4738 name: 'The Internet Archive' 4739 url: 'https://archive.org' 4740 4741- regex: 'search\.marginalia\.nu' 4742 name: 'Marginalia' 4743 category: 'Crawler' 4744 url: 'https://www.marginalia.nu/marginalia-search/for-webmasters/' 4745 producer: 4746 name: 'Marginalia' 4747 url: 'https://www.marginalia.nu/' 4748 4749- regex: 'vu-server-health-scanner' 4750 name: 'VU Server Health Scanner' 4751 category: 'Security Checker' 4752 url: 'https://130.37.198.75/index.html' 4753 producer: 4754 name: 'VU Amsterdam' 4755 url: 'https://vu.nl/en' 4756 4757- regex: 'Functionize' 4758 name: 'Functionize' 4759 category: 'Crawler' 4760 url: 'https://www.functionize.com/' 4761 producer: 4762 name: 'Functionize, Inc.' 
4763 url: 'https://www.functionize.com/' 4764 4765- regex: 'Prerender' 4766 name: 'Prerender' 4767 category: 'Crawler' 4768 url: 'https://docs.prerender.io/docs/33-overview-of-prerender-crawlers' 4769 producer: 4770 name: 'saas.group Inc.' 4771 url: 'https://saas.group/' 4772 4773- regex: 'bl\.uk_ldfc_bot' 4774 name: 'The British Library Legal Deposit Bot' 4775 category: 'Crawler' 4776 url: 'https://www.bl.uk/' 4777 producer: 4778 name: 'The British Library' 4779 url: 'https://www.bl.uk/' 4780 4781- regex: 'Miniature\.io' 4782 name: 'Miniature.io' 4783 category: 'Service Agent' 4784 url: 'https://miniature.io/' 4785 producer: 4786 name: 'LCX Ventures Ltd' 4787 url: 'https://www.lcxventures.com/' 4788 4789- regex: 'Convertify' 4790 name: 'Convertify' 4791 category: 'Service Agent' 4792 url: 'https://www.convertify.app/' 4793 producer: 4794 name: 'Convertify' 4795 url: 'https://www.convertify.app/' 4796 4797- regex: 'ZoteroTranslationServer' 4798 name: 'Zotero Translation Server' 4799 category: 'Service Agent' 4800 url: 'https://github.com/wikimedia/mediawiki-services-zotero' 4801 producer: 4802 name: 'The Wikimedia Foundation, Inc.' 4803 url: 'https://www.wikimedia.org/' 4804 4805- regex: 'MuckRack' 4806 name: 'MuckRack' 4807 category: 'Crawler' 4808 url: 'https://muckrack.com/' 4809 producer: 4810 name: 'Muck Rack, LLC' 4811 url: 'https://muckrack.com/' 4812 4813- regex: 'Golfe' 4814 name: 'Golfe' 4815 category: 'Crawler' 4816 url: 'http://www.goo-olfe.ae/bot.html' 4817 4818- regex: 'SpiderLing' 4819 name: 'SpiderLing' 4820 category: 'Crawler' 4821 url: 'https://nlp.fi.muni.cz/projects/biwec/' 4822 producer: 4823 name: 'Natural Language Processing Centre' 4824 url: 'https://nlp.fi.muni.cz/' 4825 4826- regex: 'Bravebot' 4827 name: 'Bravebot' 4828 category: 'Search bot' 4829 url: 'https://search.brave.com/help/brave-search-crawler' 4830 producer: 4831 name: 'Brave Software, Inc.' 
4832 url: 'https://brave.com/' 4833 4834- regex: '1001FirmsBot' 4835 name: '1001FirmsBot' 4836 category: 'Crawler' 4837 url: 'https://www.1001firms.com/1001firmsbot.php' 4838 4839- regex: 'SteamChatURLLookup' 4840 name: 'Steam Chat URL Lookup' 4841 category: 'Service Agent' 4842 url: 'https://help.steampowered.com/en/faqs/view/595C-42F4-3B66-E02F' 4843 producer: 4844 name: 'Valve Corporation' 4845 url: 'https://www.valvesoftware.com/' 4846 4847- regex: 'ohdear\.app' 4848 name: 'Oh Dear' 4849 category: 'Site Monitor' 4850 url: 'https://ohdear.app/docs/faq/what-is-the-oh-dear-crawler-doing-in-my-logs' 4851 producer: 4852 name: 'Immutable, SNC' 4853 url: 'https://ohdear.app/' 4854 4855- regex: 'Inspici' 4856 name: 'Inspici' 4857 category: 'Crawler' 4858 url: 'https://www.inspici.com/' 4859 producer: 4860 name: 'Inspici, LLC' 4861 url: 'https://www.inspici.com/' 4862 4863- regex: 'peer39_crawler' 4864 name: 'Peer39' 4865 category: 'Crawler' 4866 url: 'https://www.peer39.com/crawler-notice' 4867 producer: 4868 name: 'Peer39 Tech, LLC' 4869 url: 'https://www.peer39.com/' 4870 4871- regex: 'Pandalytics' 4872 name: 'Pandalytics' 4873 category: 'Crawler' 4874 url: 'https://www.domainsbot.com/business-intelligence/' 4875 producer: 4876 name: 'DomainsBot, Inc.' 
4877 url: 'https://www.domainsbot.com/' 4878 4879- regex: 'CloudServerMarketSpider' 4880 name: 'CloudServerMarketSpider' 4881 category: 'Crawler' 4882 url: 'https://web.archive.org/web/20151228225429/https://cloudservermarket.com/spider.html' 4883 4884- regex: 'Pigafetta' 4885 name: 'Pigafetta' 4886 category: 'Crawler' 4887 url: 'https://visual-seo.com/Pigafetta-Bot' 4888 producer: 4889 name: 'aStonish Studio Srl' 4890 url: 'http://www.astonishstudio.com/' 4891 4892- regex: 'Cotoyogi' 4893 name: 'Cotoyogi' 4894 category: 'Crawler' 4895 url: 'https://ds.rois.ac.jp/center8/crawler/' 4896 producer: 4897 name: 'Joint Support-Center for Data Science Research (ROIS-DS)' 4898 url: 'https://ds.rois.ac.jp/' 4899 4900- regex: 'SuggestBot' 4901 name: 'SuggestBot' 4902 category: 'Crawler' 4903 url: 'https://github.com/nettrom/suggestbot' 4904 4905- regex: 'cms-experiment' 4906 name: 'CMS Experiment' 4907 category: 'Security Checker' 4908 url: 'https://securitee.org/cms-experiment-fall2024/' 4909 4910- regex: 'SiteCheckerBotCrawler' 4911 name: 'SiteCheckerBotCrawler' 4912 category: 'Crawler' 4913 url: 'https://sitechecker.pro/' 4914 producer: 4915 name: 'Cyber Circus Limited' 4916 url: 'https://sitechecker.pro/' 4917 4918- regex: 'SBIder' 4919 name: 'SBIder' 4920 category: 'Crawler' 4921 url: 'https://www.sitesell.com/sbider.html' 4922 producer: 4923 name: 'SiteSell Inc.' 4924 url: 'https://www.sitesell.com/' 4925 4926- regex: 'LightspeedSystemsCrawler' 4927 name: 'LightspeedSystemsCrawler' 4928 category: 'Crawler' 4929 url: 'https://www.lightspeedsystems.com/' 4930 producer: 4931 name: 'Lightspeed Systems, Inc.' 
4932 url: 'https://www.lightspeedsystems.com/' 4933 4934- regex: 'Research JLU' 4935 name: 'Research JLU' 4936 category: 'Crawler' 4937 url: 'https://www.uni-giessen.de/en/research' 4938 producer: 4939 name: 'Justus Liebig University Giessen' 4940 url: 'https://www.uni-giessen.de/en' 4941 4942- regex: '(?:hgf|OS)AlphaXCrawl' 4943 name: 'AlphaXCrawl' 4944 category: 'Crawler' 4945 url: 'https://www.fim.uni-passau.de/en/data-science/research/open-search' 4946 producer: 4947 name: 'University of Passau' 4948 url: 'https://www.uni-passau.de/en/' 4949 4950- regex: 'WPMU DEV' 4951 name: 'WPMU DEV' 4952 category: 'Crawler' 4953 url: 'https://wpmudev.com/docs/wpmu-dev-plugins/broken-link-checker/#broken-link-checker-user-agent' 4954 producer: 4955 name: 'Incsub, LLC.' 4956 url: 'https://incsub.com/' 4957 4958- regex: 'SnoopSecInspect' 4959 name: 'SnoopSecInspect' 4960 category: 'Security Checker' 4961 url: 'https://web.archive.org/web/20241206193253/https://snoopsec.us.to/' 4962 4963- regex: 'ModatScanner' 4964 name: 'ModatScanner' 4965 category: 'Security Checker' 4966 url: 'https://www.modat.io/scanning' 4967 producer: 4968 name: 'Modat B.V.' 4969 url: 'https://www.modat.io/' 4970 4971- regex: 'researchcyber\.net' 4972 name: 'researchcyber.net' 4973 category: 'Security Checker' 4974 url: 'https://web.archive.org/web/20241219082407/https://researchcyber.net/' 4975 4976- regex: 'CrystalSemanticsBot' 4977 name: 'CrystalSemanticsBot' 4978 category: 'Crawler' 4979 url: 'https://web.archive.org/web/20121230203310/http://www.crystalsemantics.com/user-agent/' 4980 producer: 4981 name: 'Crystal Semantics Ltd.' 
4982 url: 'https://web.archive.org/web/20121029062239/http://www.crystalsemantics.com/' 4983 4984- regex: 'najdu\.s\.holubem\.eu' 4985 name: 'najdu.s.holubem.eu' 4986 category: 'Crawler' 4987 url: 'https://najdu.s.holubem.eu/' 4988 4989- regex: 'VORTEX/' 4990 name: 'VORTEX' 4991 category: 'Crawler' 4992 url: 'https://marty.anstey.ca/robots/vortex' 4993 4994- regex: 'xtate/(\d+\.[.\d]+)' 4995 name: 'xtate' 4996 category: 'Crawler' 4997 url: 'https://github.com/babycoff/xtate' 4998 4999- regex: 'FediList Agent/' 5000 name: 'FediList' 5001 category: 'Social Media Agent' 5002 url: 'https://fedilist.com/' 5003 5004- regex: 'Grafana/(\d+\.[.\d]+)' 5005 name: 'Grafana' 5006 category: 'Site Monitor' 5007 url: 'https://github.com/grafana/grafana' 5008 producer: 5009 name: 'Grafana Labs' 5010 url: 'https://grafana.com/' 5011 5012- regex: 'github-camo' 5013 name: 'Github Camo' 5014 category: 'Crawler' 5015 url: 'https://github.com/atmos/camo' 5016 producer: 5017 name: 'Github' 5018 url: 'https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/about-anonymized-urls' 5019 5020- regex: 'Bluesky' 5021 name: 'Bluesky' 5022 category: 'Social Media Agent' 5023 url: 'https://bsky.app' 5024 producer: 5025 name: 'Bluesky Social PBC' 5026 url: 'https://bsky.app' 5027 5028- regex: 'OpenGraph\.io' 5029 name: 'OpenGraph.io' 5030 category: 'Crawler' 5031 url: 'https://www.opengraph.io' 5032 producer: 5033 name: 'OpenGraph.io' 5034 url: 'https://www.opengraph.io' 5035 5036# Generic bots 5037- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? 
world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherx?web|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|xx032_bo9vs83_2a|sslshed|geckotrail|Wordup|Keydrop|\(compatible\)|John Recon|SPARK COMMIT|masjesu|Komaru_The_Cat|Jesus Christ of Nazareth is LORD|Kowai|Hakai|LoliSec|LMAO|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|Node.js|Report Runner|url|Zeus|ZmEu)$|OnlyScans|TheInternetSearchx' 5038 name: 'Generic Bot' 5039 5040# Generic detections 5041- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|(?<!P)research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)' 5042 name: 'Generic Bot' 5043