xref: /plugin/botmon/config/known-bots.json (revision 12993035b5a997893e460942f7a5343107194a53)
1[	{"id": "bingbot",
2	 "n": "BingBot",
3	 "r": ["bingbot"],
4	 "rx": ["\\sbingbot\\/(\\d+\\.\\d+);"],
5	 "url": "http://www.bing.com/bingbot.htm"
6	},
7	{"id": "googlebot",
8	 "n": "GoogleBot",
9	 "r": ["Googlebot"],
10	 "rx": ["Googlebot\\/(\\d+\\.\\d+)", "Googlebot-Image\\/(\\d+\\.\\d+)"],
11	 "url": "http://www.google.com/bot.html"
12	},
13	{"id": "googleads",
14	 "n": "Google Ads",
15	 "r": ["AdsBot-Google", "AdsBot-Google-Mobile", "Mediapartners-Google"],
16	 "rx": ["AdsBot-Google;","AdsBot-Google-Mobile;", "Mediapartners-Google\\/(\\d+\\.\\d+);"],
17	 "url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
18	},
19	{"id": "googleapi",
20	 "n": "Google API Crawler",
21	 "r": ["APIs-Google"],
22	 "rx": ["APIs-Google"],
23	 "url": "https://developers.google.com/search/docs/crawling-indexing/google-special-case-crawlers"
24	},
25	{"id": "googleother",
26	 "n": "GoogleOther",
27	 "r": ["GoogleOther"],
28	 "rx": ["\\sGoogleOther(\\-\\w+)?[\\)\\/]"],
29	 "url": "https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#googleother"
30	},
31	{"id": "googinspct",
32	 "n": "Google-InspectionTool",
33	 "r": ["Google-InspectionTool"],
34	 "rx": ["\\sGoogle-InspectionTool(\\-\\w+)?[\\)\\/]"],
35	 "url": "https://support.google.com/webmasters/answer/9012289"
36	},
37	{"id": "goognblm",
38	 "n": "Google NotebookLM User",
39	 "r": ["Google-NotebookLM"],
40	 "rx": ["Google-NotebookLM"],
41	 "url": "https://developers.google.com/search/docs/crawling-indexing/google-user-triggered-fetchers"
42	},
43	{"id": "applebot",
44	 "n": "Applebot",
45	 "r": ["Applebot", "Applebot-Extended"],
46	 "rx": ["Applebot\\/(\\d+\\.\\d+);", "Applebot\\-Extended\\/(\\d+\\.\\d+);"],
47	 "url": "http://www.apple.com/go/applebot"
48	},
49	{"id": "baidu",
50	 "n": "Baidu Bots",
51	 "r": ["Baiduspider", "Baiduspider-image", "Baiduspider-video", "Baiduspider-news", "Baiduspider-favo", "Baiduspider-cpro", "Baiduspider-ads", "Baiduspider-render"],
52	 "rx": ["Baiduspider"],
53	 "url": "http://www.baidu.com/search/spider.html"
54	},
55	{"id": "reddit",
56	 "n": "RedditBot",
57	 "r": ["RedditBot"],
58	 "rx": [" redditbot\\/(\\d+\\.\\d+);"],
59	 "url": "http://www.reddit.com/feedback"
60	},
61	{"id": "slurp",
62	 "n": "Slurp (Yahoo!)",
63	 "r": ["Slurp"],
64	 "rx": ["Slurp[\\s;\\)]"],
65	 "url": "http://help.yahoo.com/help/us/ysearch/slurp"
66	},
67	{"id": "yahoolp",
68	 "n": "Yahoo! Link Preview (User)",
69	 "r": ["Yahoo Link Preview"],
70	 "rx": ["Yahoo Link Preview;"],
71	 "url": "https://help.yahoo.com/kb/SLN23615.html"
72	},
73	{"id": "ddg",
74	 "n": "DuckDuck Bot",
75	 "r": ["DuckDuckBot","DuckAssistBot","DuckDuckGo-Favicons-Bot"],
76	 "rx": ["DuckDuckBot\\/(\\d+\\.\\d+);", "DuckAssistBot\\/(\\d+\\.\\d+);", "DuckDuckGo-Favicons-Bot\\/(\\d+\\.\\d+);"],
77	 "url": "https://duckduckgo.com/duckduckbot.html"
78	},
79	{"id": "marginalia",
80	 "n": "Marginalia.nu",
81	 "r": [],
82	 "rx": ["^search\\.marginalia\\.nu"],
83	 "url": "https://marginalia-search.com/"
84	},
85	{"id": "openai",
86	 "n": "OpenAI/ChatGPT",
87	 "r": ["OAI-SearchBot", "GPTBot"],
88	 "rx": ["OAI-SearchBot\\/(\\d+\\.\\d+);", "GPTBot\\/(\\d+\\.\\d+);"],
89	 "url": "https://platform.openai.com/docs/bots/"
90	},
91	{"id": "chatgpt",
92	 "n": "ChatGPT User",
93	 "r": ["ChatGPT-User"],
94	 "rx": ["ChatGPT\\-User\\/(\\d\\.\\d);"],
95	 "url": "https://platform.openai.com/docs/bots/"
96	},
97	{"id": "claude",
98	 "n": "Anthropic Claude",
99	 "r": ["ClaudeBot", "Claude-SearchBot"],
100	 "rx": ["ClaudeBot\\/(\\d+\\.\\d+);"],
101	 "url": "https://support.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
102	},
103	{"id": "claudeusr",
104	 "n": "ClaudeAI User",
105	 "r": ["Claude-User"],
106	 "rx": ["Claude-User\\/(\\d+\\.\\d+);"],
107	 "url": "https://support.claude.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler"
108	},
109	{"id": "perplexity",
110	 "n": "Perplexity Bot",
111	 "r": ["PerplexityBot"],
112	 "rx": ["PerplexityBot\\/(\\d+\\.\\d+);"],
113	 "url": "https://perplexity.ai/perplexitybot"
114	},
115	{"id": "perplexityusr",
116	 "n": "Perplexity User",
117	 "r": ["Perplexity-User"],
118	 "rx": ["Perplexity\\-User\\/(\\d+\\.\\d+);"],
119	 "url": "https://perplexity.ai/perplexitybot"
120	},
121	{"id": "metabots",
122	 "n": "Meta/Facebook",
123	 "r": ["meta-webindexer","meta-externalads","meta-externalagent"],
124	 "rx": ["facebook[cw]\\w+\\/(\\d+\\.\\d+)", "meta-externala\\w+\\/(\\d+\\.\\d+)"],
125	 "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
126	},
127	{"id": "metauser",
128	 "n": "Meta/Facebook User",
129	 "r": ["facebookexternalhit","facebookcatalog"],
130	 "rx": ["facebookexternalhit\\/(\\d+\\.?\\d*)", "meta-externalfetcher\\/(\\d\\.\\d)", "facebookcatalog\\/(\\d+\\.?\\d*)"],
131	 "url": "https://developers.facebook.com/docs/sharing/webmasters/crawler"
132	},
133	{"id": "qwant",
134	 "n": "Qwant Web crawler",
135	 "r": ["Qwantbot", "Qwantbot-news"],
136	 "rx": ["Qwantbot\\/(\\d+\\.\\d+)[_;\\s]"],
137	 "url": "https://help.qwant.com/bot/"
138	},
139	{"id": "yandex",
140	 "n": "Yandex",
141	 "r": ["YandexBot", "YandexAdNet", "YandexBlogs", "YandexImages", "YandexImageResizer", "YandexMarket", "YandexMedia", "YandexOntoDB", "YandexSitelinks","YandexSpravBot", "YandexVertis", "YandexVerticals", "YandexVideo", "YandexWebmaster", "YandexComBot"],
142	 "rx": ["Yandex\\w+\\/(\\d+\\.\\d+);"],
143	 "url": "http://yandex.com/bots"
144	},
145	{"id": "seznambot",
146	 "n": "SeznamBot",
147	 "r": ["SeznamBot"],
148	 "rx": ["SeznamBot\\/(\\d+\\.\\d+);"],
149	 "url": "https://o-seznam.cz/napoveda/vyhledavani/en/seznambot-crawler/"
150	},
151	{"id": "ahrefs",
152	 "n": "Ahrefs",
153	 "r": ["AhrefsBot", "AhrefsSiteAudit"],
154	 "rx": ["AhrefsBot\\/(\\d+\\.\\d+);", "AhrefsSiteAudit\\/(\\d+\\.\\d+);"],
155	 "url": "https://ahrefs.com/robot/"
156	},
157	{"id": "ccbot",
158	 "n": "CommonCrawl Bot",
159	 "r": ["CCBot"],
160	 "rx": ["CCBot\\/(\\d+\\.\\d+)[\\s\\.;]*"],
161	 "url": "https://commoncrawl.org/bot.html"
162	},
163	{"id": "mjbot",
164	 "n": "Majestic Crawler (UK)",
165	 "r": ["MJ12bot"],
166	 "rx": ["MJ12bot\\/v?(\\d+\\.\\d+)[\\s\\.;]"],
167	 "url": "http://www.majestic12.co.uk/bot.php"
168	},
169	{"id": "petal",
170	 "n": "PetalSearch Bot (CN)",
171	 "r": ["PetalBot", "AspiegelBot"],
172	 "rx": ["[\\s;]PetalBot[\\s\\/;]", "AspiegelBot(?:\\)|$)"],
173	 "url": "https://webmaster.petalsearch.com/site/petalbot"
174	},
175	{"id": "barkrowler",
176	 "n": "Barkrowler (Babbar)",
177	 "r": ["Barkrowler"],
178	 "rx": ["(?:^|[\\s;])Barkrowler\\/(\\d+\\.\\d+)?"],
179	 "url": "https://babbar.tech/crawler"
180	},
181	{"id": "semrush",
182	 "n": "SemrushBot",
183	 "r": ["SemrushBot"],
184	 "rx": ["SemrushBot-?[A-Z]*\\/(\\d+\\.?\\d*)[;~\\s]"],
185	 "url": "http://www.semrush.com/bot.html"
186	},
187	{"id": "bytespider",
188	 "n": "Bytespider (ByteDance, TikTok)",
189	 "r": ["Bytespider"],
190	 "rx": ["Bytespider(?:;|$)"],
191	 "url": "https://darkvisitors.com/agents/bytespider"
192	},
193	{"id": "dfseo",
194	 "n": "DataForSEO Bot",
195	 "r": ["DataForSeoBot"],
196	 "rx": ["DataForSeoBot[;\\/](\\d\\.\\d)?"],
197	 "url": "https://dataforseo.com/dataforseo-bot"
198	},
199	{"id": "hunter",
200	 "n": "Hunter (Velen) Crawler",
201	 "r": ["VelenPublicWebCrawler"],
202	 "rx": ["VelenPublicWebCrawler\\/(\\d\\.\\d)"],
203	 "url": "https://velen.io"
204	},
205	{"id": "serpstat",
206	 "n": "Serpstat Bot",
207	 "r": ["serpstatbot"],
208	 "rx": ["serpstatbot\\/(\\d\\.\\d)\\s"],
209	 "url": "https://serpstatbot.com/"
210	},
211	{"id": "netestate",
212	 "n": "netEstate NE Crawler",
213	 "r": ["netEstate NE Crawler"],
214	 "rx": ["netEstate NE Crawler\\s"],
215	 "url": "http://www.website-datenbank.de/"
216	},
217	{"id": "imagesift",
218	 "n": "Hive Imagesift Bot (AI Scraper)",
219	 "r": ["ImagesiftBot"],
220	 "rx": ["ImagesiftBot[;\\s\\/]"],
221	 "url": "https://darkvisitors.com/agents/imagesiftbot"
222	},
223	{"id": "moz",
224	 "n": "Moz.com Bots",
225	 "r": ["dotbot", "rogerbot"],
226	 "rx": ["DotBot\\/(\\d\\.\\d)[\\.;\\s]", "RogerBot\\/(\\d\\.\\d)[\\.;\\s]"],
227	 "url": "https://moz.com/help/moz-procedures/crawlers/dotbot"
228	},
229	{"id": "maui",
230	 "n": "MauiBot (unknown developer)",
231	 "r": ["MauiBot"],
232	 "rx": ["MauiBot\\s"],
233	 "url": "https://darkvisitors.com/agents/mauibot"
234	},
235	{"id": "plagaware",
236	 "n": "PlagAwareBot (DE)",
237	 "r": ["PlagAwareBot"],
238	 "rx": ["PlagAwareBot\\/(\\d+\\.\\d+)"],
239	 "url": "https://www.plagaware.com/bot"
240	},
241	{"id": "discord",
242	 "n": "Discord User",
243	 "r": ["Discordbot"],
244	 "rx": ["Discordbot\\/(\\d\\.\\d);"],
245	 "url": "https://datadome.co/bots/discordbot/"
246	},
247	{"id": "twitter",
248	 "n": "Twitter User",
249	 "r": ["Twitterbot"],
250	 "rx": ["^Twitterbot\\/(\\d\\.?\\d*)"],
251	 "url": "https://datadome.co/bots/twitterbot/"
252	},
253	{"id": "whatsapp",
254	 "n": "WhatsApp User",
255	 "r": ["WhatsApp"],
256	 "rx": ["WhatsApp/(\\d+\\.?\\d*)"],
257	 "url": "https://darkvisitors.com/agents/whatsapp/"
258	},
259	{"id": "tiktok",
260	 "n": "TikTok User",
261	 "r": ["TikTokSpider"],
262	 "rx": ["TikTokSpider;"],
263	 "url": "https://darkvisitors.com/agents/tiktokspider"
264	},
265	{"id": "blexbot",
266	 "n": "BLEXBot",
267	 "r": ["BLEXBot"],
268	 "rx": ["BLEXBot\\/(\\d\\.\\d);"],
269	 "url": "https://help.seranking.com/en/blex-crawler"
270	},
271	{"id": "sogou",
272	 "n": "Sogou web spider",
273	 "r": ["Sogou web spider"],
274	 "rx": ["Sogou web spider\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
275	 "url": "http://www.sogou.com/docs/help/webmasters.htm#07"
276	},
277	{"id": "amazon",
278	 "n": "Amazonbot",
279	 "r": ["Amazonbot"],
280	 "rx": ["Amazonbot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
281	 "url": "https://developer.amazon.com/amazonbot"
282	},
283	{"id": "sop",
284	 "n": "SitesOverPagesBot",
285	 "r": ["SitesOverPagesBot"],
286	 "rx": ["SitesOverPagesBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
287	 "url": "https://www.sitesoverpages.com/bot"
288	},
289	{"id": "startme",
290	 "n": "StartmeBot",
291	 "r": ["StartmeBot"],
292	 "rx": ["StartmeBot\\/(\\d+\\.\\d+)[;\\s\\(\\.]"],
293	 "url": "https://support.start.me/en/articles/9182933-what-is-startmebot"
294	},
295	{"id": "bitsight",
296	 "n": "BitSightBot",
297	 "r": ["BitSightBot"],
298	 "rx": ["BitSightBot\\/(\\d+\\.\\d+)"],
299	 "url": "https://datadome.co/bots/bitsightbot/"
300	},
301	{"id": "terracotta",
302	 "n": "TerraCotta Bot",
303	 "r": ["TerraCotta"],
304	 "rx": ["TerraCotta"],
305	 "url": "https://github.com/CeramicTeam/CeramicTerracotta"
306	},
307	{"id": "halobot",
308	 "n": "HaloBot",
309	 "r": ["HaloBot"],
310	 "rx": [" HaloBot\\/(\\d+)\\."],
311	 "url": "https://www.haloscan.com/"
312	},
313	{"id": "wayback",
314	 "n": "Internet Archive",
315	 "r": ["archive.org_bot"],
316	 "rx": [" archive.org_bot "],
317	 "url": "https://archive.org/details/archive.org_bot"
318	},
319	{"id": "alibaba",
320	 "n": "Alibaba AliyunSec Bot",
321	 "r": ["AliyunSecBot"],
322	 "rx": ["AliyunSecBot/Aliyun"],
323	 "url": "https://www.aqtronix.com/useragents/?Action=ShowAgentDetails&Name=AliyunSecBot"
324	},
325	{"id": "ibou",
326	 "n": "Ibou Bot",
327	 "r": ["IbouBot"],
328	 "rx": ["IbouBot\\/(\\d+\\.\\d+)"],
329	 "url": "https://ibou.io/iboubot.html"
330	},
331	{"id": "bnl",
332	 "n": "BnL Harvester",
333	 "r": [],
334	 "rx": ["NLUX_IAHarvester\\/(\\d+)"],
335	 "url": "http://crawl.bnl.lu/"
336	},
337	{"id": "turnitin",
338	 "n": "TurnitinBot",
339	 "r": [],
340	 "rx": ["Turnitin\\s"],
341	 "url": "https://www.turnitin.com/robot/crawlerinfo.html"
342	}
343]