xref: /plugin/botmon/script.js (revision 7bd08c309d927f2844d27929cdc2e261800022d5)
1/* DokuWiki BotMon Plugin Script file */
2/* 30.08.2025 - 0.1.6 - pre-release */
3/* Authors: Sascha Leib <ad@hominem.info> */
4
/**
 * Root object of the BotMon admin script.
 *
 * Holds a few page-wide settings and the internal tools (`t`). Other parts of
 * the script attach their own sub-objects to it; every sub-object that has an
 * init() method gets that method called from BotMon.init().
 */
const BotMon = {

	/**
	 * Entry point: works out the basic settings, then calls the init() of
	 * every sub-object that has one.
	 *
	 * Reads document.currentScript, which the browser only provides while the
	 * script itself is being executed – so this must not be called from a
	 * later event handler or timer.
	 */
	init: function() {
		console.info('BotMon.init()');

		// find the plugin basedir (everything in the script URL before '/exe/'):
		const src = document.currentScript.src;
		this._baseDir = src.substring(0, src.indexOf('/exe/')) + '/plugins/botmon/';

		// read the page language from the DOM (the default stays if the page has none):
		this._lang = document.getRootNode().documentElement.lang || this._lang;

		// get the time offset:
		this._timeDiff = BotMon.t._getTimeOffset();

		// init the sub-objects:
		BotMon.t._callInit(this);
	},

	_baseDir: null, // base URL of the plugin directory, set by init()
	_lang: 'en', // page language, 'en' unless the page says otherwise
	_today: (new Date()).toISOString().slice(0, 10), // current date as YYYY-MM-DD (UTC)
	_timeDiff: '', // readable time offset as produced by t._getTimeOffset()

	/* internal tools */
	t: {

		/**
		 * Helper function to call the inits of sub-objects.
		 *
		 * Goes through the own enumerable properties of `obj`; every property
		 * that is an object with an init() method gets that method called,
		 * with the sub-object as `this` and `obj` as the only argument.
		 *
		 * @param {object} obj - the parent object whose sub-objects are to be initialised
		 */
		_callInit: function(obj) {
			//console.info('BotMon.t._callInit(obj=',obj,')');

			Object.keys(obj).forEach( (key) => {
				const sub = obj[key];

				// typeof null is 'object' as well, so null has to be ruled out
				// explicitly before looking for an init method on it:
				if (sub === null || typeof sub !== 'object') return;

				const init = sub.init;
				if (typeof init === 'function') {
					init.call(sub, obj);
				}
			});
		},

		/**
		 * Helper function to calculate the time difference to UTC.
		 *
		 * The sign is the one of Date.prototype.getTimezoneOffset(), i.e. the
		 * value is negative when the local time is ahead of UTC.
		 *
		 * @returns {string} e.g. '-2 h', '-5 h 30 min' or '-30 min'; an empty
		 *                   string if there is no difference at all
		 */
		_getTimeOffset: function() {
			const total = (new Date()).getTimezoneOffset(); // in minutes
			const sign = Math.sign(total); // +1, -1 (or 0, if there is no offset)
			const abs = Math.abs(total); // always positive

			const hours = Math.floor(abs / 60);
			const minutes = abs % 60;

			if (hours > 0) {
				return `${sign * hours} h` + (minutes > 0 ? ` ${minutes} min` : '');
			}

			// less than an hour: there is no hours part to carry the sign,
			// so it has to go on the minutes instead
			return (minutes > 0 ? `${sign * minutes} min` : '');
		}
	}
};
68
69/* everything specific to the "Today" tab is self-contained in the "live" object: */
70BotMon.live = {
71	init: function() {
72		//console.info('BotMon.live.init()');
73
74		// set the title:
75		const tDiff = '(<abbr title="Coordinated Universal Time">UTC</abbr>' + (BotMon._timeDiff != '' ? `, ${BotMon._timeDiff}` : '' ) + ')';
76		BotMon.live.gui.status.setTitle(`Data for <time datetime=${BotMon._today}>${BotMon._today}</time> ${tDiff}`);
77
78		// init sub-objects:
79		BotMon.t._callInit(this);
80	},
81
82	data: {
83		init: function() {
84			//console.info('BotMon.live.data.init()');
85
86			// call sub-inits:
87			BotMon.t._callInit(this);
88		},
89
90		// this will be called when the known json files are done loading:
91		_dispatch: function(file) {
92			//console.info('BotMon.live.data._dispatch(,',file,')');
93
94			// shortcut to make code more readable:
95			const data = BotMon.live.data;
96
97			// set the flags:
98			switch(file) {
99				case 'bots':
100					data._dispatchBotsLoaded = true;
101					break;
102				case 'clients':
103					data._dispatchClientsLoaded = true;
104					break;
105				case 'platforms':
106					data._dispatchPlatformsLoaded = true;
107					break;
108				default:
109					// ignore
110			}
111
112			// are all the flags set?
113			if (data._dispatchBotsLoaded && data._dispatchClientsLoaded && data._dispatchPlatformsLoaded) {
114				// chain the log files loading:
115				BotMon.live.data.loadLogFile('srv', BotMon.live.data._onServerLogLoaded);
116			}
117		},
118		// flags to track which data files have been loaded:
119		_dispatchBotsLoaded: false,
120		_dispatchClientsLoaded: false,
121		_dispatchPlatformsLoaded: false,
122
123		// event callback, after the server log has been loaded:
124		_onServerLogLoaded: function() {
125			//console.info('BotMon.live.data._onServerLogLoaded()');
126
127			// chain the client log file to load:
128			BotMon.live.data.loadLogFile('log', BotMon.live.data._onClientLogLoaded);
129		},
130
131		// event callback, after the client log has been loaded:
132		_onClientLogLoaded: function() {
133			console.info('BotMon.live.data._onClientLogLoaded()');
134
135			// chain the ticks file to load:
136			BotMon.live.data.loadLogFile('tck', BotMon.live.data._onTicksLogLoaded);
137
138		},
139
140		// event callback, after the tiker log has been loaded:
141		_onTicksLogLoaded: function() {
142			console.info('BotMon.live.data._onTicksLogLoaded()');
143
144			// analyse the data:
145			BotMon.live.data.analytics.analyseAll();
146
147			// sort the data:
148			// #TODO
149
150			// display the data:
151			BotMon.live.gui.overview.make();
152
153			console.log(BotMon.live.data.model._visitors);
154
155		},
156
157		model: {
158			// visitors storage:
159			_visitors: [],
160
161			// find an already existing visitor record:
162			findVisitor: function(id) {
163
164				// shortcut to make code more readable:
165				const model = BotMon.live.data.model;
166
167				// loop over all visitors already registered:
168				for (let i=0; i<model._visitors.length; i++) {
169					const v = model._visitors[i];
170					if (v && v.id == id) return v;
171				}
172				return null; // nothing found
173			},
174
175			/* if there is already this visit registered, return it (used for updates) */
176			_getVisit: function(visit, view) {
177
178				// shortcut to make code more readable:
179				const model = BotMon.live.data.model;
180
181
182				for (let i=0; i<visit._pageViews.length; i++) {
183					const pv = visit._pageViews[i];
184					if (pv.pg == view.pg && // same page id, and
185						view.ts.getTime() - pv._firstSeen.getTime() < 1200000) { // seen less than 20 minutes ago
186							return pv; // it is the same visit.
187					}
188				}
189				return null; // not found
190			},
191
192			// register a new visitor (or update if already exists)
193			registerVisit: function(dat) {
194				//console.info('registerVisit', dat);
195
196				// shortcut to make code more readable:
197				const model = BotMon.live.data.model;
198
199				// check if it already exists:
200				let visitor = model.findVisitor(dat.id);
201				if (!visitor) {
202					const bot = BotMon.live.data.bots.match(dat.client);
203
204					model._visitors.push(dat);
205					visitor = dat;
206					visitor._firstSeen = dat.ts;
207					visitor._lastSeen = dat.ts;
208					visitor._isBot = ( bot ? 1.0 : 0.0 ); // likelihood of being a bot; primed to 0% or 100% in case of a known bot
209					visitor._pageViews = []; // array of page views
210					visitor._hasReferrer = false; // has at least one referrer
211					visitor._jsClient = false; // visitor has been seen logged by client js as well
212					visitor._client = bot ?? BotMon.live.data.clients.match(dat.client) ?? null; // client info (browser, bot, etc.)
213					visitor._platform = BotMon.live.data.platforms.match(dat.client); // platform info
214
215					// known bots get the bot ID as identifier:
216					if (bot) visitor.id = bot.id;
217				}
218
219				// find browser
220
221				// is this visit already registered?
222				let prereg = model._getVisit(visitor, dat);
223				if (!prereg) {
224					// add the page view to the visitor:
225					prereg = {
226						_by: 'srv',
227						ip: dat.ip,
228						pg: dat.pg,
229						ref: dat.ref || '',
230						_firstSeen: dat.ts,
231						_lastSeen: dat.ts,
232						_jsClient: false
233					};
234					visitor._pageViews.push(prereg);
235				}
236
237				// update referrer state:
238				visitor._hasReferrer = visitor._hasReferrer ||
239					(prereg.ref !== undefined && prereg.ref !== '');
240
241				// update time stamp for last-seen:
242				visitor._lastSeen = dat.ts;
243
244				// if needed:
245				return visitor;
246			},
247
248			// updating visit data from the client-side log:
249			updateVisit: function(dat) {
250				//console.info('updateVisit', dat);
251
252				// shortcut to make code more readable:
253				const model = BotMon.live.data.model;
254
255				let visitor = model.findVisitor(dat.id);
256				if (!visitor) {
257					visitor = model.registerVisit(dat);
258				}
259				if (visitor) {
260					visitor._lastSeen = dat.ts;
261					visitor._jsClient = true; // seen by client js
262else {
263					console.warn(`No visit with ID ${dat.id}.`);
264					return;
265				}
266
267				// find the page view:
268				let prereg = model._getVisit(visitor, dat);
269				if (prereg) {
270					// update the page view:
271					prereg._lastSeen = dat.ts;
272					prereg._jsClient = true; // seen by client js
273				} else {
274					// add the page view to the visitor:
275					prereg = {
276						_by: 'log',
277						ip: dat.ip,
278						pg: dat.pg,
279						ref: dat.ref || '',
280						_firstSeen: dat.ts,
281						_lastSeen: dat.ts,
282						_jsClient: true
283					};
284					visitor._pageViews.push(prereg);
285				}
286			},
287
288			// updating visit data from the ticker log:
289			updateTicks: function(dat) {
290				//console.info('updateTicks', dat);
291
292				// shortcut to make code more readable:
293				const model = BotMon.live.data.model;
294
295				// find the visit info:
296				let visitor = model.findVisitor(dat.id);
297				if (!visitor) {
298					console.warn(`No visitor with ID ${dat.id}, registering a new one.`);
299					visitor = model.registerVisit(dat);
300				}
301				if (visitor) {
302					// update "last seen":
303					if (visitor._lastSeen < dat.ts) visitor._lastSeen = dat.ts;
304
305					// get the page view info:
306					const pv = model._getVisit(visitor, dat);
307					if (pv) {
308						// update the page view info:
309						if (pv._lastSeen.getTime() < dat.ts.getTime()) pv._lastSeen = dat.ts;
310					} else {
311						console.warn(`No page view for visit ID ${dat.id}, page ${dat.pg}, registering a new one.`);
312
313						// add a new page view to the visitor:
314						const newPv = {
315							_by: 'tck',
316							ip: dat.ip,
317							pg: dat.pg,
318							ref: '',
319							_firstSeen: dat.ts,
320							_lastSeen: dat.ts,
321							_jsClient: false
322						};
323						visitor._pageViews.push(newPv);
324					}
325
326				} else {
327					console.warn(`No visit with ID ${dat.id}.`);
328					return;
329				}
330
331			}
332		},
333
334		analytics: {
335
336			init: function() {
337				console.info('BotMon.live.data.analytics.init()');
338			},
339
340			// data storage:
341			data: {
342				totalVisits: 0,
343				totalPageViews: 0,
344				bots: {
345					known: 0,
346					likely: 0,
347					human: 0,
348					users: 0
349				}
350			},
351
352			// sort the visits by type:
353			groups: {
354				knownBots: [],
355				likelyBots: [],
356				humans: [],
357				users: []
358			},
359
360			// all analytics
361			analyseAll: function() {
362				//console.info('BotMon.live.data.analytics.analyseAll()');
363
364				// shortcut to make code more readable:
365				const model = BotMon.live.data.model;
366
367				// loop over all visitors:
368				model._visitors.forEach( (v) => {
369
370					// count visits and page views:
371					this.data.totalVisits += 1;
372					this.data.totalPageViews += v._pageViews.length;
373
374					// check for typical bot aspects:
375					let botScore = v._isBot; // start with the known bot score
376
377					if (v._isBot >= 1.0) { // known bots
378
379						this.data.bots.known += 1;
380						this.groups.knownBots.push(v);
381
382					} if (v.usr && v.usr != '') { // known users
383						this.groups.users.push(v);
384						this.data.bots.users += 1;
385					} else {
386						// not a known bot, nor a known user; check other aspects:
387
388						// no referrer at all:
389						if (!v._hasReferrer) botScore += 0.2;
390
391						// no js client logging:
392						if (!v._jsClient) botScore += 0.2;
393
394						// average time between page views less than 30s:
395						if (v._pageViews.length > 1) {
396							botScore -= 0.2; // more than one view: good!
397							let totalDiff = 0;
398							for (let i=1; i<v._pageViews.length; i++) {
399								const diff = v._pageViews[i]._firstSeen.getTime() - v._pageViews[i-1]._lastSeen.getTime();
400								totalDiff += diff;
401							}
402							const avgDiff = totalDiff / (v._pageViews.length - 1);
403							if (avgDiff < 30000) botScore += 0.2;
404							else if (avgDiff < 60000) botScore += 0.1;
405						}
406
407						// decide based on the score:
408						if (botScore >= 0.5) {
409							this.data.bots.likely += 1;
410							this.groups.likelyBots.push(v);
411						} else {
412							this.data.bots.human += 1;
413							this.groups.humans.push(v);
414						}
415					}
416				});
417
418				console.log(this.data);
419				console.log(this.groups);
420			}
421
422		},
423
424		bots: {
425			// loads the list of known bots from a JSON file:
426			init: async function() {
427				//console.info('BotMon.live.data.bots.init()');
428
429				// Load the list of known bots:
430				BotMon.live.gui.status.showBusy("Loading known bots …");
431				const url = BotMon._baseDir + 'data/known-bots.json';
432				try {
433					const response = await fetch(url);
434					if (!response.ok) {
435						throw new Error(`${response.status} ${response.statusText}`);
436					}
437
438					BotMon.live.data.bots._list = await response.json();
439					BotMon.live.data.bots._ready = true;
440
441					// TODO: allow using the bots list...
442				} catch (error) {
443					BotMon.live.gui.status.setError("Error while loading the ‘known bots’ file: " + error.message);
444				} finally {
445					BotMon.live.gui.status.hideBusy("Status: Done.");
446					BotMon.live.data._dispatch('bots')
447				}
448			},
449
450			// returns bot info if the clientId matches a known bot, null otherwise:
451			match: function(client) {
452				//console.info('BotMon.live.data.bots.match(',client,')');
453
454				if (client) {
455					for (let i=0; i<BotMon.live.data.bots._list.length; i++) {
456						const bot = BotMon.live.data.bots._list[i];
457						for (let j=0; j<bot.rx.length; j++) {
458							if (client.match(new RegExp(bot.rx[j]))) {
459								return bot; // found a match
460							}
461						}
462						return null; // not found!
463					}
464				}
465			},
466
467			// indicates if the list is loaded and ready to use:
468			_ready: false,
469
470			// the actual bot list is stored here:
471			_list: []
472		},
473
474		clients: {
475			// loads the list of known clients from a JSON file:
476			init: async function() {
477				//console.info('BotMon.live.data.clients.init()');
478
479				// Load the list of known bots:
480				BotMon.live.gui.status.showBusy("Loading known clients");
481				const url = BotMon._baseDir + 'data/known-clients.json';
482				try {
483					const response = await fetch(url);
484					if (!response.ok) {
485						throw new Error(`${response.status} ${response.statusText}`);
486					}
487
488					BotMon.live.data.clients._list = await response.json();
489					BotMon.live.data.clients._ready = true;
490
491				} catch (error) {
492					BotMon.live.gui.status.setError("Error while loading the known clients file: " + error.message);
493				} finally {
494					BotMon.live.gui.status.hideBusy("Status: Done.");
495					BotMon.live.data._dispatch('clients')
496				}
497			},
498
499			// returns bot info if the clientId matches a known bot, null otherwise:
500			match: function(cid) {
501				//console.info('BotMon.live.data.clients.match(',cid,')');
502
503				let match = {"n": "Unknown", "v": -1, "id": null};
504
505				if (cid) {
506					BotMon.live.data.clients._list.find(client => {
507						let r = false;
508						for (let j=0; j<client.rx.length; j++) {
509							const rxr = cid.match(new RegExp(client.rx[j]));
510							if (rxr) {
511								match.n = client.n;
512								match.v = (rxr.length > 1 ? rxr[1] : -1);
513								match.id = client.id || null;
514								r = true;
515								break;
516							}
517						}
518						return r;
519					});
520				}
521
522				return match;
523			},
524
525			// indicates if the list is loaded and ready to use:
526			_ready: false,
527
528			// the actual bot list is stored here:
529			_list: []
530
531		},
532
533		platforms: {
534			// loads the list of known platforms from a JSON file:
535			init: async function() {
536				//console.info('BotMon.live.data.platforms.init()');
537
538				// Load the list of known bots:
539				BotMon.live.gui.status.showBusy("Loading known platforms");
540				const url = BotMon._baseDir + 'data/known-platforms.json';
541				try {
542					const response = await fetch(url);
543					if (!response.ok) {
544						throw new Error(`${response.status} ${response.statusText}`);
545					}
546
547					BotMon.live.data.platforms._list = await response.json();
548					BotMon.live.data.platforms._ready = true;
549
550				} catch (error) {
551					BotMon.live.gui.status.setError("Error while loading the known platforms file: " + error.message);
552				} finally {
553					BotMon.live.gui.status.hideBusy("Status: Done.");
554					BotMon.live.data._dispatch('platforms')
555				}
556			},
557
558			// returns bot info if the browser id matches a known platform:
559			match: function(cid) {
560				//console.info('BotMon.live.data.platforms.match(',cid,')');
561
562				let match = {"n": "Unknown", "id": null};
563
564				if (cid) {
565					BotMon.live.data.platforms._list.find(platform => {
566						let r = false;
567						for (let j=0; j<platform.rx.length; j++) {
568							const rxr = cid.match(new RegExp(platform.rx[j]));
569							if (rxr) {
570								match.n = platform.n;
571								match.v = (rxr.length > 1 ? rxr[1] : -1);
572								match.id = platform.id || null;
573								r = true;
574								break;
575							}
576						}
577						return r;
578					});
579				}
580
581				return match;
582			},
583
584			// indicates if the list is loaded and ready to use:
585			_ready: false,
586
587			// the actual bot list is stored here:
588			_list: []
589
590		},
591
592		loadLogFile: async function(type, onLoaded = undefined) {
593			// console.info('BotMon.live.data.loadLogFile(',type,')');
594
595			let typeName = '';
596			let columns = [];
597
598			switch (type) {
599				case "srv":
600					typeName = "Server";
601					columns = ['ts','ip','pg','id','typ','usr','client','ref'];
602					break;
603				case "log":
604					typeName = "Page load";
605					columns = ['ts','ip','pg','id','usr','lt','ref','client'];
606					break;
607				case "tck":
608					typeName = "Ticker";
609					columns = ['ts','ip','pg','id','client'];
610					break;
611				default:
612					console.warn(`Unknown log type ${type}.`);
613					return;
614			}
615
616			// Show the busy indicator and set the visible status:
617			BotMon.live.gui.status.showBusy(`Loading ${typeName} log file …`);
618
619			// compose the URL from which to load:
620			const url = BotMon._baseDir + `logs/${BotMon._today}.${type}.txt`;
621			//console.log("Loading:",url);
622
623			// fetch the data:
624			try {
625				const response = await fetch(url);
626				if (!response.ok) {
627					throw new Error(`${response.status} ${response.statusText}`);
628				}
629
630				const logtxt = await response.text();
631
632				logtxt.split('\n').forEach((line) => {
633					if (line.trim() === '') return; // skip empty lines
634					const cols = line.split('\t');
635
636					// assign the columns to an object:
637					const data = {};
638					cols.forEach( (colVal,i) => {
639						colName = columns[i] || `col${i}`;
640						const colValue = (colName == 'ts' ? new Date(colVal) : colVal);
641						data[colName] = colValue;
642					});
643
644					// register the visit in the model:
645					switch(type) {
646						case 'srv':
647							BotMon.live.data.model.registerVisit(data);
648							break;
649						case 'log':
650							BotMon.live.data.model.updateVisit(data);
651							break;
652						case 'tck':
653							BotMon.live.data.model.updateTicks(data);
654							break;
655						default:
656							console.warn(`Unknown log type ${type}.`);
657							return;
658					}
659				});
660
661				if (onLoaded) {
662					onLoaded(); // callback after loading is finished.
663				}
664
665			} catch (error) {
666				BotMon.live.gui.status.setError(`Error while loading the ${typeName} log file: ${error.message}.`);
667			} finally {
668				BotMon.live.gui.status.hideBusy("Status: Done.");
669			}
670		}
671	},
672
673	gui: {
674
675		overview: {
676			make: function() {
677				const data = BotMon.live.data.analytics.data;
678				const parent = document.getElementById('botmon__today__content');
679				if (parent) {
680					jQuery(parent).prepend(jQuery(`
681						<details id="botmon__today__overview" open>
682							<summary>Overview</summary>
683							<div class="grid-3-columns">
684								<dl>
685									<dt>Web metrics</dt>
686									<dd><span>Total visits:</span><span>${data.totalVisits}</span></dd>
687									<dd><span>Total page views:</span><span>${data.totalPageViews}</span></dd>
688									<dd><span>Bounce rate:</span><span>(TBD)</span></dd>
689								</dl>
690								<dl>
691									<dt>Bots vs. Humans</dt>
692									<dd><span>Known bots:</span><span>${data.bots.known}</span></dd>
693									<dd><span>Likely bots:</span><span>${data.bots.likely}</span></dd>
694									<dd><span>Probably humans:</span><span>${data.bots.human}</span></dd>
695									<dd><span>Registered users:</span><span>${data.bots.users}</span></dd>
696								</dl>
697								<dl id="botmon__botslist">
698									<dt>Known bots</dt>
699								</dl>
700							</div>
701						</details>
702					`));
703				}
704			}
705		},
706		status: {
707			setText: function(txt) {
708				const el = document.getElementById('botmon__today__status');
709				if (el && BotMon.live.gui.status._errorCount <= 0) {
710					el.innerText = txt;
711				}
712			},
713
714			setTitle: function(html) {
715				const el = document.getElementById('botmon__today__title');
716				if (el) {
717					el.innerHTML = html;
718				}
719			},
720
721			setError: function(txt) {
722				console.error(txt);
723				BotMon.live.gui.status._errorCount += 1;
724				const el = document.getElementById('botmon__today__status');
725				if (el) {
726					el.innerText = "An error occured. See the browser log for details!";
727					el.classList.add('error');
728				}
729			},
730			_errorCount: 0,
731
732			showBusy: function(txt = null) {
733				BotMon.live.gui.status._busyCount += 1;
734				const el = document.getElementById('botmon__today__busy');
735				if (el) {
736					el.style.display = 'inline-block';
737				}
738				if (txt) BotMon.live.gui.status.setText(txt);
739			},
740			_busyCount: 0,
741
742			hideBusy: function(txt = null) {
743				const el = document.getElementById('botmon__today__busy');
744				BotMon.live.gui.status._busyCount -= 1;
745				if (BotMon.live.gui.status._busyCount <= 0) {
746					if (el) el.style.display = 'none';
747					if (txt) BotMon.live.gui.status.setText(txt);
748				}
749			}
750		}
751
752	}
753};
754
/* Launch only if the BotMon admin panel is open.
 * The call is made right here, while the script is being executed, because
 * BotMon.init() reads document.currentScript. */
if (document.getElementById('botmon__admin') !== null) BotMon.init();