xref: /plugin/botmon/action.php (revision 69b73efaac25b0813be925b1c45704798c54753b)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	public function __construct() {
17
18		// determine if a captcha should be loaded:
19		$this->showCaptcha = 'Z'; // Captcha unknown
20
21		$useCaptcha = $this->getConf('useCaptcha'); // should we show a captcha?
22
23		if ($useCaptcha !== 'disabled') {
24			if ($_SERVER['REQUEST_METHOD'] == 'HEAD') {
25				$this->showCaptcha = 'H'; // Method is HEAD, no need for captcha
26			} elseif ($this->captchaWhitelisted()) {
27				$this->showCaptcha = 'W'; // IP is whitelisted, no captcha
28			} elseif ($this->hasCaptchaCookie()) {
29				$this->showCaptcha = 'N'; // No, user already has a cookie, don't show the captcha
30			} else {
31				$this->showCaptcha = 'Y'; // Yes, show the captcha
32			}
33		}
34
35		/* DEBUGGING ONLY: */
36		$_SERVER['HTTP_CF_IPCOUNTRY'] = 'XF'; // simulate Cloudflare header for testing
37	}
38
39	/**
40	 * Registers a callback functions
41	 *
42	 * @param EventHandler $controller DokuWiki's event controller object
43	 * @return void
44	 */
45	public function register(EventHandler $controller) {
46
47		global $ACT;
48
49		// populate the session id and type:
50		$this->setSessionInfo();
51
52		// insert header data into the page:
53		if ($ACT == 'show' || $ACT == 'edit' || $ACT == 'media') {
54			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
55
56			// Override the page rendering, if a captcha needs to be displayed:
57			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'insertCaptchaCode');
58
59		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
60			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
61		}
62
63		// also show a captcha before the image preview
64		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');
65
66		// write to the log after the page content was displayed:
67		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
68
69	}
70
	/* session information */
	// identifier used for the current visitor; filled in by setSessionInfo()
	private $sessionId = null;
	// how $sessionId was obtained: 'dw', 'php', 'ip' or 'rnd' (see setSessionInfo())
	private $sessionType = '';
	// one-letter captcha status that is also written to the server log:
	// 'X' = initial value, 'Z' = not determined, 'H' = HEAD request, 'W' = whitelisted IP,
	// 'N' = valid captcha cookie present, 'Y' = captcha shown, '-' = captcha disabled (image view)
	private $showCaptcha = 'X';
75
76	/**
77	 * Inserts tracking code to the page header
78	 * (only called on 'show' actions)
79	 *
80	 * @param Event $event event object by reference
81	 * @return void
82	 */
83	public function insertHeader(Event $event, $param) {
84
85		global $INFO;
86
87
88		// build the tracker code:
89		$code = $this->getBMHeader();
90
91		// add the deferred script loader::
92		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
93		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
94		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
95		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
96		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
97		$code .= DOKU_TAB . DOKU_TAB . "});";
98		$event->data['script'][] = ['_data' => $code];
99	}
100
101	/* create the BM object code for insertion into a script element: */
102	private function getBMHeader() {
103
104		// build the tracker code:
105		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
106
107		// is there a user logged in?
108		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
109		if ($username) {
110			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
111		}
112
113		return $code;
114
115	}
116
117	/**
118	 * Inserts tracking code to the page header
119	 * (only called on 'show' actions)
120	 *
121	 * @param Event $event event object by reference
122	 * @return void
123	 */
124	public function insertAdminHeader(Event $event, $param) {
125
126		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
127		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
128	}
129
130	/**
131	 * Writes data to the server log.
132	 *
133	 * @return void
134	 */
135	public function writeServerLog(Event $event, $param) {
136
137		global $conf;
138		global $INFO;
139
140		// is there a user logged in?
141		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
142					?  $INFO['userinfo']['name'] : '');
143
144		// clean the page ID
145		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
146
147		// create the log array:
148		$logArr = Array(
149			$_SERVER['REMOTE_ADDR'], /* remote IP */
150			$pageId, /* page ID */
151			$this->sessionId, /* Session ID */
152			$this->sessionType, /* session ID type */
153			$username, /* user name */
154			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
155			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
156			substr($conf['lang'],0,2), /* page language */
157			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
158			$this->getCountryCode(), /* GeoIP country code */
159			$this->showCaptcha, /* show captcha? */
160			$_SERVER['REQUEST_METHOD'] ?? '' /* request method */
161		);
162
163		//* create the log line */
164		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
165		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
166		foreach ($logArr as $tab) {
167			$logline .= "\t" . $tab;
168		};
169
170		/* write the log line to the file */
171		$logfile = fopen($filename, 'a');
172		if (!$logfile) die();
173		if (fwrite($logfile, $logline . "\n") === false) {
174			fclose($logfile);
175			die();
176		}
177
178		/* Done */
179		fclose($logfile);
180	}
181
182	private function getCountryCode() {
183
184		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
185
186		$lib = $this->getConf('geoiplib'); /* which library to use? (possible values are: disabled, phpgeoip or cloudflare) */
187
188		try {
189
190			switch($lib) {
191
192			case 'phpgeoip':
193				if (extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // PHP GeoIP module available?
194					$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
195					$country = ($result ? $result : $country);
196				}
197				break;
198
199			case 'cloudflare':
200				$result = $_SERVER['HTTP_CF_IPCOUNTRY'] ?? null;
201				$country = ( $result == 'XX' || $result === null ? 'ZZ' : $result ); // Cloudflare returns 'XX' for unknown countries, we want 'ZZ' in that case
202				break;
203
204			}
205
206
207		} catch (Exception $e) {
208			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
209		}
210
211		return $country;
212	}
213
214	private function setSessionInfo() {
215
216		// what is the session identifier?
217		if (isset($_SESSION)) {
218			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
219			foreach ($sesKeys as $key) {
220				if (substr($key, 0, 2) == 'DW') {
221					$this->sessionId = $key;
222					$this->sessionType = 'dw';
223					return;
224				}
225			}
226		}
227		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
228			$this->sessionId = session_id();
229			$this->sessionType = 'php';
230		}
231		if (!$this->sessionId) { /* no PHP session ID, try IP address */
232			$this->sessionId = $_SERVER['REMOTE_ADDR'];
233			$this->sessionType = 'ip';
234		}
235
236		if (!$this->sessionId) { /* if all fails, use random data */
237			$this->sessionId = rand(100000000, 999999999);
238			$this->sessionType = 'rnd';
239		}
240
241	}
242
243	public function insertCaptchaCode(Event $event) {
244
245		$useCaptcha = $this->getConf('useCaptcha'); // which background to show?
246
247		// only if we previously determined that we need a captcha:
248		if ($this->showCaptcha == 'Y') {
249
250			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
251			$event->preventDefault(); // don't show normal content
252			switch ($useCaptcha) {
253				case 'loremipsum':
254					$this->insertLoremIpsum();  // show dada filler instead of text
255					break;
256				case 'dada':
257					$this->insertDadaFiller();  // show dada filler instead of text
258					break;
259			}
260
261			// insert the captcha loader code:
262			echo '<script>' . NL;
263
264			// add the deferred script loader::
265			echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
266			echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
267			echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
268			echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
269			echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
270			echo  DOKU_TAB . "});" . NL;
271
272			// add the translated strings for the captcha:
273			echo  DOKU_TAB . '$BMLocales = {' . NL;
274			echo  DOKU_TAB . DOKU_TAB . '"dlgTitle": ' . json_encode($this->getLang('bm_dlgTitle')) . ',' . NL;
275			echo  DOKU_TAB . DOKU_TAB . '"dlgSubtitle": ' . json_encode($this->getLang('bm_dlgSubtitle')) . ',' . NL;
276			echo  DOKU_TAB . DOKU_TAB . '"dlgConfirm": ' . json_encode($this->getLang('bm_dlgConfirm')) . ',' . NL;
277			echo  DOKU_TAB . DOKU_TAB . '"dlgChecking": ' . json_encode($this->getLang('bm_dlgChecking')) . ',' . NL;
278			echo  DOKU_TAB . DOKU_TAB . '"dlgLoading": ' . json_encode($this->getLang('bm_dlgLoading')) . ',' . NL;
279			echo  DOKU_TAB . DOKU_TAB . '"dlgError": ' . json_encode($this->getLang('bm_dlgError')) . ',' . NL;
280			echo  DOKU_TAB . '};' . NL;
281
282			// captcha configuration options
283			echo  DOKU_TAB . '$BMConfig = {' . NL;
284			echo  DOKU_TAB . DOKU_TAB . '"captchaOptions": ' . json_encode($this->getConf('captchaOptions')) . NL;
285			echo  DOKU_TAB . '};' . NL;
286
287			echo '</script>' . NL;
288
289			// insert a warning message for users without JavaScript:
290			echo '<dialog open closedby="any" id="BM__NoJSWarning"><p>' . $this->getLang('bm_noJsWarning') . '</p></dialog>' . NL;
291
292		}
293	}
294
295	public function showImageCaptcha(Event $event, $param) {
296
297		$useCaptcha = $this->getConf('useCaptcha');
298
299		echo '<script>' . $this->getBMHeader($event, $param) . '</script>';
300
301		$cCode = '-';
302		if ($useCaptcha !== 'disabled') {
303			if ($this->captchaWhitelisted()) {
304				$cCode = 'W'; // whitelisted
305			}
306			elseif ($this->hasCaptchaCookie()) {
307				$cCode  = 'N'; // user already has a cookie
308			}
309			else {
310				$cCode  = 'Y'; // show the captcha
311
312				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
313				$event->preventDefault(); // don't show normal content
314
315				// TODO Insert dummy image
316				$this->insertCaptchaLoader(); // and load the captcha
317			}
318		};
319
320		$this->showCaptcha = $cCode; // store the captcha code for the logfile
321	}
322
323	/**
324	 * Checks if the user has a valid captcha cookie.
325	 *
326	 * @return boolean
327	 * @access private
328	 *
329	 **/
330	private function hasCaptchaCookie() {
331
332		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
333
334		// bypass cookie checking, of config option is set:
335		$captchaOptions = explode(',', $this->getConf('captchaOptions'));
336		if (in_array('anyval', $captchaOptions) && strlen($cookieVal) == 64) {
337			//$this->writeCaptchaLog($_SERVER['REMOTE_ADDR'], $cookieVal, $_SERVER['SERVER_NAME'], "BYPASSED:" . strlen($cookieVal)); // Debug only
338			return true;
339		}
340
341		//  calculate the expected cookie value:
342		$today = substr((new DateTime())->format('c'), 0, 10);
343		$raw = $this->getConf('captchaSeed') . ';' . $_SERVER['SERVER_NAME'] . ';' . $_SERVER['REMOTE_ADDR'] . ';' . $today;
344		$expected = hash('sha256', $raw);
345
346		// for debugging: write captcha data to the log:
347		//$this->writeCaptchaLog($_SERVER['REMOTE_ADDR'], $cookieVal, $_SERVER['SERVER_NAME'], $expected);
348
349		return $cookieVal == $expected;
350	}
351
352	/**
353	 * Writes data to the captcha log.
354	 *
355	 * @return void
356	 */
357	private function writeCaptchaLog($remote_addr, $cookieVal, $serverName, $expected) {
358
359		global $INFO;
360
361		$logArr = Array(
362			$remote_addr, /* remote IP */
363			$cookieVal, /* cookie value */
364			$this->getConf('captchaSeed'), /* seed */
365			$serverName, /* server name */
366			$expected, /* expected cookie value */
367			($cookieVal == $expected ? 'MATCH' : 'WRONG'), /* cookie matches expected value? */
368			$_SERVER['REQUEST_URI'] /* request URI */
369		);
370
371		//* create the log line */
372		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.captcha.txt'; /* use GMT date for filename */
373		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
374		foreach ($logArr as $tab) {
375			$logline .= "\t" . $tab;
376		};
377
378		/* write the log line to the file */
379		$logfile = fopen($filename, 'a');
380		if (!$logfile) die();
381		if (fwrite($logfile, $logline . "\n") === false) {
382			fclose($logfile);
383			die();
384		}
385
386		// in case of errors, write the cookie data to the log:
387		if (!$cookieVal) {
388			$logline =  print_r($_COOKIE, true);
389			if (fwrite($logfile, $logline . "\n") === false) {
390				fclose($logfile);
391				die();
392			}
393		}
394
395		/* Done. close the file. */
396		fclose($logfile);
397	}
398
399	// check if the visitor's IP is on a whitelist:
400	private function captchaWhitelisted() {
401
402		// normalise IP address:
403		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
404
405		// find which file to open:
406		$prefixes = ['user', 'default'];
407		foreach ($prefixes as $pre) {
408			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
409			if (file_exists($filename)) {
410				break;
411			}
412		}
413
414		if (file_exists($filename)) {
415			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
416			foreach ($lines as $line) {
417				if (trim($line) !== '' && !str_starts_with($line, '#')) {
418					$col = explode("\t", $line);
419					if (count($col) >= 2) {
420						$from = inet_pton($col[0]);
421						$to = inet_pton($col[1]);
422
423						if ($ip >= $from && $ip <= $to) {
424							return true; /* IP whitelisted */
425						}
426					}
427				}
428			}
429		}
430		return false; /* IP not found in whitelist */
431	}
432
433	// inserts a static text content in place of the actual page content:
434	private function insertLoremIpsum() {
435
436		echo '<div class="level1">' . NL;
437		echo '<p>' . NL . 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'. NL . '</p>' . NL;
438		echo '<p>' . NL . 'At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga.'. NL . '</p>' . NL;
439		echo '</div>' . NL;
440
441	}
442
443	/* Generates a few paragraphs of Dada text to show instead of the article content */
444	private function insertDadaFiller() {
445
446		global $conf;
447		global $TOC;
448		global $ID;
449
450		// list of languages to search for the wordlist
451		$langs = array_unique([$conf['lang'], 'la']);
452
453		// find path to the first available wordlist:
454		foreach ($langs as $lang) {
455			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
456			if (file_exists($filename)) {
457				break;
458			}
459		}
460
461		// load the wordlist file:
462		if (file_exists($filename)) {
463			$words = array();
464			$totalWeight = 0;
465			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
466			foreach ($lines as $line) {
467				$arr = explode("\t", $line);
468				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
469				$totalWeight += (int) $arr[1];
470				array_push($words, $arr);
471			}
472		} else {
473			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
474			return;
475		}
476
477		// If a TOC exists, use it for the headlines:
478		if(is_array($TOC)) {
479			$toc = $TOC;
480		} else {
481			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
482			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
483			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
484		}
485		if (!$toc) { // no TOC, generate my own:
486			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
487			$toc = array();
488			for ($i=0; $i<$hlCount; $i++) {
489				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
490			}
491		}
492
493		// if H1 heading is not in the TOC, add a chappeau section:
494		$chapeauCount = mt_rand(1, 3);
495		if ((int) $conf['toptoclevel'] > 1) {
496			echo "<div class=\"level1\">\n";
497			for ($i=0; $i<$chapeauCount; $i++) {
498				echo $this->dadaMakeParagraph($words, $totalWeight);
499			}
500			echo "</div>\n";
501		}
502
503		//  text sections for each sub-headline:
504		foreach ($toc as $hl) {
505			echo $this->dadaMakeSection($words, $totalWeight, $hl);
506		}
507	}
508
509	private function dadaMakeSection($words, $totalWeight, $hl) {
510
511		global $conf;
512
513		// how many paragraphs?
514		$paragraphCount = mt_rand(1, 4);
515
516		// section level
517		$topTocLevel = (int) $conf['toptoclevel'];
518		$secLevel = $hl['level'] + 1;;
519
520		// return value:
521		$sec = "";
522
523		// make a headline:
524		if ($topTocLevel > 1 || $secLevel > 1) {
525			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
526		}
527
528		// add the paragraphs:
529		$sec .= "<div class=\"level{$secLevel}\">\n";
530		for ($i=0; $i<$paragraphCount; $i++) {
531			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
532		}
533		$sec .= "</div>\n";
534
535		return $sec;
536	}
537
538	private function dadaMakeHeadline($words, $totalWeight) {
539
540		// how many words to generate?
541		$wordCount = mt_rand(2, 5);
542
543		// function returns an array:
544		$r = Array();
545
546		// generate the headline:
547		$hlArr = array();
548		for ($i=0; $i<$wordCount; $i++) {
549			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
550		}
551
552		$r['title'] =  ucfirst(implode(' ', $hlArr));
553
554		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
555		$r['type'] = 'ul'; // always ul!
556		$r['level'] = 1; // always level 1 for now
557
558		return $r;
559	}
560
561	private function dadaMakeParagraph($words, $totalWeight) {
562
563		// how many words to generate?
564		$sentenceCount = mt_rand(2, 5);
565
566		$paragraph = array();
567		for ($i=0; $i<$sentenceCount; $i++) {
568			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
569		}
570
571		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
572
573	}
574
575	private function dadaMakeSentence($words, $totalWeight) {
576
577		// how many words to generate?
578		$wordCount = mt_rand(4, 20);
579
580		// generate the sentence:
581		$sentence = array();
582		for ($i=0; $i<$wordCount; $i++) {
583			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
584		}
585
586		return ucfirst(implode(' ', $sentence)) . '.';
587
588	}
589
590	private function dadaSelectRandomWord($list, $totalWeight) {
591
592		// get a random selection:
593		$rand = mt_rand(0, $totalWeight);
594
595		// match the selection to the weighted list:
596		$cumulativeWeight = 0;
597		for ($i=0; $i<count($list); $i++) {
598			$cumulativeWeight += $list[$i][1];
599			if ($cumulativeWeight >= $rand) {
600				return $list[$i][0];
601			}
602		}
603		return '***';
604	}
605
606}
607