xref: /plugin/botmon/action.php (revision f4b231789501fb8709ed71979edd50fac24b34f0)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
class action_plugin_botmon extends DokuWiki_Action_Plugin {

	/** @var string|int|null identifier used to group the requests of one visitor (set by setSessionInfo()) */
	private $sessionId = null;

	/** @var string how $sessionId was obtained: 'dw', 'php', 'ip' or 'rnd' */
	private $sessionType = '';

	/**
	 * @var string one-letter captcha state that is written to the server log:
	 *   'X' = not initialised, 'Z' = unknown / captcha disabled, 'H' = HEAD request,
	 *   'W' = IP whitelisted, 'N' = visitor already has the cookie, 'Y' = captcha shown,
	 *   '-' = captcha disabled (image preview)
	 */
	private $showCaptcha = 'X';

	/**
	 * Determines whether a captcha has to be shown for this request.
	 */
	public function __construct() {

		$this->showCaptcha = 'Z'; // Captcha unknown

		if ($this->getConf('useCaptcha') === 'disabled') {
			return;
		}

		if (($_SERVER['REQUEST_METHOD'] ?? '') == 'HEAD') {
			$this->showCaptcha = 'H'; // Method is HEAD, no need for captcha
		} elseif ($this->captchaWhitelisted()) {
			$this->showCaptcha = 'W'; // IP is whitelisted, no captcha
		} elseif ($this->hasCaptchaCookie()) {
			$this->showCaptcha = 'N'; // No, user already has a cookie, don't show the captcha
		} else {
			$this->showCaptcha = 'Y'; // Yes, show the captcha
		}
	}

	/**
	 * Registers the callback functions
	 *
	 * @param EventHandler $controller DokuWiki's event controller object
	 * @return void
	 */
	public function register(EventHandler $controller) {

		global $ACT;

		// populate the session id and type:
		$this->setSessionInfo();

		if (in_array($ACT, ['show', 'edit', 'media'], true)) {
			// insert header data into the page:
			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');

			// Override the page rendering, if a captcha needs to be displayed:
			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'insertCaptchaCode');

		} elseif ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
		}

		// also show a captcha before the image preview
		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');

		// write to the log after the page content was displayed:
		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
	}

	/**
	 * Inserts tracking code to the page header
	 * (registered for the 'show', 'edit' and 'media' actions)
	 *
	 * @param Event $event event object by reference
	 * @param mixed $param unused
	 * @return void
	 */
	public function insertHeader(Event $event, $param) {

		// build the tracker code:
		$code = $this->getBMHeader();

		// add the deferred script loader:
		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
		$code .= DOKU_TAB . DOKU_TAB . "});";

		$event->data['script'][] = ['_data' => $code];
	}

	/**
	 * Creates the JavaScript that sets up the document._botmon object,
	 * for insertion into a script element.
	 *
	 * @return string JavaScript source (without surrounding script tags)
	 */
	private function getBMHeader(): string {

		// build the tracker code:
		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId)
			. ", seed: " . json_encode($this->getConf('captchaSeed'))
			. ", ip: " . json_encode($_SERVER['REMOTE_ADDR'] ?? '') . "};" . NL;

		// is there a user logged in?
		$username = $this->getUserName();
		if ($username) {
			// json_encode produces a correctly quoted and escaped JS string literal;
			// the HEX flags keep '<', '>' and '&' out of the inline script.
			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = '
				. json_encode($username, JSON_HEX_TAG | JSON_HEX_AMP) . ';' . NL;
		}

		return $code;
	}

	/**
	 * Returns the name of the logged-in user as found in $INFO['userinfo']['name'].
	 *
	 * @return string the name, or an empty string if there is none
	 */
	private function getUserName(): string {

		global $INFO;

		return !empty($INFO['userinfo']['name']) ? (string) $INFO['userinfo']['name'] : '';
	}

	/**
	 * Adds the stylesheet and script of the admin interface to the page header
	 * (registered only for the plugin's own admin page)
	 *
	 * @param Event $event event object by reference
	 * @param mixed $param unused
	 * @return void
	 */
	public function insertAdminHeader(Event $event, $param) {

		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
	}

	/**
	 * Writes data about the current request to the server log.
	 *
	 * A failure to write the log is reported via the DokuWiki logger;
	 * it does not interrupt the page output.
	 *
	 * @param Event $event event object by reference
	 * @param mixed $param unused
	 * @return void
	 */
	public function writeServerLog(Event $event, $param) {

		global $conf;
		global $INFO;

		// reduce the Accept-Language header to a list of unique two-letter codes:
		$acceptHeader = trim($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '', " \t;,*");
		$clientLangs = array_unique(array_map(
			static function ($item) { return substr(trim($item), 0, 2); },
			explode(',', $acceptHeader)
		));

		$this->appendLogLine('.srv.txt', [
			$_SERVER['REMOTE_ADDR'] ?? '', /* remote IP */
			$INFO['id'] ?? '', /* page ID */
			$this->sessionId, /* Session ID */
			$this->sessionType, /* session ID type */
			$this->getUserName(), /* user name */
			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
			substr($conf['lang'], 0, 2), /* page language */
			implode(',', $clientLangs), /* accepted client languages */
			$this->getCountryCode(), /* GeoIP country code */
			$this->showCaptcha, /* show captcha? */
			$_SERVER['REQUEST_METHOD'] ?? '' /* request method */
		]);
	}

	/**
	 * Returns the path of today's log file (the GMT date is used for the file name).
	 *
	 * @param string $suffix file name part after the date, e.g. '.srv.txt'
	 * @return string
	 */
	private function logFilePath(string $suffix): string {
		return __DIR__ . '/logs/' . gmdate('Y-m-d') . $suffix;
	}

	/**
	 * Appends one tab-separated line, prefixed with the GMT timestamp, to today's log file.
	 *
	 * Control characters (including tabs and line breaks) inside a field are replaced
	 * by U+FFFD, so that request data cannot break the column layout of the log.
	 *
	 * @param string $suffix file name part after the date, e.g. '.srv.txt'
	 * @param array $fields the column values; non-scalar values are written as empty columns
	 * @return bool true if the line was written
	 */
	private function appendLogLine(string $suffix, array $fields): bool {

		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
		foreach ($fields as $field) {
			$text = is_scalar($field) ? (string) $field : '';
			$logline .= "\t" . (preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $text) ?? '');
		}

		return $this->appendToFile($this->logFilePath($suffix), $logline . "\n");
	}

	/**
	 * Appends text to a file, holding an exclusive lock while writing.
	 *
	 * @param string $filename path of the file
	 * @param string $text the text to append
	 * @return bool true on success; on failure the error is logged and false is returned
	 */
	private function appendToFile(string $filename, string $text): bool {

		if (file_put_contents($filename, $text, FILE_APPEND | LOCK_EX) === false) {
			Logger::error('BotMon Plugin: cannot write to log file', $filename);
			return false;
		}
		return true;
	}

	/**
	 * Determines the country code of the visitor, using the library
	 * selected in the 'geoiplib' configuration option.
	 *
	 * @return string the country code; 'local' for 127.0.0.1, 'ZZ' if unknown
	 */
	private function getCountryCode() {

		$remoteAddr = $_SERVER['REMOTE_ADDR'] ?? '';
		$country = ( $remoteAddr == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!

		$lib = $this->getConf('geoiplib'); /* which library to use? (possible values are: disabled, phpgeoip or cloudflare) */

		try {

			if ($lib === 'phpgeoip') {
				if (extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // PHP GeoIP module available?
					$result = geoip_country_code_by_name($remoteAddr);
					if ($result) {
						$country = $result;
					}
				}
			} elseif ($lib === 'cloudflare') {
				$result = $_SERVER['HTTP_CF_IPCOUNTRY'] ?? null;
				// Cloudflare returns 'XX' for unknown countries, we want 'ZZ' in that case
				$country = ( $result === null || $result == 'XX' ? 'ZZ' : $result );
			}

		} catch (Exception $e) {
			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
		}

		return $country;
	}

	/**
	 * Populates $sessionId and $sessionType.
	 *
	 * Sources are tried in this order: a $_SESSION key starting with 'DW',
	 * the PHP session id, the remote IP address, and finally a random number.
	 *
	 * @return void
	 */
	private function setSessionInfo() {

		// DokuWiki Session ID preferred:
		if (isset($_SESSION) && is_array($_SESSION)) {
			foreach (array_keys($_SESSION) as $key) {
				if (str_starts_with((string) $key, 'DW')) {
					$this->sessionId = $key;
					$this->sessionType = 'dw';
					return;
				}
			}
		}

		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
			$this->sessionId = session_id();
			$this->sessionType = 'php';
		}
		if (!$this->sessionId) { /* no PHP session ID, try IP address */
			$this->sessionId = $_SERVER['REMOTE_ADDR'] ?? '';
			$this->sessionType = 'ip';
		}
		if (!$this->sessionId) { /* if all fails, use random data */
			$this->sessionId = random_int(100000000, 999999999);
			$this->sessionType = 'rnd';
		}
	}

	/**
	 * Replaces the page content by a filler text and the captcha loader,
	 * if the constructor determined that a captcha has to be shown.
	 *
	 * @param Event $event event object by reference
	 * @return void
	 */
	public function insertCaptchaCode(Event $event) {

		// only if we previously determined that we need a captcha:
		if ($this->showCaptcha != 'Y') {
			return;
		}

		echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
		$event->preventDefault(); // don't show normal content

		// which background to show?
		switch ($this->getConf('useCaptcha')) {
			case 'loremipsum':
				$this->insertLoremIpsum();  // show a static filler instead of text
				break;
			case 'dada':
				$this->insertDadaFiller();  // show dada filler instead of text
				break;
		}

		$this->insertCaptchaLoader();
	}

	/**
	 * Outputs the script element that loads captcha.js and defines its
	 * translated strings and options, followed by the warning dialog
	 * for users without JavaScript.
	 *
	 * @return void
	 */
	private function insertCaptchaLoader() {

		echo '<script>' . NL;

		// add the deferred script loader:
		echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
		echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
		echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
		echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
		echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
		echo  DOKU_TAB . "});" . NL;

		// add the translated strings for the captcha (language key = 'bm_' + JS key):
		echo  DOKU_TAB . '$BMLocales = {' . NL;
		foreach (['dlgTitle', 'dlgSubtitle', 'dlgConfirm', 'dlgChecking', 'dlgLoading', 'dlgError'] as $key) {
			echo  DOKU_TAB . DOKU_TAB . '"' . $key . '": ' . json_encode($this->getLang('bm_' . $key)) . ',' . NL;
		}
		echo  DOKU_TAB . '};' . NL;

		// captcha configuration options
		echo  DOKU_TAB . '$BMConfig = {' . NL;
		echo  DOKU_TAB . DOKU_TAB . '"captchaOptions": ' . json_encode($this->getConf('captchaOptions')) . NL;
		echo  DOKU_TAB . '};' . NL;

		echo '</script>' . NL;

		// insert a warning message for users without JavaScript:
		echo '<dialog open closedby="any" id="BM__NoJSWarning"><p>' . $this->getLang('bm_noJsWarning') . '</p></dialog>' . NL;
	}

	/**
	 * Shows a placeholder and the captcha instead of the image preview,
	 * unless the visitor is whitelisted or already has the captcha cookie.
	 *
	 * @param Event $event event object by reference
	 * @param mixed $param unused
	 * @return void
	 */
	public function showImageCaptcha(Event $event, $param) {

		echo '<script>' . $this->getBMHeader() . '</script>';

		$cCode = '-';
		if ($this->getConf('useCaptcha') !== 'disabled') {
			if ($this->captchaWhitelisted()) {
				$cCode = 'W'; // whitelisted
			} elseif ($this->hasCaptchaCookie()) {
				$cCode = 'N'; // user already has a cookie
			} else {
				$cCode = 'Y'; // show the captcha

				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
				$event->preventDefault(); // don't show normal content

				// TODO Insert dummy image
				$this->insertCaptchaLoader(); // and load the captcha
			}
		}

		$this->showCaptcha = $cCode; // store the captcha code for the logfile
	}

	/**
	 * Checks if the user has a valid captcha cookie.
	 *
	 * The expected value is the SHA-256 hash of
	 * "<captchaSeed>;<server name>;<remote IP>;<today's date>".
	 * If the 'anyval' captcha option is set, any 64 character value is accepted.
	 *
	 * @return boolean
	 */
	private function hasCaptchaCookie() {

		$cookieVal = $_COOKIE['DWConfirm'] ?? null;

		// a missing cookie, or one sent as an array (DWConfirm[]=...), can never be valid:
		if (!is_string($cookieVal)) {
			return false;
		}

		// bypass cookie checking, if config option is set:
		$captchaOptions = explode(',', (string) $this->getConf('captchaOptions'));
		if (in_array('anyval', $captchaOptions, true) && strlen($cookieVal) == 64) {
			return true;
		}

		// calculate the expected cookie value:
		$today = substr((new DateTime())->format('c'), 0, 10);
		$raw = $this->getConf('captchaSeed') . ';' . ($_SERVER['SERVER_NAME'] ?? '') . ';' . ($_SERVER['REMOTE_ADDR'] ?? '') . ';' . $today;
		$expected = hash('sha256', $raw);

		// for debugging: write captcha data to the log:
		//$this->writeCaptchaLog($_SERVER['REMOTE_ADDR'], $cookieVal, $_SERVER['SERVER_NAME'], $expected);

		// strict, timing-safe comparison (loose '==' may compare numeric-looking strings as numbers)
		return hash_equals($expected, $cookieVal);
	}

	/**
	 * Writes data to the captcha log (debugging aid).
	 *
	 * @param string $remote_addr remote IP address
	 * @param string|null $cookieVal value of the captcha cookie
	 * @param string $serverName the server name used for the hash
	 * @param string $expected the expected cookie value
	 * @return void
	 */
	private function writeCaptchaLog($remote_addr, $cookieVal, $serverName, $expected) {

		$matches = is_string($cookieVal) && is_string($expected) && hash_equals($expected, $cookieVal);

		$written = $this->appendLogLine('.captcha.txt', [
			$remote_addr, /* remote IP */
			$cookieVal, /* cookie value */
			$this->getConf('captchaSeed'), /* seed */
			$serverName, /* server name */
			$expected, /* expected cookie value */
			($matches ? 'MATCH' : 'WRONG'), /* cookie matches expected value? */
			$_SERVER['REQUEST_URI'] ?? '' /* request URI */
		]);

		// in case of errors, write the cookie data to the log:
		if ($written && !$cookieVal) {
			$this->appendToFile($this->logFilePath('.captcha.txt'), print_r($_COOKIE, true) . "\n");
		}
	}

	/**
	 * Checks if the visitor's IP is on a whitelist.
	 *
	 * The list is read from config/user-whitelist.txt or, if that does not exist,
	 * from config/default-whitelist.txt. Each line holds a range as tab-separated
	 * "from" and "to" addresses; lines starting with '#' are ignored.
	 *
	 * @return boolean true if the remote address lies within one of the ranges
	 */
	private function captchaWhitelisted() {

		// normalise IP address:
		$ip = $this->packIpAddress($_SERVER['REMOTE_ADDR'] ?? '');
		if ($ip === null) {
			return false;
		}

		// find which file to open:
		$filename = null;
		foreach (['user', 'default'] as $pre) {
			$candidate = __DIR__ . '/config/' . $pre . '-whitelist.txt';
			if (file_exists($candidate)) {
				$filename = $candidate;
				break;
			}
		}
		if ($filename === null) {
			return false;
		}

		$lines = file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
		if ($lines === false) {
			return false;
		}

		foreach ($lines as $line) {
			$line = trim($line);
			if ($line === '' || str_starts_with($line, '#')) {
				continue;
			}

			$col = explode("\t", $line);
			if (count($col) < 2) {
				continue;
			}

			$from = $this->packIpAddress($col[0]);
			$to = $this->packIpAddress($col[1]);
			if ($from === null || $to === null) {
				continue; // not a valid range
			}

			// only addresses of the same family (same packed length) are comparable;
			// strcmp compares the bytes, which for equal lengths matches the numeric order.
			if (strlen($from) === strlen($ip) && strlen($to) === strlen($ip)
				&& strcmp($ip, $from) >= 0 && strcmp($ip, $to) <= 0) {
				return true; /* IP whitelisted */
			}
		}

		return false; /* IP not found in whitelist */
	}

	/**
	 * Converts a textual IPv4 or IPv6 address to its packed binary form.
	 *
	 * @param string $address the address; surrounding whitespace is ignored
	 * @return string|null the packed address, or null if the text is not a valid address
	 */
	private function packIpAddress(string $address): ?string {

		$address = trim($address);
		if (filter_var($address, FILTER_VALIDATE_IP) === false) {
			return null;
		}

		$packed = inet_pton($address);
		return ($packed === false ? null : $packed);
	}

	/**
	 * Inserts a static text content in place of the actual page content.
	 *
	 * @return void
	 */
	private function insertLoremIpsum() {

		echo '<div class="level1">' . NL;
		echo '<p>' . NL . 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'. NL . '</p>' . NL;
		echo '<p>' . NL . 'At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga.'. NL . '</p>' . NL;
		echo '</div>' . NL;
	}

	/**
	 * Generates a few paragraphs of Dada text to show instead of the article content.
	 *
	 * The words are taken from lang/<language>/wordlist.txt (falling back to 'la');
	 * each line holds a word and, optionally, a tab-separated integer weight.
	 * The headlines are taken from the page's table of contents, if there is one.
	 *
	 * @return void
	 */
	private function insertDadaFiller() {

		global $conf;
		global $TOC;
		global $ID;

		// find path to the first available wordlist (wiki language first, then Latin):
		$filename = null;
		foreach (array_unique([$conf['lang'], 'la']) as $lang) {
			$candidate = __DIR__ . '/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
			if (file_exists($candidate)) {
				$filename = $candidate;
				break;
			}
		}

		// load the wordlist file:
		$lines = ($filename === null ? false : file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES));
		if ($lines === false) {
			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
			return;
		}

		$words = [];
		$totalWeight = 0;
		foreach ($lines as $line) {
			$arr = explode("\t", $line);
			$arr[0] = trim($arr[0]);
			if ($arr[0] === '') {
				continue;
			}
			// the weight defaults to 1; negative weights would break the cumulative selection
			$arr[1] = ( count($arr) > 1 ? max(0, (int) trim($arr[1])) : 1 );
			$totalWeight += $arr[1];
			$words[] = $arr;
		}

		// If a TOC exists, use it for the headlines:
		if (is_array($TOC)) {
			$toc = $TOC;
		} else {
			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
			$toc = $meta['description']['tableofcontents'] ?? null;
		}
		if (!$toc) { // no TOC, generate my own:
			$hlCount = mt_rand(0, max(0, (int) ($conf['tocminheads'] ?? 0)));
			$toc = [];
			for ($i = 0; $i < $hlCount; $i++) {
				$toc[] = $this->dadaMakeHeadline($words, $totalWeight);
			}
		}

		// if H1 heading is not in the TOC, add a chapeau section:
		$chapeauCount = mt_rand(1, 3);
		if ((int) $conf['toptoclevel'] > 1) {
			echo "<div class=\"level1\">\n";
			for ($i = 0; $i < $chapeauCount; $i++) {
				echo $this->dadaMakeParagraph($words, $totalWeight);
			}
			echo "</div>\n";
		}

		// text sections for each sub-headline:
		foreach ($toc as $hl) {
			echo $this->dadaMakeSection($words, $totalWeight, $hl);
		}
	}

	/**
	 * Builds the HTML of one section: a headline followed by 1 to 4 filler paragraphs.
	 *
	 * @param array $words the weighted word list
	 * @param int $totalWeight sum of all weights in $words
	 * @param array $hl TOC item; the keys 'level', 'hid' and 'title' are used
	 * @return string HTML
	 */
	private function dadaMakeSection($words, $totalWeight, $hl) {

		global $conf;

		// how many paragraphs?
		$paragraphCount = mt_rand(1, 4);

		// section level (one below the TOC level)
		$topTocLevel = (int) $conf['toptoclevel'];
		$secLevel = (int) ($hl['level'] ?? 1) + 1;

		// the id and title may come from the page's metadata, so they must be escaped:
		$hid = htmlspecialchars((string) ($hl['hid'] ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
		$title = htmlspecialchars((string) ($hl['title'] ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

		// return value:
		$sec = "";

		// make a headline:
		if ($topTocLevel > 1 || $secLevel > 1) {
			$sec .= "<h{$secLevel} id=\"{$hid}\">{$title}</h{$secLevel}>\n";
		}

		// add the paragraphs:
		$sec .= "<div class=\"level{$secLevel}\">\n";
		for ($i = 0; $i < $paragraphCount; $i++) {
			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
		}
		$sec .= "</div>\n";

		return $sec;
	}

	/**
	 * Generates a random headline of 2 to 5 words, in the form of a TOC item.
	 *
	 * @param array $words the weighted word list
	 * @param int $totalWeight sum of all weights in $words
	 * @return array with the keys 'title', 'hid', 'type' and 'level'
	 */
	private function dadaMakeHeadline($words, $totalWeight) {

		// how many words to generate?
		$wordCount = mt_rand(2, 5);

		// generate the headline:
		$hlArr = [];
		for ($i = 0; $i < $wordCount; $i++) {
			$hlArr[] = $this->dadaSelectRandomWord($words, $totalWeight);
		}
		$title = ucfirst(implode(' ', $hlArr));

		return [
			'title' => $title,
			'hid' => preg_replace('/[^\w\d\-]+/i', '_', strtolower($title)),
			'type' => 'ul', // always ul!
			'level' => 1, // always level 1 for now
		];
	}

	/**
	 * Generates a paragraph element of 2 to 5 random sentences.
	 *
	 * @param array $words the weighted word list
	 * @param int $totalWeight sum of all weights in $words
	 * @return string HTML
	 */
	private function dadaMakeParagraph($words, $totalWeight) {

		// how many sentences to generate?
		$sentenceCount = mt_rand(2, 5);

		$paragraph = [];
		for ($i = 0; $i < $sentenceCount; $i++) {
			$paragraph[] = $this->dadaMakeSentence($words, $totalWeight);
		}

		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
	}

	/**
	 * Generates a sentence of 4 to 20 random words.
	 *
	 * @param array $words the weighted word list
	 * @param int $totalWeight sum of all weights in $words
	 * @return string the sentence, capitalised and ending with a full stop
	 */
	private function dadaMakeSentence($words, $totalWeight) {

		// how many words to generate?
		$wordCount = mt_rand(4, 20);

		// generate the sentence:
		$sentence = [];
		for ($i = 0; $i < $wordCount; $i++) {
			$sentence[] = $this->dadaSelectRandomWord($words, $totalWeight);
		}

		return ucfirst(implode(' ', $sentence)) . '.';
	}

	/**
	 * Selects a random word from the list, with a probability proportional to its weight.
	 *
	 * @param array $list list of [word, weight] pairs
	 * @param int $totalWeight sum of all weights in $list
	 * @return string the selected word, or '***' if no word can be selected
	 */
	private function dadaSelectRandomWord($list, $totalWeight) {

		$totalWeight = (int) $totalWeight;
		if ($totalWeight < 1) {
			return '***'; // empty list, or only zero weights
		}

		// get a random selection; starting at 1 (not 0) gives every word
		// exactly "weight" chances and never selects a word of weight 0:
		$rand = mt_rand(1, $totalWeight);

		// match the selection to the weighted list:
		$cumulativeWeight = 0;
		foreach ($list as $entry) {
			$cumulativeWeight += $entry[1];
			if ($cumulativeWeight >= $rand) {
				return $entry[0];
			}
		}
		return '***';
	}

}
604