xref: /plugin/botmon/action.php (revision ac32818834562efe63405737a5d446e98ee81226)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	public function __construct() {
17
18		// determine if a captcha should be loaded:
19		$this->showCaptcha = 'Z'; // Captcha unknown
20
21		$useCaptcha = $this->getConf('useCaptcha'); // should we show a captcha?
22
23		if ($useCaptcha !== 'disabled') {
24			if ($_SERVER['REQUEST_METHOD'] == 'HEAD') {
25				$this->showCaptcha = 'H'; // Method is HEAD, no need for captcha
26			} elseif ($this->captchaWhitelisted()) {
27				$this->showCaptcha = 'W'; // IP is whitelisted, no captcha
28			} elseif ($this->hasCaptchaCookie()) {
29				$this->showCaptcha = 'N'; // No, user already has a cookie, don't show the captcha
30			} else {
31				$this->showCaptcha = 'Y'; // Yes, show the captcha
32			}
33		}
34	}
35
36	/**
37	 * Registers a callback functions
38	 *
39	 * @param EventHandler $controller DokuWiki's event controller object
40	 * @return void
41	 */
42	public function register(EventHandler $controller) {
43
44		global $ACT;
45
46		// populate the session id and type:
47		$this->setSessionInfo();
48
49		// insert header data into the page:
50		if ($ACT == 'show' || $ACT == 'edit' || $ACT == 'media') {
51			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
52
53			// Override the page rendering, if a captcha needs to be displayed:
54			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'insertCaptchaCode');
55
56		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
57			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
58		}
59
60		// also show a captcha before the image preview
61		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');
62
63		// write to the log after the page content was displayed:
64		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
65
66	}
67
68	/* session information */
69	private $sessionId = null;
70	private $sessionType = '';
71	private $showCaptcha = 'X';
72
73	/**
74	 * Inserts tracking code to the page header
75	 * (only called on 'show' actions)
76	 *
77	 * @param Event $event event object by reference
78	 * @return void
79	 */
80	public function insertHeader(Event $event, $param) {
81
82		global $INFO;
83
84
85		// build the tracker code:
86		$code = $this->getBMHeader();
87
88		// add the deferred script loader::
89		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
90		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
91		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
92		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
93		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
94		$code .= DOKU_TAB . DOKU_TAB . "});";
95		$event->data['script'][] = ['_data' => $code];
96	}
97
98	/* create the BM object code for insertion into a script element: */
99	private function getBMHeader() {
100
101		// build the tracker code:
102		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
103
104		// is there a user logged in?
105		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
106		if ($username) {
107			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
108		}
109
110		return $code;
111
112	}
113
114	/**
115	 * Inserts tracking code to the page header
116	 * (only called on 'show' actions)
117	 *
118	 * @param Event $event event object by reference
119	 * @return void
120	 */
121	public function insertAdminHeader(Event $event, $param) {
122
123		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
124		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
125	}
126
127	/**
128	 * Writes data to the server log.
129	 *
130	 * @return void
131	 */
132	public function writeServerLog(Event $event, $param) {
133
134		global $conf;
135		global $INFO;
136
137		// is there a user logged in?
138		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
139					?  $INFO['userinfo']['name'] : '');
140
141		// clean the page ID
142		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
143
144		// create the log array:
145		$logArr = Array(
146			$_SERVER['REMOTE_ADDR'], /* remote IP */
147			$pageId, /* page ID */
148			$this->sessionId, /* Session ID */
149			$this->sessionType, /* session ID type */
150			$username, /* user name */
151			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
152			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
153			substr($conf['lang'],0,2), /* page language */
154			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
155			$this->getCountryCode(), /* GeoIP country code */
156			$this->showCaptcha, /* show captcha? */
157			$_SERVER['REQUEST_METHOD'] ?? '' /* request method */
158		);
159
160		//* create the log line */
161		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
162		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
163		foreach ($logArr as $tab) {
164			$logline .= "\t" . $tab;
165		};
166
167		/* write the log line to the file */
168		$logfile = fopen($filename, 'a');
169		if (!$logfile) die();
170		if (fwrite($logfile, $logline . "\n") === false) {
171			fclose($logfile);
172			die();
173		}
174
175		/* Done */
176		fclose($logfile);
177	}
178
179	private function getCountryCode() {
180
181		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
182
183		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
184
185		try {
186
187			// use GeoIP module?
188			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
189				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
190				$country = ($result ? $result : $country);
191			}
192		} catch (Exception $e) {
193			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
194		}
195
196		return $country;
197	}
198
199	private function setSessionInfo() {
200
201		// what is the session identifier?
202		if (isset($_SESSION)) {
203			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
204			foreach ($sesKeys as $key) {
205				if (substr($key, 0, 2) == 'DW') {
206					$this->sessionId = $key;
207					$this->sessionType = 'dw';
208					return;
209				}
210			}
211		}
212		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
213			$this->sessionId = session_id();
214			$this->sessionType = 'php';
215		}
216		if (!$this->sessionId) { /* no PHP session ID, try IP address */
217			$this->sessionId = $_SERVER['REMOTE_ADDR'];
218			$this->sessionType = 'ip';
219		}
220
221		if (!$this->sessionId) { /* if all fails, use random data */
222			$this->sessionId = rand(100000000, 999999999);
223			$this->sessionType = 'rnd';
224		}
225
226	}
227
228	public function insertCaptchaCode(Event $event) {
229
230		$useCaptcha = $this->getConf('useCaptcha'); // which background to show?
231
232		// only if we previously determined that we need a captcha:
233		if ($this->showCaptcha == 'Y') {
234
235			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
236			$event->preventDefault(); // don't show normal content
237			switch ($useCaptcha) {
238				case 'loremipsum':
239					$this->insertLoremIpsum();  // show dada filler instead of text
240					break;
241				case 'dada':
242					$this->insertDadaFiller();  // show dada filler instead of text
243					break;
244			}
245
246			// insert the captcha loader code:
247			echo '<script>' . NL;
248
249			// add the deferred script loader::
250			echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
251			echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
252			echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
253			echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
254			echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
255			echo  DOKU_TAB . "});" . NL;
256
257			// add the translated strings for the captcha:
258			echo  DOKU_TAB . '$BMLocales = {' . NL;
259			echo  DOKU_TAB . DOKU_TAB . '"dlgTitle": ' . json_encode($this->getLang('bm_dlgTitle')) . ',' . NL;
260			echo  DOKU_TAB . DOKU_TAB . '"dlgSubtitle": ' . json_encode($this->getLang('bm_dlgSubtitle')) . ',' . NL;
261			echo  DOKU_TAB . DOKU_TAB . '"dlgConfirm": ' . json_encode($this->getLang('bm_dlgConfirm')) . ',' . NL;
262			echo  DOKU_TAB . DOKU_TAB . '"dlgChecking": ' . json_encode($this->getLang('bm_dlgChecking')) . ',' . NL;
263			echo  DOKU_TAB . DOKU_TAB . '"dlgLoading": ' . json_encode($this->getLang('bm_dlgLoading')) . ',' . NL;
264			echo  DOKU_TAB . DOKU_TAB . '"dlgError": ' . json_encode($this->getLang('bm_dlgError')) . ',' . NL;
265			echo  DOKU_TAB . '};' . NL;
266
267			// captcha configuration options
268			echo  DOKU_TAB . '$BMConfig = {' . NL;
269			echo  DOKU_TAB . DOKU_TAB . '"captchaOptions": ' . json_encode($this->getConf('captchaOptions')) . NL;
270			echo  DOKU_TAB . '};' . NL;
271
272			echo '</script>' . NL;
273
274			// insert a warning message for users without JavaScript:
275			echo '<dialog open closedby="any" id="BM__NoJSWarning"><p>' . $this->getLang('bm_noJsWarning') . '</p></dialog>' . NL;
276
277		}
278	}
279
280	public function showImageCaptcha(Event $event, $param) {
281
282		$useCaptcha = $this->getConf('useCaptcha');
283
284		echo '<script>' . $this->getBMHeader($event, $param) . '</script>';
285
286		$cCode = '-';
287		if ($useCaptcha !== 'disabled') {
288			if ($this->captchaWhitelisted()) {
289				$cCode = 'W'; // whitelisted
290			}
291			elseif ($this->hasCaptchaCookie()) {
292				$cCode  = 'N'; // user already has a cookie
293			}
294			else {
295				$cCode  = 'Y'; // show the captcha
296
297				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
298				$event->preventDefault(); // don't show normal content
299
300				// TODO Insert dummy image
301				$this->insertCaptchaLoader(); // and load the captcha
302			}
303		};
304
305		$this->showCaptcha = $cCode; // store the captcha code for the logfile
306	}
307
308	/**
309	 * Checks if the user has a valid captcha cookie.
310	 *
311	 * @return boolean
312	 * @access private
313	 *
314	 **/
315	private function hasCaptchaCookie() {
316
317		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
318
319		// bypass cookie checking, of config option is set:
320		$captchaOptions = explode(',', $this->getConf('captchaOptions'));
321		if (in_array('anyval', $captchaOptions) && strlen($cookieVal) == 64) {
322			//$this->writeCaptchaLog($_SERVER['REMOTE_ADDR'], $cookieVal, $_SERVER['SERVER_NAME'], "BYPASSED:" . strlen($cookieVal)); // Debug only
323			return true;
324		}
325
326		//  calculate the expected cookie value:
327		$today = substr((new DateTime())->format('c'), 0, 10);
328		$raw = $this->getConf('captchaSeed') . ';' . $_SERVER['SERVER_NAME'] . ';' . $_SERVER['REMOTE_ADDR'] . ';' . $today;
329		$expected = hash('sha256', $raw);
330
331		// for debugging: write captcha data to the log:
332		//$this->writeCaptchaLog($_SERVER['REMOTE_ADDR'], $cookieVal, $_SERVER['SERVER_NAME'], $expected);
333
334		return $cookieVal == $expected;
335	}
336
337	/**
338	 * Writes data to the captcha log.
339	 *
340	 * @return void
341	 */
342	private function writeCaptchaLog($remote_addr, $cookieVal, $serverName, $expected) {
343
344		global $INFO;
345
346		$logArr = Array(
347			$remote_addr, /* remote IP */
348			$cookieVal, /* cookie value */
349			$this->getConf('captchaSeed'), /* seed */
350			$serverName, /* server name */
351			$expected, /* expected cookie value */
352			($cookieVal == $expected ? 'MATCH' : 'WRONG'), /* cookie matches expected value? */
353			$_SERVER['REQUEST_URI'] /* request URI */
354		);
355
356		//* create the log line */
357		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.captcha.txt'; /* use GMT date for filename */
358		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
359		foreach ($logArr as $tab) {
360			$logline .= "\t" . $tab;
361		};
362
363		/* write the log line to the file */
364		$logfile = fopen($filename, 'a');
365		if (!$logfile) die();
366		if (fwrite($logfile, $logline . "\n") === false) {
367			fclose($logfile);
368			die();
369		}
370
371		// in case of errors, write the cookie data to the log:
372		if (!$cookieVal) {
373			$logline =  print_r($_COOKIE, true);
374			if (fwrite($logfile, $logline . "\n") === false) {
375				fclose($logfile);
376				die();
377			}
378		}
379
380		/* Done. close the file. */
381		fclose($logfile);
382	}
383
384	// check if the visitor's IP is on a whitelist:
385	private function captchaWhitelisted() {
386
387		// normalise IP address:
388		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
389
390		// find which file to open:
391		$prefixes = ['user', 'default'];
392		foreach ($prefixes as $pre) {
393			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
394			if (file_exists($filename)) {
395				break;
396			}
397		}
398
399		if (file_exists($filename)) {
400			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
401			foreach ($lines as $line) {
402				if (trim($line) !== '' && !str_starts_with($line, '#')) {
403					$col = explode("\t", $line);
404					if (count($col) >= 2) {
405						$from = inet_pton($col[0]);
406						$to = inet_pton($col[1]);
407
408						if ($ip >= $from && $ip <= $to) {
409							return true; /* IP whitelisted */
410						}
411					}
412				}
413			}
414		}
415		return false; /* IP not found in whitelist */
416	}
417
418	// inserts a static text content in place of the actual page content:
419	private function insertLoremIpsum() {
420
421		echo '<div class="level1">' . NL;
422		echo '<p>' . NL . 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'. NL . '</p>' . NL;
423		echo '<p>' . NL . 'At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga.'. NL . '</p>' . NL;
424		echo '</div>' . NL;
425
426	}
427
428	/* Generates a few paragraphs of Dada text to show instead of the article content */
429	private function insertDadaFiller() {
430
431		global $conf;
432		global $TOC;
433		global $ID;
434
435		// list of languages to search for the wordlist
436		$langs = array_unique([$conf['lang'], 'la']);
437
438		// find path to the first available wordlist:
439		foreach ($langs as $lang) {
440			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
441			if (file_exists($filename)) {
442				break;
443			}
444		}
445
446		// load the wordlist file:
447		if (file_exists($filename)) {
448			$words = array();
449			$totalWeight = 0;
450			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
451			foreach ($lines as $line) {
452				$arr = explode("\t", $line);
453				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
454				$totalWeight += (int) $arr[1];
455				array_push($words, $arr);
456			}
457		} else {
458			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
459			return;
460		}
461
462		// If a TOC exists, use it for the headlines:
463		if(is_array($TOC)) {
464			$toc = $TOC;
465		} else {
466			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
467			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
468			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
469		}
470		if (!$toc) { // no TOC, generate my own:
471			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
472			$toc = array();
473			for ($i=0; $i<$hlCount; $i++) {
474				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
475			}
476		}
477
478		// if H1 heading is not in the TOC, add a chappeau section:
479		$chapeauCount = mt_rand(1, 3);
480		if ((int) $conf['toptoclevel'] > 1) {
481			echo "<div class=\"level1\">\n";
482			for ($i=0; $i<$chapeauCount; $i++) {
483				echo $this->dadaMakeParagraph($words, $totalWeight);
484			}
485			echo "</div>\n";
486		}
487
488		//  text sections for each sub-headline:
489		foreach ($toc as $hl) {
490			echo $this->dadaMakeSection($words, $totalWeight, $hl);
491		}
492	}
493
494	private function dadaMakeSection($words, $totalWeight, $hl) {
495
496		global $conf;
497
498		// how many paragraphs?
499		$paragraphCount = mt_rand(1, 4);
500
501		// section level
502		$topTocLevel = (int) $conf['toptoclevel'];
503		$secLevel = $hl['level'] + 1;;
504
505		// return value:
506		$sec = "";
507
508		// make a headline:
509		if ($topTocLevel > 1 || $secLevel > 1) {
510			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
511		}
512
513		// add the paragraphs:
514		$sec .= "<div class=\"level{$secLevel}\">\n";
515		for ($i=0; $i<$paragraphCount; $i++) {
516			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
517		}
518		$sec .= "</div>\n";
519
520		return $sec;
521	}
522
523	private function dadaMakeHeadline($words, $totalWeight) {
524
525		// how many words to generate?
526		$wordCount = mt_rand(2, 5);
527
528		// function returns an array:
529		$r = Array();
530
531		// generate the headline:
532		$hlArr = array();
533		for ($i=0; $i<$wordCount; $i++) {
534			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
535		}
536
537		$r['title'] =  ucfirst(implode(' ', $hlArr));
538
539		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
540		$r['type'] = 'ul'; // always ul!
541		$r['level'] = 1; // always level 1 for now
542
543		return $r;
544	}
545
546	private function dadaMakeParagraph($words, $totalWeight) {
547
548		// how many words to generate?
549		$sentenceCount = mt_rand(2, 5);
550
551		$paragraph = array();
552		for ($i=0; $i<$sentenceCount; $i++) {
553			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
554		}
555
556		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
557
558	}
559
560	private function dadaMakeSentence($words, $totalWeight) {
561
562		// how many words to generate?
563		$wordCount = mt_rand(4, 20);
564
565		// generate the sentence:
566		$sentence = array();
567		for ($i=0; $i<$wordCount; $i++) {
568			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
569		}
570
571		return ucfirst(implode(' ', $sentence)) . '.';
572
573	}
574
575	private function dadaSelectRandomWord($list, $totalWeight) {
576
577		// get a random selection:
578		$rand = mt_rand(0, $totalWeight);
579
580		// match the selection to the weighted list:
581		$cumulativeWeight = 0;
582		for ($i=0; $i<count($list); $i++) {
583			$cumulativeWeight += $list[$i][1];
584			if ($cumulativeWeight >= $rand) {
585				return $list[$i][0];
586			}
587		}
588		return '***';
589	}
590
591}