xref: /plugin/botmon/action.php (revision 19b69b647da12be70cee3430601d5917bad20dca)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	public function __construct() {
17
18		// determine if a captcha should be loaded:
19		$this->showCaptcha = 'Z';
20
21		$useCaptcha = $this->getConf('useCaptcha'); // should we show a captcha?
22
23		if ($useCaptcha !== 'disabled') {
24			if ($_SERVER['REQUEST_METHOD'] == 'HEAD') {
25				$this->showCaptcha = 'H'; // Method is HEAD, no need for captcha
26			} elseif ($this->captchaWhitelisted()) {
27				$this->showCaptcha = 'W'; // IP is whitelisted, no captcha
28			} elseif ($this->hasCaptchaCookie()) {
29				$this->showCaptcha = 'N'; // No, user already has a cookie, don't show the captcha
30			} else {
31				$this->showCaptcha = 'Y'; // Yes, show the captcha
32			}
33		}
34	}
35
36	/**
	 * Registers the plugin's callback functions with the event controller
38	 *
39	 * @param EventHandler $controller DokuWiki's event controller object
40	 * @return void
41	 */
42	public function register(EventHandler $controller) {
43
44		global $ACT;
45
46		// populate the session id and type:
47		$this->setSessionInfo();
48
49		// insert header data into the page:
50		if ($ACT == 'show' || $ACT == 'edit' || $ACT == 'media') {
51			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
52
53			// Override the page rendering, if a captcha needs to be displayed:
54			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'insertCaptchaCode');
55
56		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
57			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
58		}
59
60		// also show a captcha before the image preview
61		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');
62
63		// write to the log after the page content was displayed:
64		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
65
66	}
67
	/* session information */

	// identifier of the current visitor; filled in by setSessionInfo(),
	// written to the server log and passed to the client-side script:
	private $sessionId = null;

	// where $sessionId came from: 'dw' (DokuWiki session key), 'php' (PHP session id),
	// 'ip' (remote address) or 'rnd' (random number):
	private $sessionType = '';

	// one-letter captcha decision, also written to the server log.
	// 'X' is only the declaration default; the constructor replaces it with
	// 'Z' (disabled), 'H' (HEAD request), 'W' (whitelisted), 'N' (cookie present)
	// or 'Y' (show captcha); showImageCaptcha() may overwrite it with '-', 'W', 'N' or 'Y':
	private $showCaptcha = 'X';
72
73	/**
	 * Inserts the tracking code into the page header
	 * (only registered for 'show', 'edit' and 'media' actions)
76	 *
77	 * @param Event $event event object by reference
78	 * @return void
79	 */
80	public function insertHeader(Event $event, $param) {
81
82		global $INFO;
83
84
85		// build the tracker code:
86		$code = $this->getBMHeader();
87
88		// add the deferred script loader::
89		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
90		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
91		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
92		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
93		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
94		$code .= DOKU_TAB . DOKU_TAB . "});";
95		$event->data['script'][] = ['_data' => $code];
96	}
97
98	/* create the BM object code for insertion into a script element: */
99	private function getBMHeader() {
100
101		// build the tracker code:
102		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
103
104		// is there a user logged in?
105		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
106		if ($username) {
107			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
108		}
109
110		return $code;
111
112	}
113
114	/**
	 * Inserts the admin stylesheet and script into the page header
	 * (only registered for the plugin's own admin page)
117	 *
118	 * @param Event $event event object by reference
119	 * @return void
120	 */
121	public function insertAdminHeader(Event $event, $param) {
122
123		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
124		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
125	}
126
127	/**
128	 * Writes data to the server log.
129	 *
130	 * @return void
131	 */
132	public function writeServerLog(Event $event, $param) {
133
134		global $conf;
135		global $INFO;
136
137		// is there a user logged in?
138		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
139					?  $INFO['userinfo']['name'] : '');
140
141		// clean the page ID
142		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
143
144		// create the log array:
145		$logArr = Array(
146			$_SERVER['REMOTE_ADDR'], /* remote IP */
147			$pageId, /* page ID */
148			$this->sessionId, /* Session ID */
149			$this->sessionType, /* session ID type */
150			$username, /* user name */
151			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
152			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
153			substr($conf['lang'],0,2), /* page language */
154			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
155			$this->getCountryCode(), /* GeoIP country code */
156			$this->showCaptcha /* show captcha? */
157		);
158
159		//* create the log line */
160		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
161		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
162		foreach ($logArr as $tab) {
163			$logline .= "\t" . $tab;
164		};
165
166		/* write the log line to the file */
167		$logfile = fopen($filename, 'a');
168		if (!$logfile) die();
169		if (fwrite($logfile, $logline . "\n") === false) {
170			fclose($logfile);
171			die();
172		}
173
174		/* Done */
175		fclose($logfile);
176	}
177
178	private function getCountryCode() {
179
180		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
181
182		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
183
184		try {
185
186			// use GeoIP module?
187			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
188				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
189				$country = ($result ? $result : $country);
190			}
191		} catch (Exception $e) {
192			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
193		}
194
195		return $country;
196	}
197
198	private function setSessionInfo() {
199
200		// what is the session identifier?
201		if (isset($_SESSION)) {
202			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
203			foreach ($sesKeys as $key) {
204				if (substr($key, 0, 2) == 'DW') {
205					$this->sessionId = $key;
206					$this->sessionType = 'dw';
207					return;
208				}
209			}
210		}
211		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
212			$this->sessionId = session_id();
213			$this->sessionType = 'php';
214		}
215		if (!$this->sessionId) { /* no PHP session ID, try IP address */
216			$this->sessionId = $_SERVER['REMOTE_ADDR'];
217			$this->sessionType = 'ip';
218		}
219
220		if (!$this->sessionId) { /* if all fails, use random data */
221			$this->sessionId = rand(100000000, 999999999);
222			$this->sessionType = 'rnd';
223		}
224
225	}
226
227	public function insertCaptchaCode(Event $event) {
228
229		$useCaptcha = $this->getConf('useCaptcha'); // which background to show?
230
231		// only if we previously determined that we need a captcha:
232		if ($this->showCaptcha == 'Y') {
233
234			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
235			$event->preventDefault(); // don't show normal content
236			switch ($useCaptcha) {
237				case 'loremipsum':
238					$this->insertLoremIpsum();  // show dada filler instead of text
239					break;
240				case 'dada':
241					$this->insertDadaFiller();  // show dada filler instead of text
242					break;
243			}
244
245			// insert the captcha loader code:
246			echo '<script>' . NL;
247
248			// add the deferred script loader::
249			echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
250			echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
251			echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
252			echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
253			echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
254			echo  DOKU_TAB . "});";
255
256			// add the translated strings for the captcha:
257			echo  DOKU_TAB . '$BMLocales = {' . NL;
258			echo  DOKU_TAB . DOKU_TAB . '"dlgTitle": ' . json_encode($this->getLang('bm_dlgTitle')) . ',' . NL;
259			echo  DOKU_TAB . DOKU_TAB . '"dlgSubtitle": ' . json_encode($this->getLang('bm_dlgSubtitle')) . ',' . NL;
260			echo  DOKU_TAB . DOKU_TAB . '"dlgConfirm": ' . json_encode($this->getLang('bm_dlgConfirm')) . ',' . NL;
261			echo  DOKU_TAB . DOKU_TAB . '"dlgChecking": ' . json_encode($this->getLang('bm_dlgChecking')) . ',' . NL;
262			echo  DOKU_TAB . DOKU_TAB . '"dlgLoading": ' . json_encode($this->getLang('bm_dlgLoading')) . ',' . NL;
263			echo  DOKU_TAB . DOKU_TAB . '"dlgError": ' . json_encode($this->getLang('bm_dlgError')) . ',' . NL;
264			echo  DOKU_TAB . '};' . NL;
265
266			echo '</script>' . NL;
267		}
268	}
269
270	public function showImageCaptcha(Event $event, $param) {
271
272		$useCaptcha = $this->getConf('useCaptcha');
273
274		echo '<script>' . $this->getBMHeader($event, $param) . '</script>';
275
276		$cCode = '-';
277		if ($useCaptcha !== 'disabled') {
278			if ($this->captchaWhitelisted()) {
279				$cCode = 'W'; // whitelisted
280			}
281			elseif ($this->hasCaptchaCookie()) {
282				$cCode  = 'N'; // user already has a cookie
283			}
284			else {
285				$cCode  = 'Y'; // show the captcha
286
287				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
288				$event->preventDefault(); // don't show normal content
289
290				// TODO Insert dummy image
291				$this->insertCaptchaLoader(); // and load the captcha
292			}
293		};
294
295		$this->showCaptcha = $cCode; // store the captcha code for the logfile
296	}
297
298	private function hasCaptchaCookie() {
299
300		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
301
302		$today = substr((new DateTime())->format('c'), 0, 10);
303
304		$raw = $this->getConf('captchaSeed') . '|' . $_SERVER['SERVER_NAME'] . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
305		$expected = hash('sha256', $raw);
306
307		//echo '<ul><li>cookie: ' . $cookieVal . '</li><li>expected: ' . $expected . '</li><li>matches: ' .($cookieVal == $expected ? 'true' : 'false') . '</li></ul>';
308
309		return $cookieVal == $expected;
310	}
311
312	// check if the visitor's IP is on a whitelist:
313	private function captchaWhitelisted() {
314
315		// normalise IP address:
316		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
317
318		// find which file to open:
319		$prefixes = ['user', 'default'];
320		foreach ($prefixes as $pre) {
321			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
322			if (file_exists($filename)) {
323				break;
324			}
325		}
326
327		if (file_exists($filename)) {
328			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
329			foreach ($lines as $line) {
330				if (trim($line) !== '' && !str_starts_with($line, '#')) {
331					$col = explode("\t", $line);
332					if (count($col) >= 2) {
333						$from = inet_pton($col[0]);
334						$to = inet_pton($col[1]);
335
336						if ($ip >= $from && $ip <= $to) {
337							return true; /* IP whitelisted */
338						}
339					}
340				}
341			}
342		}
343		return false; /* IP not found in whitelist */
344	}
345
	// prints two paragraphs of static Lorem ipsum filler text in place of the page content:
347	private function insertLoremIpsum() {
348
349		echo '<div class="level1">' . NL;
350		echo '<p>' . NL . 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'. NL . '</p>' . NL;
351		echo '<p>' . NL . 'At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga.'. NL . '</p>' . NL;
352		echo '</div>' . NL;
353
354	}
355
356	/* Generates a few paragraphs of Dada text to show instead of the article content */
357	private function insertDadaFiller() {
358
359		global $conf;
360		global $TOC;
361		global $ID;
362
363		// list of languages to search for the wordlist
364		$langs = array_unique([$conf['lang'], 'la']);
365
366		// find path to the first available wordlist:
367		foreach ($langs as $lang) {
368			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
369			if (file_exists($filename)) {
370				break;
371			}
372		}
373
374		// load the wordlist file:
375		if (file_exists($filename)) {
376			$words = array();
377			$totalWeight = 0;
378			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
379			foreach ($lines as $line) {
380				$arr = explode("\t", $line);
381				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
382				$totalWeight += (int) $arr[1];
383				array_push($words, $arr);
384			}
385		} else {
386			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
387			return;
388		}
389
390		// If a TOC exists, use it for the headlines:
391		if(is_array($TOC)) {
392			$toc = $TOC;
393		} else {
394			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
395			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
396			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
397		}
398		if (!$toc) { // no TOC, generate my own:
399			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
400			$toc = array();
401			for ($i=0; $i<$hlCount; $i++) {
402				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
403			}
404		}
405
406		// if H1 heading is not in the TOC, add a chappeau section:
407		$chapeauCount = mt_rand(1, 3);
408		if ((int) $conf['toptoclevel'] > 1) {
409			echo "<div class=\"level1\">\n";
410			for ($i=0; $i<$chapeauCount; $i++) {
411				echo $this->dadaMakeParagraph($words, $totalWeight);
412			}
413			echo "</div>\n";
414		}
415
416		//  text sections for each sub-headline:
417		foreach ($toc as $hl) {
418			echo $this->dadaMakeSection($words, $totalWeight, $hl);
419		}
420	}
421
422	private function dadaMakeSection($words, $totalWeight, $hl) {
423
424		global $conf;
425
426		// how many paragraphs?
427		$paragraphCount = mt_rand(1, 4);
428
429		// section level
430		$topTocLevel = (int) $conf['toptoclevel'];
431		$secLevel = $hl['level'] + 1;;
432
433		// return value:
434		$sec = "";
435
436		// make a headline:
437		if ($topTocLevel > 1 || $secLevel > 1) {
438			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
439		}
440
441		// add the paragraphs:
442		$sec .= "<div class=\"level{$secLevel}\">\n";
443		for ($i=0; $i<$paragraphCount; $i++) {
444			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
445		}
446		$sec .= "</div>\n";
447
448		return $sec;
449	}
450
451	private function dadaMakeHeadline($words, $totalWeight) {
452
453		// how many words to generate?
454		$wordCount = mt_rand(2, 5);
455
456		// function returns an array:
457		$r = Array();
458
459		// generate the headline:
460		$hlArr = array();
461		for ($i=0; $i<$wordCount; $i++) {
462			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
463		}
464
465		$r['title'] =  ucfirst(implode(' ', $hlArr));
466
467		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
468		$r['type'] = 'ul'; // always ul!
469		$r['level'] = 1; // always level 1 for now
470
471		return $r;
472	}
473
474	private function dadaMakeParagraph($words, $totalWeight) {
475
476		// how many words to generate?
477		$sentenceCount = mt_rand(2, 5);
478
479		$paragraph = array();
480		for ($i=0; $i<$sentenceCount; $i++) {
481			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
482		}
483
484		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
485
486	}
487
488	private function dadaMakeSentence($words, $totalWeight) {
489
490		// how many words to generate?
491		$wordCount = mt_rand(4, 20);
492
493		// generate the sentence:
494		$sentence = array();
495		for ($i=0; $i<$wordCount; $i++) {
496			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
497		}
498
499		return ucfirst(implode(' ', $sentence)) . '.';
500
501	}
502
503	private function dadaSelectRandomWord($list, $totalWeight) {
504
505		// get a random selection:
506		$rand = mt_rand(0, $totalWeight);
507
508		// match the selection to the weighted list:
509		$cumulativeWeight = 0;
510		for ($i=0; $i<count($list); $i++) {
511			$cumulativeWeight += $list[$i][1];
512			if ($cumulativeWeight >= $rand) {
513				return $list[$i][0];
514			}
515		}
516		return '***';
517	}
518
519}