xref: /plugin/botmon/action.php (revision ad279a215c0c174eeb976c8d0e216df09c40c48a)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	public function __construct() {
17
18		// determine if a captcha should be loaded:
19		$this->showCaptcha = 'Z'; // Captcha unknown
20
21		$useCaptcha = $this->getConf('useCaptcha'); // should we show a captcha?
22
23		if ($useCaptcha !== 'disabled') {
24			if ($_SERVER['REQUEST_METHOD'] == 'HEAD') {
25				$this->showCaptcha = 'H'; // Method is HEAD, no need for captcha
26			} elseif ($this->captchaWhitelisted()) {
27				$this->showCaptcha = 'W'; // IP is whitelisted, no captcha
28			} elseif ($this->hasCaptchaCookie()) {
29				$this->showCaptcha = 'N'; // No, user already has a cookie, don't show the captcha
30			} else {
31				$this->showCaptcha = 'Y'; // Yes, show the captcha
32			}
33		}
34	}
35
36	/**
	 * Registers the plugin's event callbacks
38	 *
39	 * @param EventHandler $controller DokuWiki's event controller object
40	 * @return void
41	 */
42	public function register(EventHandler $controller) {
43
44		global $ACT;
45
46		// populate the session id and type:
47		$this->setSessionInfo();
48
49		// insert header data into the page:
50		if ($ACT == 'show' || $ACT == 'edit' || $ACT == 'media') {
51			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
52
53			// Override the page rendering, if a captcha needs to be displayed:
54			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'insertCaptchaCode');
55
56		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
57			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
58		}
59
60		// also show a captcha before the image preview
61		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');
62
63		// write to the log after the page content was displayed:
64		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
65
66	}
67
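	/*
	 * Session information:
	 *   $sessionId   - identifier written to the server log and passed to the client script
	 *   $sessionType - where the identifier came from: 'dw', 'php', 'ip' or 'rnd'
	 *   $showCaptcha - one-letter captcha state ('X' until the constructor has run;
	 *                  afterwards 'Z', 'H', 'W', 'N' or 'Y', or '-' from the image preview)
	 */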
69	private $sessionId = null;
70	private $sessionType = '';
71	private $showCaptcha = 'X';
72
73	/**
74	 * Inserts tracking code to the page header
	 * (only registered for the 'show', 'edit' and 'media' actions)
76	 *
77	 * @param Event $event event object by reference
78	 * @return void
79	 */
80	public function insertHeader(Event $event, $param) {
81
82		global $INFO;
83
84
85		// build the tracker code:
86		$code = $this->getBMHeader();
87
88		// add the deferred script loader::
89		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
90		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
91		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
92		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
93		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
94		$code .= DOKU_TAB . DOKU_TAB . "});";
95		$event->data['script'][] = ['_data' => $code];
96	}
97
98	/* create the BM object code for insertion into a script element: */
99	private function getBMHeader() {
100
101		// build the tracker code:
102		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
103
104		// is there a user logged in?
105		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
106		if ($username) {
107			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
108		}
109
110		return $code;
111
112	}
113
	/**
	 * Adds the stylesheet and the script of the BotMon admin page to the page header
	 * (only registered when the action is 'admin' and the requested page is 'botmon')
	 *
	 * @param Event $event event object by reference
	 * @param mixed $param not used
	 * @return void
	 */
121	public function insertAdminHeader(Event $event, $param) {
122
123		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
124		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
125	}
126
127	/**
128	 * Writes data to the server log.
129	 *
130	 * @return void
131	 */
132	public function writeServerLog(Event $event, $param) {
133
134		global $conf;
135		global $INFO;
136
137		// is there a user logged in?
138		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
139					?  $INFO['userinfo']['name'] : '');
140
141		// clean the page ID
142		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
143
144		// create the log array:
145		$logArr = Array(
146			$_SERVER['REMOTE_ADDR'], /* remote IP */
147			$pageId, /* page ID */
148			$this->sessionId, /* Session ID */
149			$this->sessionType, /* session ID type */
150			$username, /* user name */
151			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
152			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
153			substr($conf['lang'],0,2), /* page language */
154			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
155			$this->getCountryCode(), /* GeoIP country code */
156			$this->showCaptcha, /* show captcha? */
157			$_SERVER['REQUEST_METHOD'] ?? '' /* request method */
158		);
159
160		//* create the log line */
161		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
162		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
163		foreach ($logArr as $tab) {
164			$logline .= "\t" . $tab;
165		};
166
167		/* write the log line to the file */
168		$logfile = fopen($filename, 'a');
169		if (!$logfile) die();
170		if (fwrite($logfile, $logline . "\n") === false) {
171			fclose($logfile);
172			die();
173		}
174
175		/* Done */
176		fclose($logfile);
177	}
178
179	private function getCountryCode() {
180
181		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
182
183		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
184
185		try {
186
187			// use GeoIP module?
188			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
189				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
190				$country = ($result ? $result : $country);
191			}
192		} catch (Exception $e) {
193			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
194		}
195
196		return $country;
197	}
198
199	private function setSessionInfo() {
200
201		// what is the session identifier?
202		if (isset($_SESSION)) {
203			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
204			foreach ($sesKeys as $key) {
205				if (substr($key, 0, 2) == 'DW') {
206					$this->sessionId = $key;
207					$this->sessionType = 'dw';
208					return;
209				}
210			}
211		}
212		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
213			$this->sessionId = session_id();
214			$this->sessionType = 'php';
215		}
216		if (!$this->sessionId) { /* no PHP session ID, try IP address */
217			$this->sessionId = $_SERVER['REMOTE_ADDR'];
218			$this->sessionType = 'ip';
219		}
220
221		if (!$this->sessionId) { /* if all fails, use random data */
222			$this->sessionId = rand(100000000, 999999999);
223			$this->sessionType = 'rnd';
224		}
225
226	}
227
228	public function insertCaptchaCode(Event $event) {
229
230		$useCaptcha = $this->getConf('useCaptcha'); // which background to show?
231
232		// only if we previously determined that we need a captcha:
233		if ($this->showCaptcha == 'Y') {
234
235			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
236			$event->preventDefault(); // don't show normal content
237			switch ($useCaptcha) {
238				case 'loremipsum':
239					$this->insertLoremIpsum();  // show dada filler instead of text
240					break;
241				case 'dada':
242					$this->insertDadaFiller();  // show dada filler instead of text
243					break;
244			}
245
246			// insert the captcha loader code:
247			echo '<script>' . NL;
248
249			// add the deferred script loader::
250			echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
251			echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
252			echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
253			echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
254			echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
255			echo  DOKU_TAB . "});" . NL;
256
257			// add the translated strings for the captcha:
258			echo  DOKU_TAB . '$BMLocales = {' . NL;
259			echo  DOKU_TAB . DOKU_TAB . '"dlgTitle": ' . json_encode($this->getLang('bm_dlgTitle')) . ',' . NL;
260			echo  DOKU_TAB . DOKU_TAB . '"dlgSubtitle": ' . json_encode($this->getLang('bm_dlgSubtitle')) . ',' . NL;
261			echo  DOKU_TAB . DOKU_TAB . '"dlgConfirm": ' . json_encode($this->getLang('bm_dlgConfirm')) . ',' . NL;
262			echo  DOKU_TAB . DOKU_TAB . '"dlgChecking": ' . json_encode($this->getLang('bm_dlgChecking')) . ',' . NL;
263			echo  DOKU_TAB . DOKU_TAB . '"dlgLoading": ' . json_encode($this->getLang('bm_dlgLoading')) . ',' . NL;
264			echo  DOKU_TAB . DOKU_TAB . '"dlgError": ' . json_encode($this->getLang('bm_dlgError')) . ',' . NL;
265			echo  DOKU_TAB . '};' . NL;
266
267			echo '</script>' . NL;
268		}
269	}
270
271	public function showImageCaptcha(Event $event, $param) {
272
273		$useCaptcha = $this->getConf('useCaptcha');
274
275		echo '<script>' . $this->getBMHeader($event, $param) . '</script>';
276
277		$cCode = '-';
278		if ($useCaptcha !== 'disabled') {
279			if ($this->captchaWhitelisted()) {
280				$cCode = 'W'; // whitelisted
281			}
282			elseif ($this->hasCaptchaCookie()) {
283				$cCode  = 'N'; // user already has a cookie
284			}
285			else {
286				$cCode  = 'Y'; // show the captcha
287
288				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
289				$event->preventDefault(); // don't show normal content
290
291				// TODO Insert dummy image
292				$this->insertCaptchaLoader(); // and load the captcha
293			}
294		};
295
296		$this->showCaptcha = $cCode; // store the captcha code for the logfile
297	}
298
299	private function hasCaptchaCookie() {
300
301		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
302
303		$today = substr((new DateTime())->format('c'), 0, 10);
304
305		$raw = $this->getConf('captchaSeed') . '|' . $_SERVER['SERVER_NAME'] . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
306		$expected = hash('sha256', $raw);
307
308		//echo '<ul><li>cookie: ' . $cookieVal . '</li><li>expected: ' . $expected . '</li><li>matches: ' .($cookieVal == $expected ? 'true' : 'false') . '</li></ul>';
309
310		return $cookieVal == $expected;
311	}
312
313	// check if the visitor's IP is on a whitelist:
314	private function captchaWhitelisted() {
315
316		// normalise IP address:
317		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
318
319		// find which file to open:
320		$prefixes = ['user', 'default'];
321		foreach ($prefixes as $pre) {
322			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
323			if (file_exists($filename)) {
324				break;
325			}
326		}
327
328		if (file_exists($filename)) {
329			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
330			foreach ($lines as $line) {
331				if (trim($line) !== '' && !str_starts_with($line, '#')) {
332					$col = explode("\t", $line);
333					if (count($col) >= 2) {
334						$from = inet_pton($col[0]);
335						$to = inet_pton($col[1]);
336
337						if ($ip >= $from && $ip <= $to) {
338							return true; /* IP whitelisted */
339						}
340					}
341				}
342			}
343		}
344		return false; /* IP not found in whitelist */
345	}
346
	// outputs two paragraphs of Lorem ipsum text, so there is enough space for the captcha:
348	private function insertLoremIpsum() {
349
350		echo '<div class="level1">' . NL;
351		echo '<p>' . NL . 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.'. NL . '</p>' . NL;
352		echo '<p>' . NL . 'At vero eos et accusamus et iusto odio dignissimos ducimus, qui blanditiis praesentium voluptatum deleniti atque corrupti, quos dolores et quas molestias excepturi sint, obcaecati cupiditate non provident, similique sunt in culpa, qui officia deserunt mollitia animi, id est laborum et dolorum fuga.'. NL . '</p>' . NL;
353		echo '</div>' . NL;
354
355	}
356
357	/* Generates a few paragraphs of Dada text to show instead of the article content */
358	private function insertDadaFiller() {
359
360		global $conf;
361		global $TOC;
362		global $ID;
363
364		// list of languages to search for the wordlist
365		$langs = array_unique([$conf['lang'], 'la']);
366
367		// find path to the first available wordlist:
368		foreach ($langs as $lang) {
369			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
370			if (file_exists($filename)) {
371				break;
372			}
373		}
374
375		// load the wordlist file:
376		if (file_exists($filename)) {
377			$words = array();
378			$totalWeight = 0;
379			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
380			foreach ($lines as $line) {
381				$arr = explode("\t", $line);
382				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
383				$totalWeight += (int) $arr[1];
384				array_push($words, $arr);
385			}
386		} else {
387			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
388			return;
389		}
390
391		// If a TOC exists, use it for the headlines:
392		if(is_array($TOC)) {
393			$toc = $TOC;
394		} else {
395			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
396			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
397			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
398		}
399		if (!$toc) { // no TOC, generate my own:
400			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
401			$toc = array();
402			for ($i=0; $i<$hlCount; $i++) {
403				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
404			}
405		}
406
407		// if H1 heading is not in the TOC, add a chappeau section:
408		$chapeauCount = mt_rand(1, 3);
409		if ((int) $conf['toptoclevel'] > 1) {
410			echo "<div class=\"level1\">\n";
411			for ($i=0; $i<$chapeauCount; $i++) {
412				echo $this->dadaMakeParagraph($words, $totalWeight);
413			}
414			echo "</div>\n";
415		}
416
417		//  text sections for each sub-headline:
418		foreach ($toc as $hl) {
419			echo $this->dadaMakeSection($words, $totalWeight, $hl);
420		}
421	}
422
423	private function dadaMakeSection($words, $totalWeight, $hl) {
424
425		global $conf;
426
427		// how many paragraphs?
428		$paragraphCount = mt_rand(1, 4);
429
430		// section level
431		$topTocLevel = (int) $conf['toptoclevel'];
432		$secLevel = $hl['level'] + 1;;
433
434		// return value:
435		$sec = "";
436
437		// make a headline:
438		if ($topTocLevel > 1 || $secLevel > 1) {
439			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
440		}
441
442		// add the paragraphs:
443		$sec .= "<div class=\"level{$secLevel}\">\n";
444		for ($i=0; $i<$paragraphCount; $i++) {
445			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
446		}
447		$sec .= "</div>\n";
448
449		return $sec;
450	}
451
452	private function dadaMakeHeadline($words, $totalWeight) {
453
454		// how many words to generate?
455		$wordCount = mt_rand(2, 5);
456
457		// function returns an array:
458		$r = Array();
459
460		// generate the headline:
461		$hlArr = array();
462		for ($i=0; $i<$wordCount; $i++) {
463			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
464		}
465
466		$r['title'] =  ucfirst(implode(' ', $hlArr));
467
468		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
469		$r['type'] = 'ul'; // always ul!
470		$r['level'] = 1; // always level 1 for now
471
472		return $r;
473	}
474
475	private function dadaMakeParagraph($words, $totalWeight) {
476
477		// how many words to generate?
478		$sentenceCount = mt_rand(2, 5);
479
480		$paragraph = array();
481		for ($i=0; $i<$sentenceCount; $i++) {
482			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
483		}
484
485		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
486
487	}
488
489	private function dadaMakeSentence($words, $totalWeight) {
490
491		// how many words to generate?
492		$wordCount = mt_rand(4, 20);
493
494		// generate the sentence:
495		$sentence = array();
496		for ($i=0; $i<$wordCount; $i++) {
497			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
498		}
499
500		return ucfirst(implode(' ', $sentence)) . '.';
501
502	}
503
504	private function dadaSelectRandomWord($list, $totalWeight) {
505
506		// get a random selection:
507		$rand = mt_rand(0, $totalWeight);
508
509		// match the selection to the weighted list:
510		$cumulativeWeight = 0;
511		for ($i=0; $i<count($list); $i++) {
512			$cumulativeWeight += $list[$i][1];
513			if ($cumulativeWeight >= $rand) {
514				return $list[$i][0];
515			}
516		}
517		return '***';
518	}
519
520}