xref: /plugin/botmon/action.php (revision 620d9253e58a385b16ea95cd5c3f3aea693b68f8)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	/**
17	 * Registers a callback functions
18	 *
19	 * @param EventHandler $controller DokuWiki's event controller object
20	 * @return void
21	 */
22	public function register(EventHandler $controller) {
23
24		global $ACT;
25
26		// initialize the session id and type with random data:
27		$this->sessionId = rand(1000000, 9999999);
28		$this->sessionType = 'rnd';
29
30		// insert header data into the page:
31		if ($ACT == 'show' || $ACT == 'edit' || $ACT == 'media') {
32			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
33
34			// Override the page rendering, if a captcha needs to be displayed:
35			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'showCaptcha');
36
37		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
38			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
39		}
40
41		// also show a captcha before the image preview
42		$controller->register_hook('TPL_IMG_DISPLAY', 'BEFORE', $this, 'showImageCaptcha');
43
44		// write to the log after the page content was displayed:
45		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
46
47	}
48
49	/* session information */
50	private $sessionId = null;
51	private $sessionType = '';
52	private $showCaptcha = '-';
53
54	/**
55	 * Inserts tracking code to the page header
56	 * (only called on 'show' actions)
57	 *
58	 * @param Event $event event object by reference
59	 * @return void
60	 */
61	public function insertHeader(Event $event, $param) {
62
63		global $INFO;
64
65		// populate the session id and type:
66		$this->getSessionInfo();
67
68		// build the tracker code:
69		$code = $this->getBMHeader();
70
71		// add the deferred script loader::
72		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
73		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
74		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
75		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
76		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
77		$code .= DOKU_TAB . DOKU_TAB . "});";
78		$event->data['script'][] = ['_data' => $code];
79	}
80
81	/* create the BM object code for insertion into a script element: */
82	private function getBMHeader() {
83
84		// build the tracker code:
85		$code = DOKU_TAB . DOKU_TAB . "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
86
87		// is there a user logged in?
88		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
89		if ($username) {
90			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
91		}
92
93		return $code;
94
95	}
96
97	/**
98	 * Inserts tracking code to the page header
99	 * (only called on 'show' actions)
100	 *
101	 * @param Event $event event object by reference
102	 * @return void
103	 */
104	public function insertAdminHeader(Event $event, $param) {
105
106		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
107		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
108	}
109
110	/**
111	 * Writes data to the server log.
112	 *
113	 * @return void
114	 */
115	public function writeServerLog(Event $event, $param) {
116
117		global $conf;
118		global $INFO;
119
120		// is there a user logged in?
121		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
122					?  $INFO['userinfo']['name'] : '');
123
124		// clean the page ID
125		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
126
127		// create the log array:
128		$logArr = Array(
129			$_SERVER['REMOTE_ADDR'], /* remote IP */
130			$pageId, /* page ID */
131			$this->sessionId, /* Session ID */
132			$this->sessionType, /* session ID type */
133			$username, /* user name */
134			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
135			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
136			substr($conf['lang'],0,2), /* page language */
137			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
138			$this->getCountryCode(), /* GeoIP country code */
139			$this->showCaptcha /* show captcha? */
140		);
141
142		//* create the log line */
143		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
144		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
145		foreach ($logArr as $tab) {
146			$logline .= "\t" . $tab;
147		};
148
149		/* write the log line to the file */
150		$logfile = fopen($filename, 'a');
151		if (!$logfile) die();
152		if (fwrite($logfile, $logline . "\n") === false) {
153			fclose($logfile);
154			die();
155		}
156
157		/* Done */
158		fclose($logfile);
159	}
160
161	private function getCountryCode() {
162
163		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
164
165		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
166
167		try {
168
169			// use GeoIP module?
170			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
171				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
172				$country = ($result ? $result : $country);
173			}
174		} catch (Exception $e) {
175			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
176		}
177
178		return $country;
179	}
180
181	private function getSessionInfo() {
182
183		// what is the session identifier?
184		if (isset($_SESSION)) {
185			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
186			foreach ($sesKeys as $key) {
187				if (substr($key, 0, 2) == 'DW') {
188					$this->sessionId = $key;
189					$this->sessionType = 'dw';
190					return;
191				}
192			}
193		}
194		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
195			$this->sessionId = session_id();
196			$this->sessionType = 'php';
197		}
198		if (!$this->sessionId) { /* no PHP session ID, try IP address */
199			$this->sessionId = $_SERVER['REMOTE_ADDR'];
200			$this->sessionType = 'ip';
201		}
202	}
203
204	public function showCaptcha(Event $event) {
205
206		$useCaptcha = $this->getConf('useCaptcha');
207
208		$cCode = '-';
209		if ($useCaptcha !== 'disabled') {
210			if ($this->captchaWhitelisted()) {
211				$cCode = 'W'; // whitelisted
212			} elseif ($this->hasCaptchaCookie()) {
213				$cCode  = 'N'; // user already has a cookie
214			} else {
215				$cCode  = 'Y'; // show the captcha
216
217
218				echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
219				$event->preventDefault(); // don't show normal content
220				switch ($useCaptcha) {
221					case 'blank':
222						$this->insertBlankBox();  // show dada filler instead of text
223						break;
224					case 'dada':
225						$this->insertDadaFiller();  // show dada filler instead of text
226						break;
227				}
228				$this->insertCaptchaLoader(); // and load the captcha
229			}
230		}
231		$this->showCaptcha = $cCode; // store the captcha code for the logfile
232
233	}
234
235	public function showImageCaptcha(Event $event, $param) {
236
237		$useCaptcha = $this->getConf('useCaptcha');
238
239		echo '<script>' . $this->getBMHeader($event, $param) . '</script>';
240
241		$cCode = '-';
242		if ($useCaptcha !== 'disabled') {
243			if ($this->captchaWhitelisted()) {
244				$cCode = 'W'; // whitelisted
245			}
246			elseif ($this->hasCaptchaCookie()) {
247				$cCode  = 'N'; // user already has a cookie
248			}
249			else {
250				$cCode  = 'Y'; // show the captcha
251
252				echo '<svg width="100%" height="100%" viewBox="0 0 800 400" version="1.1" xmlns="http://www.w3.org/2000/svg"><path d="M1,1l798,398" style="fill:none;stroke:#f00;stroke-width:1px;"/><path d="M1,399l798,-398" style="fill:none;stroke:#f00;stroke-width:1px;"/><rect x="1" y="1" width="798" height="398" style="fill:none;stroke:#000;stroke-width:1px;"/></svg>'; // placeholder image
253				$event->preventDefault(); // don't show normal content
254
255				// TODO Insert dummy image
256				$this->insertCaptchaLoader(); // and load the captcha
257			}
258		};
259
260		$this->showCaptcha = $cCode; // store the captcha code for the logfile
261	}
262
263	private function hasCaptchaCookie() {
264
265		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
266
267		$today = substr((new DateTime())->format('c'), 0, 10);
268
269		$raw = $this->getConf('captchaSeed') . '|' . $_SERVER['SERVER_NAME'] . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
270		$expected = hash('sha256', $raw);
271
272		//echo '<ul><li>cookie: ' . $cookieVal . '</li><li>expected: ' . $expected . '</li><li>matches: ' .($cookieVal == $expected ? 'true' : 'false') . '</li></ul>';
273
274		return $cookieVal == $expected;
275	}
276
277	// check if the visitor's IP is on a whitelist:
278	private function captchaWhitelisted() {
279
280		// normalise IP address:
281		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
282
283		// find which file to open:
284		$prefixes = ['user', 'default'];
285		foreach ($prefixes as $pre) {
286			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
287			if (file_exists($filename)) {
288				break;
289			}
290		}
291
292		if (file_exists($filename)) {
293			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
294			foreach ($lines as $line) {
295				if (trim($line) !== '' && !str_starts_with($line, '#')) {
296					$col = explode("\t", $line);
297					if (count($col) >= 2) {
298						$from = inet_pton($col[0]);
299						$to = inet_pton($col[1]);
300
301						if ($ip >= $from && $ip <= $to) {
302							return true; /* IP whitelisted */
303						}
304					}
305				}
306			}
307		}
308		return false; /* IP not found in whitelist */
309	}
310
311	private function insertCaptchaLoader() {
312
313		echo '<script>' . NL;
314
315		// add the deferred script loader::
316		echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
317		echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
318		echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
319		echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
320		echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
321		echo  DOKU_TAB . "});";
322
323		// add the locales for the captcha:
324		echo  DOKU_TAB . '$BMLocales = {' . NL;
325		echo  DOKU_TAB . DOKU_TAB . '"dlgTitle": ' . json_encode($this->getLang('bm_dlgTitle')) . ',' . NL;
326		echo  DOKU_TAB . DOKU_TAB . '"dlgSubtitle": ' . json_encode($this->getLang('bm_dlgSubtitle')) . ',' . NL;
327		echo  DOKU_TAB . DOKU_TAB . '"dlgConfirm": ' . json_encode($this->getLang('bm_dlgConfirm')) . ',' . NL;
328		echo  DOKU_TAB . DOKU_TAB . '"dlgChecking": ' . json_encode($this->getLang('bm_dlgChecking')) . ',' . NL;
329		echo  DOKU_TAB . DOKU_TAB . '"dlgLoading": ' . json_encode($this->getLang('bm_dlgLoading')) . ',' . NL;
330		echo  DOKU_TAB . DOKU_TAB . '"dlgError": ' . json_encode($this->getLang('bm_dlgError')) . ',' . NL;
331		echo  DOKU_TAB . '};' . NL;
332
333		echo '</script>' . NL;
334
335	}
336
337	// inserts a blank box to ensure there is enough space for the captcha:
338	private function insertBlankBox() {
339
340		echo '<p style="min-height: 100px;">&nbsp;</p>';
341	}
342
343	/* Generates a few paragraphs of Dada text to show instead of the article content */
344	private function insertDadaFiller() {
345
346		global $conf;
347		global $TOC;
348		global $ID;
349
350		// list of languages to search for the wordlist
351		$langs = array_unique([$conf['lang'], 'la']);
352
353		// find path to the first available wordlist:
354		foreach ($langs as $lang) {
355			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
356			if (file_exists($filename)) {
357				break;
358			}
359		}
360
361		// load the wordlist file:
362		if (file_exists($filename)) {
363			$words = array();
364			$totalWeight = 0;
365			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
366			foreach ($lines as $line) {
367				$arr = explode("\t", $line);
368				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
369				$totalWeight += (int) $arr[1];
370				array_push($words, $arr);
371			}
372		} else {
373			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
374			return;
375		}
376
377		// If a TOC exists, use it for the headlines:
378		if(is_array($TOC)) {
379			$toc = $TOC;
380		} else {
381			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
382			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
383			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
384		}
385		if (!$toc) { // no TOC, generate my own:
386			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
387			$toc = array();
388			for ($i=0; $i<$hlCount; $i++) {
389				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
390			}
391		}
392
393		// if H1 heading is not in the TOC, add a chappeau section:
394		$chapeauCount = mt_rand(1, 3);
395		if ((int) $conf['toptoclevel'] > 1) {
396			echo "<div class=\"level1\">\n";
397			for ($i=0; $i<$chapeauCount; $i++) {
398				echo $this->dadaMakeParagraph($words, $totalWeight);
399			}
400			echo "</div>\n";
401		}
402
403		//  text sections for each sub-headline:
404		foreach ($toc as $hl) {
405			echo $this->dadaMakeSection($words, $totalWeight, $hl);
406		}
407	}
408
409	private function dadaMakeSection($words, $totalWeight, $hl) {
410
411		global $conf;
412
413		// how many paragraphs?
414		$paragraphCount = mt_rand(1, 4);
415
416		// section level
417		$topTocLevel = (int) $conf['toptoclevel'];
418		$secLevel = $hl['level'] + 1;;
419
420		// return value:
421		$sec = "";
422
423		// make a headline:
424		if ($topTocLevel > 1 || $secLevel > 1) {
425			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
426		}
427
428		// add the paragraphs:
429		$sec .= "<div class=\"level{$secLevel}\">\n";
430		for ($i=0; $i<$paragraphCount; $i++) {
431			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
432		}
433		$sec .= "</div>\n";
434
435		return $sec;
436	}
437
438	private function dadaMakeHeadline($words, $totalWeight) {
439
440		// how many words to generate?
441		$wordCount = mt_rand(2, 5);
442
443		// function returns an array:
444		$r = Array();
445
446		// generate the headline:
447		$hlArr = array();
448		for ($i=0; $i<$wordCount; $i++) {
449			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
450		}
451
452		$r['title'] =  ucfirst(implode(' ', $hlArr));
453
454		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
455		$r['type'] = 'ul'; // always ul!
456		$r['level'] = 1; // always level 1 for now
457
458		return $r;
459	}
460
461	private function dadaMakeParagraph($words, $totalWeight) {
462
463		// how many words to generate?
464		$sentenceCount = mt_rand(2, 5);
465
466		$paragraph = array();
467		for ($i=0; $i<$sentenceCount; $i++) {
468			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
469		}
470
471		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
472
473	}
474
475	private function dadaMakeSentence($words, $totalWeight) {
476
477		// how many words to generate?
478		$wordCount = mt_rand(4, 20);
479
480		// generate the sentence:
481		$sentence = array();
482		for ($i=0; $i<$wordCount; $i++) {
483			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
484		}
485
486		return ucfirst(implode(' ', $sentence)) . '.';
487
488	}
489
490	private function dadaSelectRandomWord($list, $totalWeight) {
491
492		// get a random selection:
493		$rand = mt_rand(0, $totalWeight);
494
495		// match the selection to the weighted list:
496		$cumulativeWeight = 0;
497		for ($i=0; $i<count($list); $i++) {
498			$cumulativeWeight += $list[$i][1];
499			if ($cumulativeWeight >= $rand) {
500				return $list[$i][0];
501			}
502		}
503		return '***';
504	}
505
506}