xref: /plugin/botmon/action.php (revision 2c641262223e62a95e87ba791a84b69fdaf4310b)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	/**
17	 * Registers a callback functions
18	 *
19	 * @param EventHandler $controller DokuWiki's event controller object
20	 * @return void
21	 */
22	public function register(EventHandler $controller) {
23
24		global $ACT;
25
26		// insert header data into the page:
27		if ($ACT == 'show') {
28			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
29		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
30			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
31		}
32
33		// Override the page rendering, if a captcha needs to be displayed:
34		if ($ACT !== 'admin') {
35			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'showCaptcha');
36		}
37
38		// write to the log after the page content was displayed:
39		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
40
41	}
42
	/* session information */
	/** @var string|int|null visitor identifier; stays null until getSessionInfo() assigns one */
	private $sessionId = null;
	/** @var string how $sessionId was obtained: 'dw', 'php', 'ip' or 'rand' ('' while unset) */
	private $sessionType = '';
	/** @var string captcha state written to the server log: '-' until showCaptcha() sets 'Y' or 'N' */
	private $showCaptcha = '-';
47
48	/**
49	 * Inserts tracking code to the page header
50	 * (only called on 'show' actions)
51	 *
52	 * @param Event $event event object by reference
53	 * @return void
54	 */
55	public function insertHeader(Event $event, $param) {
56
57		global $INFO;
58
59		// populate the session id and type:
60		$this->getSessionInfo();
61
62		// is there a user logged in?
63		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
64
65		// build the tracker code:
66		$code = "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
67		if ($username) {
68			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
69		}
70
71		// add the deferred script loader::
72		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
73		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
74		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
75		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
76		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
77		$code .= DOKU_TAB . DOKU_TAB . "});";
78		$event->data['script'][] = ['_data' => $code];
79	}
80
81	/**
82	 * Inserts tracking code to the page header
83	 * (only called on 'show' actions)
84	 *
85	 * @param Event $event event object by reference
86	 * @return void
87	 */
88	public function insertAdminHeader(Event $event, $param) {
89
90		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
91		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
92	}
93
94	/**
95	 * Writes data to the server log.
96	 *
97	 * @return void
98	 */
99	public function writeServerLog(Event $event, $param) {
100
101		global $conf;
102		global $INFO;
103
104		// is there a user logged in?
105		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
106					?  $INFO['userinfo']['name'] : '');
107
108		// clean the page ID
109		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
110
111		// create the log array:
112		$logArr = Array(
113			$_SERVER['REMOTE_ADDR'], /* remote IP */
114			$pageId, /* page ID */
115			$this->sessionId, /* Session ID */
116			$this->sessionType, /* session ID type */
117			$username, /* user name */
118			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
119			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
120			substr($conf['lang'],0,2), /* page language */
121			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
122			$this->getCountryCode(), /* GeoIP country code */
123			$this->showCaptcha /* show captcha? */		);
124
125		//* create the log line */
126		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
127		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
128		foreach ($logArr as $tab) {
129			$logline .= "\t" . $tab;
130		};
131
132		/* write the log line to the file */
133		$logfile = fopen($filename, 'a');
134		if (!$logfile) die();
135		if (fwrite($logfile, $logline . "\n") === false) {
136			fclose($logfile);
137			die();
138		}
139
140		/* Done */
141		fclose($logfile);
142	}
143
144	private function getCountryCode() {
145
146		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
147
148		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
149
150		try {
151
152			// use GeoIP module?
153			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
154				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
155				$country = ($result ? $result : $country);
156			}
157		} catch (Exception $e) {
158			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
159		}
160
161		return $country;
162	}
163
164	private function getSessionInfo() {
165
166		// what is the session identifier?
167		if (isset($_SESSION)) {
168			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
169			foreach ($sesKeys as $key) {
170				if (substr($key, 0, 2) == 'DW') {
171					$this->sessionId = $key;
172					$this->sessionType = 'dw';
173					return;
174				}
175			}
176		}
177		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
178			$this->sessionId = session_id();
179			$this->sessionType = 'php';
180		}
181		if (!$this->sessionId) { /* no PHP session ID, try IP address */
182			$this->sessionId = $_SERVER['REMOTE_ADDR'];
183			$this->sessionType = 'ip';
184		}
185		if (!$this->sessionId) { /* if everything else fails, just us a random ID */
186			$this->sessionId = rand(1000000, 9999999);
187			$this->sessionType = 'rand';
188		}
189	}
190
191	public function showCaptcha(Event $event) {
192
193		$useCaptcha = $this->getConf('useCaptcha');
194
195		if ($useCaptcha !== 'disabled' && $this->checkCaptchaCookie() && !$this->captchaWhitelisted()) {
196
197			$this->showCaptcha = 'Y'; // captcha will be shown.
198
199			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
200			$event->preventDefault(); // don't show normal content
201			switch ($useCaptcha) {
202				case 'blank':
203					$this->insertBlankBox();  // show dada filler instead of text
204					break;
205				case 'dada':
206					$this->insertDadaFiller();  // show dada filler instead of text
207					break;
208			}
209			$this->insertCaptchaLoader(); // and load the captcha
210		} else {
211			$this->showCaptcha = 'N'; // do not show a captcha
212		}
213	}
214
215	private function checkCaptchaCookie() {
216
217		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
218
219		$today = substr((new DateTime())->format('c'), 0, 10);
220
221		$raw = $this->getConf('captchaSeed') . '|' . $_SERVER['SERVER_NAME'] . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
222		$expected = hash('sha256', $raw);
223
224		//echo '<ul><li>cookie: ' . $cookieVal . '</li><li>expected: ' . $expected . '</li><li>matches: ' .($cookieVal == $expected ? 'true' : 'false') . '</li></ul>';
225
226		return $cookieVal !== $expected;
227	}
228
229	// check if the visitor's IP is on a whitelist:
230	private function captchaWhitelisted() {
231
232		// normalise IP address:
233		$ip = inet_pton($_SERVER['REMOTE_ADDR']);
234
235		// find which file to open:
236		$prefixes = ['user', 'default'];
237		foreach ($prefixes as $pre) {
238			$filename = __DIR__ .'/config/' . $pre . '-whitelist.txt';
239			if (file_exists($filename)) {
240				break;
241			}
242		}
243
244		if (file_exists($filename)) {
245			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
246			foreach ($lines as $line) {
247				if (trim($line) !== '' && !str_starts_with($line, '#')) {
248					$col = explode("\t", $line);
249					if (count($col) >= 2) {
250						$from = inet_pton($col[0]);
251						$to = inet_pton($col[1]);
252
253						if ($ip >= $from && $ip <= $to) {
254							//echo "<p>Found my IP in range: " . $col[0] . " - " . $col[1] . "</p>";
255							return true;
256						}
257					}
258				}
259			}
260		}
261
262		return false;
263	}
264
265	private function insertCaptchaLoader() {
266		echo '<script>' . NL;
267
268		// add the deferred script loader::
269		echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
270		echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
271		echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
272		echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
273		echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
274		echo  DOKU_TAB . "});";
275		echo '</script>' . NL;
276
277	}
278
279	// inserts a blank box to ensure there is enough space for the captcha:
280	private function insertBlankBox() {
281
282		echo '<p style="min-height: 100px;">&nbsp;</p>';
283	}
284
285	/* Generates a few paragraphs of Dada text to show instead of the article content */
286	private function insertDadaFiller() {
287
288		global $conf;
289		global $TOC;
290		global $ID;
291
292		// list of languages to search for the wordlist
293		$langs = array_unique([$conf['lang'], 'la']);
294
295		// find path to the first available wordlist:
296		foreach ($langs as $lang) {
297			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
298			if (file_exists($filename)) {
299				break;
300			}
301		}
302
303		// load the wordlist file:
304		if (file_exists($filename)) {
305			$words = array();
306			$totalWeight = 0;
307			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
308			foreach ($lines as $line) {
309				$arr = explode("\t", $line);
310				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
311				$totalWeight += (int) $arr[1];
312				array_push($words, $arr);
313			}
314		} else {
315			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
316			return;
317		}
318
319		// If a TOC exists, use it for the headlines:
320		if(is_array($TOC)) {
321			$toc = $TOC;
322		} else {
323			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
324			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
325			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
326		}
327		if (!$toc) { // no TOC, generate my own:
328			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
329			$toc = array();
330			for ($i=0; $i<$hlCount; $i++) {
331				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
332			}
333		}
334
335		// if H1 heading is not in the TOC, add a chappeau section:
336		$chapeauCount = mt_rand(1, 3);
337		if ((int) $conf['toptoclevel'] > 1) {
338			echo "<div class=\"level1\">\n";
339			for ($i=0; $i<$chapeauCount; $i++) {
340				echo $this->dadaMakeParagraph($words, $totalWeight);
341			}
342			echo "</div>\n";
343		}
344
345		//  text sections for each sub-headline:
346		foreach ($toc as $hl) {
347			echo $this->dadaMakeSection($words, $totalWeight, $hl);
348		}
349	}
350
351	private function dadaMakeSection($words, $totalWeight, $hl) {
352
353		global $conf;
354
355		// how many paragraphs?
356		$paragraphCount = mt_rand(1, 4);
357
358		// section level
359		$topTocLevel = (int) $conf['toptoclevel'];
360		$secLevel = $hl['level'] + 1;;
361
362		// return value:
363		$sec = "";
364
365		// make a headline:
366		if ($topTocLevel > 1 || $secLevel > 1) {
367			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
368		}
369
370		// add the paragraphs:
371		$sec .= "<div class=\"level{$secLevel}\">\n";
372		for ($i=0; $i<$paragraphCount; $i++) {
373			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
374		}
375		$sec .= "</div>\n";
376
377		return $sec;
378	}
379
380	private function dadaMakeHeadline($words, $totalWeight) {
381
382		// how many words to generate?
383		$wordCount = mt_rand(2, 5);
384
385		// function returns an array:
386		$r = Array();
387
388		// generate the headline:
389		$hlArr = array();
390		for ($i=0; $i<$wordCount; $i++) {
391			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
392		}
393
394		$r['title'] =  ucfirst(implode(' ', $hlArr));
395
396		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
397		$r['type'] = 'ul'; // always ul!
398		$r['level'] = 1; // always level 1 for now
399
400		return $r;
401	}
402
403	private function dadaMakeParagraph($words, $totalWeight) {
404
405		// how many words to generate?
406		$sentenceCount = mt_rand(2, 5);
407
408		$paragraph = array();
409		for ($i=0; $i<$sentenceCount; $i++) {
410			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
411		}
412
413		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
414
415	}
416
417	private function dadaMakeSentence($words, $totalWeight) {
418
419		// how many words to generate?
420		$wordCount = mt_rand(4, 20);
421
422		// generate the sentence:
423		$sentence = array();
424		for ($i=0; $i<$wordCount; $i++) {
425			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
426		}
427
428		return ucfirst(implode(' ', $sentence)) . '.';
429
430	}
431
432	private function dadaSelectRandomWord($list, $totalWeight) {
433
434		// get a random selection:
435		$rand = mt_rand(0, $totalWeight);
436
437		// match the selection to the weighted list:
438		$cumulativeWeight = 0;
439		for ($i=0; $i<count($list); $i++) {
440			$cumulativeWeight += $list[$i][1];
441			if ($cumulativeWeight >= $rand) {
442				return $list[$i][0];
443			}
444		}
445		return '***';
446	}
447
448}