xref: /plugin/botmon/action.php (revision cdc02cd4c39f99df88e41b6048475b40280560ad)
1<?php
2
3use dokuwiki\Extension\EventHandler;
4use dokuwiki\Extension\Event;
5use dokuwiki\Logger;
6
7/**
8 * Action Component for the Bot Monitoring Plugin
9 *
10 * @license	GPL 3 (http://www.gnu.org/licenses/gpl.html)
11 * @author	 Sascha Leib <sascha.leib(at)kolmio.com>
12 */
13
14class action_plugin_botmon extends DokuWiki_Action_Plugin {
15
16	/**
17	 * Registers a callback functions
18	 *
19	 * @param EventHandler $controller DokuWiki's event controller object
20	 * @return void
21	 */
22	public function register(EventHandler $controller) {
23
24		global $ACT;
25
26		// insert header data into the page:
27		if ($ACT == 'show') {
28			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
29		} else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
30			$controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
31		}
32
33		// Override the page rendering, if a captcha needs to be displayed:
34		if ($ACT !== 'admin') {
35			$controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'showCaptcha');
36		}
37
38		// write to the log after the page content was displayed:
39		$controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
40
41	}
42
43	/* session information */
44	private $sessionId = null;
45	private $sessionType = '';
46
47	/**
48	 * Inserts tracking code to the page header
49	 * (only called on 'show' actions)
50	 *
51	 * @param Event $event event object by reference
52	 * @return void
53	 */
54	public function insertHeader(Event $event, $param) {
55
56		global $INFO;
57
58		// populate the session id and type:
59		$this->getSessionInfo();
60
61		// is there a user logged in?
62		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name']) ?  $INFO['userinfo']['name'] : '');
63
64		// build the tracker code:
65		$code = "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId) . ", seed: " . json_encode($this->getConf('captchaSeed')) . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
66		if ($username) {
67			$code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = "' . $username . '";'. NL;
68		}
69
70		// add the deferred script loader::
71		$code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
72		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
73		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
74		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='".DOKU_BASE."lib/plugins/botmon/client.js';" . NL;
75		$code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
76		$code .= DOKU_TAB . DOKU_TAB . "});";
77		$event->data['script'][] = ['_data' => $code];
78	}
79
80	/**
81	 * Inserts tracking code to the page header
82	 * (only called on 'show' actions)
83	 *
84	 * @param Event $event event object by reference
85	 * @return void
86	 */
87	public function insertAdminHeader(Event $event, $param) {
88
89		$event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE.'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
90		$event->data['script'][] = ['src' => DOKU_BASE.'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
91	}
92
93	/**
94	 * Writes data to the server log.
95	 *
96	 * @return void
97	 */
98	public function writeServerLog(Event $event, $param) {
99
100		global $conf;
101		global $INFO;
102
103		// is there a user logged in?
104		$username = ( !empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
105					?  $INFO['userinfo']['name'] : '');
106
107		// clean the page ID
108		$pageId = preg_replace('/[\x00-\x1F]/', "\u{FFFD}", $INFO['id'] ?? '');
109
110		// create the log array:
111		$logArr = Array(
112			$_SERVER['REMOTE_ADDR'], /* remote IP */
113			$pageId, /* page ID */
114			$this->sessionId, /* Session ID */
115			$this->sessionType, /* session ID type */
116			$username, /* user name */
117			$_SERVER['HTTP_USER_AGENT'] ?? '', /* User agent */
118			$_SERVER['HTTP_REFERER'] ?? '', /* HTTP Referrer */
119			substr($conf['lang'],0,2), /* page language */
120			implode(',', array_unique(array_map( function($it) { return substr(trim($it),0,2); }, explode(',',trim($_SERVER['HTTP_ACCEPT_LANGUAGE'], " \t;,*"))))), /* accepted client languages */
121			$this->getCountryCode() /* GeoIP country code */
122		);
123
124		//* create the log line */
125		$filename = __DIR__ .'/logs/' . gmdate('Y-m-d') . '.srv.txt'; /* use GMT date for filename */
126		$logline = gmdate('Y-m-d H:i:s'); /* use GMT time for log entries */
127		foreach ($logArr as $tab) {
128			$logline .= "\t" . $tab;
129		};
130
131		/* write the log line to the file */
132		$logfile = fopen($filename, 'a');
133		if (!$logfile) die();
134		if (fwrite($logfile, $logline . "\n") === false) {
135			fclose($logfile);
136			die();
137		}
138
139		/* Done */
140		fclose($logfile);
141	}
142
143	private function getCountryCode() {
144
145		$country = ( $_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ' ); // default if no geoip is available!
146
147		$lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */
148
149		try {
150
151			// use GeoIP module?
152			if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) { // Use PHP GeoIP module
153				$result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
154				$country = ($result ? $result : $country);
155			}
156		} catch (Exception $e) {
157			Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
158		}
159
160		return $country;
161	}
162
163	private function getSessionInfo() {
164
165		// what is the session identifier?
166		if (isset($_SESSION)) {
167			$sesKeys = array_keys($_SESSION); /* DokuWiki Session ID preferred */
168			foreach ($sesKeys as $key) {
169				if (substr($key, 0, 2) == 'DW') {
170					$this->sessionId = $key;
171					$this->sessionType = 'dw';
172					return;
173				}
174			}
175		}
176		if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
177			$this->sessionId = session_id();
178			$this->sessionType = 'php';
179		}
180		if (!$this->sessionId) { /* no PHP session ID, try IP address */
181			$this->sessionId = $_SERVER['REMOTE_ADDR'];
182			$this->sessionType = 'ip';
183		}
184		if (!$this->sessionId) { /* if everything else fails, just us a random ID */
185			$this->sessionId = rand(1000000, 9999999);
186			$this->sessionType = 'rand';
187		}
188	}
189
190	public function showCaptcha(Event $event) {
191
192		$useCaptcha = $this->getConf('useCaptcha');
193
194		if ($useCaptcha !== 'disabled' && $this->checkCaptchaCookie()) {
195			echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
196			$event->preventDefault(); // don't show normal content
197			switch ($useCaptcha) {
198				case 'blank':
199					$this->insertBlankBox();  // show dada filler instead of text
200					break;
201				case 'dada':
202					$this->insertDadaFiller();  // show dada filler instead of text
203					break;
204			}
205			$this->insertCaptchaLoader(); // and load the captcha
206		}
207	}
208
209	private function checkCaptchaCookie() {
210
211		$cookieVal = isset($_COOKIE['DWConfirm']) ? $_COOKIE['DWConfirm'] : null;
212
213		$today = substr((new DateTime())->format('c'), 0, 10);
214
215		$raw = $this->getConf('captchaSeed') . '|' . $_SERVER['SERVER_NAME'] . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
216		$expected = hash('sha256', $raw);
217
218		//echo '<ul><li>cookie: ' . $cookieVal . '</li><li>expected: ' . $expected . '</li><li>matches: ' .($cookieVal == $expected ? 'true' : 'false') . '</li></ul>';
219
220		return $cookieVal !== $expected;
221	}
222
223	private function insertCaptchaLoader() {
224		echo '<script>' . NL;
225
226		// add the deferred script loader::
227		echo  DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
228		echo  DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
229		echo  DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
230		echo  DOKU_TAB . DOKU_TAB . "cj.src='".DOKU_BASE."lib/plugins/botmon/captcha.js';" . NL;
231		echo  DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
232		echo  DOKU_TAB . "});";
233		echo '</script>' . NL;
234
235	}
236
237	// inserts a blank box to ensure there is enough space for the captcha:
238	private function insertBlankBox() {
239
240		echo '<p style="min-height: 100px;">&nbsp;</p>';
241	}
242
243	/* Generates a few paragraphs of Dada text to show instead of the article content */
244	private function insertDadaFiller() {
245
246		global $conf;
247		global $TOC;
248		global $ID;
249
250		// list of languages to search for the wordlist
251		$langs = array_unique([$conf['lang'], 'la']);
252
253		// find path to the first available wordlist:
254		foreach ($langs as $lang) {
255			$filename = __DIR__ .'/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
256			if (file_exists($filename)) {
257				break;
258			}
259		}
260
261		// load the wordlist file:
262		if (file_exists($filename)) {
263			$words = array();
264			$totalWeight = 0;
265			$lines = file($filename, FILE_SKIP_EMPTY_LINES);
266			foreach ($lines as $line) {
267				$arr = explode("\t", $line);
268				$arr[1] = ( count($arr) > 1 ? (int) trim($arr[1]) : 1 );
269				$totalWeight += (int) $arr[1];
270				array_push($words, $arr);
271			}
272		} else {
273			echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
274			return;
275		}
276
277		// If a TOC exists, use it for the headlines:
278		if(is_array($TOC)) {
279			$toc = $TOC;
280		} else {
281			$meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
282			//$tocok = (isset($meta['internal']['toc']) ? $meta['internal']['toc'] : $tocok = true);
283			$toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
284		}
285		if (!$toc) { // no TOC, generate my own:
286			$hlCount = mt_rand(0, (int) $conf['tocminheads']);
287			$toc = array();
288			for ($i=0; $i<$hlCount; $i++) {
289				array_push($toc, $this->dadaMakeHeadline($words, $totalWeight)); // $toc
290			}
291		}
292
293		// if H1 heading is not in the TOC, add a chappeau section:
294		$chapeauCount = mt_rand(1, 3);
295		if ((int) $conf['toptoclevel'] > 1) {
296			echo "<div class=\"level1\">\n";
297			for ($i=0; $i<$chapeauCount; $i++) {
298				echo $this->dadaMakeParagraph($words, $totalWeight);
299			}
300			echo "</div>\n";
301		}
302
303		//  text sections for each sub-headline:
304		foreach ($toc as $hl) {
305			echo $this->dadaMakeSection($words, $totalWeight, $hl);
306		}
307	}
308
309	private function dadaMakeSection($words, $totalWeight, $hl) {
310
311		global $conf;
312
313		// how many paragraphs?
314		$paragraphCount = mt_rand(1, 4);
315
316		// section level
317		$topTocLevel = (int) $conf['toptoclevel'];
318		$secLevel = $hl['level'] + 1;;
319
320		// return value:
321		$sec = "";
322
323		// make a headline:
324		if ($topTocLevel > 1 || $secLevel > 1) {
325			$sec .= "<h{$secLevel} id=\"{$hl['hid']}\">{$hl['title']}</h{$secLevel}>\n";
326		}
327
328		// add the paragraphs:
329		$sec .= "<div class=\"level{$secLevel}\">\n";
330		for ($i=0; $i<$paragraphCount; $i++) {
331			$sec .= $this->dadaMakeParagraph($words, $totalWeight);
332		}
333		$sec .= "</div>\n";
334
335		return $sec;
336	}
337
338	private function dadaMakeHeadline($words, $totalWeight) {
339
340		// how many words to generate?
341		$wordCount = mt_rand(2, 5);
342
343		// function returns an array:
344		$r = Array();
345
346		// generate the headline:
347		$hlArr = array();
348		for ($i=0; $i<$wordCount; $i++) {
349			array_push($hlArr, $this->dadaSelectRandomWord($words, $totalWeight));
350		}
351
352		$r['title'] =  ucfirst(implode(' ', $hlArr));
353
354		$r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
355		$r['type'] = 'ul'; // always ul!
356		$r['level'] = 1; // always level 1 for now
357
358		return $r;
359	}
360
361	private function dadaMakeParagraph($words, $totalWeight) {
362
363		// how many words to generate?
364		$sentenceCount = mt_rand(2, 5);
365
366		$paragraph = array();
367		for ($i=0; $i<$sentenceCount; $i++) {
368			array_push($paragraph, $this->dadaMakeSentence($words, $totalWeight));
369		}
370
371		return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
372
373	}
374
375	private function dadaMakeSentence($words, $totalWeight) {
376
377		// how many words to generate?
378		$wordCount = mt_rand(4, 20);
379
380		// generate the sentence:
381		$sentence = array();
382		for ($i=0; $i<$wordCount; $i++) {
383			array_push($sentence, $this->dadaSelectRandomWord($words, $totalWeight));
384		}
385
386		return ucfirst(implode(' ', $sentence)) . '.';
387
388	}
389
390	private function dadaSelectRandomWord($list, $totalWeight) {
391
392		// get a random selection:
393		$rand = mt_rand(0, $totalWeight);
394
395		// match the selection to the weighted list:
396		$cumulativeWeight = 0;
397		for ($i=0; $i<count($list); $i++) {
398			$cumulativeWeight += $list[$i][1];
399			if ($cumulativeWeight >= $rand) {
400				return $list[$i][0];
401			}
402		}
403		return '***';
404	}
405
406}