<?php

use dokuwiki\Extension\EventHandler;
use dokuwiki\Extension\Event;
use dokuwiki\Logger;

/**
 * Action Component for the Bot Monitoring Plugin
 *
 * Registers the hooks that inject the client-side tracker, optionally replace
 * the page content with a captcha, and append one line per request to a
 * tab-separated server log.
 *
 * @license GPL 3 (http://www.gnu.org/licenses/gpl.html)
 * @author  Sascha Leib <sascha.leib(at)kolmio.com>
 */
class action_plugin_botmon extends DokuWiki_Action_Plugin {

    /**
     * Registers the callback functions
     *
     * @param EventHandler $controller DokuWiki's event controller object
     * @return void
     */
    public function register(EventHandler $controller) {

        global $ACT;

        // insert header data into the page:
        if ($ACT == 'show') {
            $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertHeader');
        } else if ($ACT == 'admin' && isset($_REQUEST['page']) && $_REQUEST['page'] == 'botmon') {
            $controller->register_hook('TPL_METAHEADER_OUTPUT', 'BEFORE', $this, 'insertAdminHeader');
        }

        // Override the page rendering, if a captcha needs to be displayed:
        if ($ACT !== 'admin') {
            $controller->register_hook('TPL_ACT_RENDER', 'BEFORE', $this, 'showCaptcha');
        }

        // write to the log after the page content was displayed:
        $controller->register_hook('TPL_CONTENT_DISPLAY', 'AFTER', $this, 'writeServerLog');
    }

    /* session information */

    /** @var string|int|null session identifier, filled in by getSessionInfo() */
    private $sessionId = null;

    /** @var string origin of the identifier: 'dw', 'php', 'ip' or 'rand' ('' while unset) */
    private $sessionType = '';

    /** @var string 'Y' / 'N' once showCaptcha() has decided, '-' if it was never asked */
    private $showCaptcha = '-';

    /**
     * Inserts tracking code to the page header
     * (only registered for 'show' actions)
     *
     * @param Event $event event object by reference
     * @param mixed $param unused hook parameter
     * @return void
     */
    public function insertHeader(Event $event, $param) {

        global $INFO;

        // populate the session id and type:
        $this->getSessionInfo();

        // is there a user logged in?
        $username = (!empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
            ? $INFO['userinfo']['name'] : '');

        // build the tracker code:
        $code = "document._botmon = {t0: Date.now(), session: " . json_encode($this->sessionId)
            . ", seed: " . json_encode($this->getConf('captchaSeed'))
            . ", ip: " . json_encode($_SERVER['REMOTE_ADDR']) . "};" . NL;
        if ($username) {
            // The name is user-controlled: emit it as a JSON string literal so quotes,
            // backslashes or markup in it cannot break out of the inline script.
            $code .= DOKU_TAB . DOKU_TAB . 'document._botmon.user = '
                . json_encode($username, JSON_HEX_TAG | JSON_PARTIAL_OUTPUT_ON_ERROR) . ';' . NL;
        }

        // add the deferred script loader:
        $code .= DOKU_TAB . DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
        $code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "const e=document.createElement('script');" . NL;
        $code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.async=true;e.defer=true;" . NL;
        $code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "e.src='" . DOKU_BASE . "lib/plugins/botmon/client.js';" . NL;
        $code .= DOKU_TAB . DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(e);" . NL;
        $code .= DOKU_TAB . DOKU_TAB . "});";
        $event->data['script'][] = ['_data' => $code];
    }

    /**
     * Adds the admin stylesheet and script to the page header
     * (only registered for the plugin's own admin page)
     *
     * @param Event $event event object by reference
     * @param mixed $param unused hook parameter
     * @return void
     */
    public function insertAdminHeader(Event $event, $param) {

        $event->data['link'][] = ['rel' => 'stylesheet', 'href' => DOKU_BASE . 'lib/plugins/botmon/admin.css', 'defer' => 'defer'];
        $event->data['script'][] = ['src' => DOKU_BASE . 'lib/plugins/botmon/admin.js', 'defer' => 'defer', '_data' => ''];
    }

    /**
     * Writes data to the server log.
     *
     * Appends one tab-separated line to logs/<GMT date>.srv.txt. A failure to
     * write is reported through the DokuWiki logger; it does not abort the
     * request, because this hook runs while the page is still being rendered.
     *
     * @param Event $event event object by reference
     * @param mixed $param unused hook parameter
     * @return void
     */
    public function writeServerLog(Event $event, $param) {

        global $conf;
        global $INFO;

        // is there a user logged in?
        $username = (!empty($INFO['userinfo']) && !empty($INFO['userinfo']['name'])
            ? $INFO['userinfo']['name'] : '');

        // two-letter codes of the languages the client accepts (header may be absent):
        $acceptHeader = trim($_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? '', " \t;,*");
        $clientLangs = implode(',', array_unique(array_map(
            function ($it) {
                return substr(trim($it), 0, 2);
            },
            explode(',', $acceptHeader)
        )));

        // create the log array:
        $logArr = [
            $_SERVER['REMOTE_ADDR'],            /* remote IP */
            $INFO['id'] ?? '',                  /* page ID */
            $this->sessionId,                   /* Session ID */
            $this->sessionType,                 /* session ID type */
            $username,                          /* user name */
            $_SERVER['HTTP_USER_AGENT'] ?? '',  /* User agent */
            $_SERVER['HTTP_REFERER'] ?? '',     /* HTTP Referrer */
            substr($conf['lang'], 0, 2),        /* page language */
            $clientLangs,                       /* accepted client languages */
            $this->getCountryCode(),            /* GeoIP country code */
            $this->showCaptcha,                 /* show captcha? */
        ];

        // one timestamp for both file name and entry, so they cannot disagree at midnight:
        $now = time();
        $filename = __DIR__ . '/logs/' . gmdate('Y-m-d', $now) . '.srv.txt'; /* use GMT date for filename */
        $logline = gmdate('Y-m-d H:i:s', $now); /* use GMT time for log entries */
        foreach ($logArr as $field) {
            // Several fields are client-supplied; control characters (tab, CR, LF, ...)
            // are replaced so a request cannot forge extra columns or lines.
            $logline .= "\t" . preg_replace('/[\x00-\x1F]/', "\u{FFFD}", (string) $field);
        }

        /* append the log line; the lock keeps concurrent requests from interleaving */
        if (file_put_contents($filename, $logline . "\n", FILE_APPEND | LOCK_EX) === false) {
            Logger::error('BotMon Plugin: could not write server log', $filename);
        }
    }

    /**
     * Determines the country code of the remote address.
     *
     * @return string result of the GeoIP lookup when the 'phpgeoip' library is
     *                configured and available; otherwise 'local' for 127.0.0.1
     *                and 'ZZ' for everything else
     */
    private function getCountryCode() {

        // default if no geoip is available!
        $country = ($_SERVER['REMOTE_ADDR'] == '127.0.0.1' ? 'local' : 'ZZ');

        $lib = $this->getConf('geoiplib'); /* which library to use? (can only be phpgeoip or disabled) */

        try {
            // use the PHP GeoIP module?
            if ($lib == 'phpgeoip' && extension_loaded('geoip') && geoip_db_avail(GEOIP_COUNTRY_EDITION)) {
                $result = geoip_country_code_by_name($_SERVER['REMOTE_ADDR']);
                $country = ($result ? $result : $country);
            }
        } catch (Exception $e) {
            Logger::error('BotMon Plugin: GeoIP Error', $e->getMessage());
        }

        return $country;
    }

    /**
     * Populates $sessionId and $sessionType.
     *
     * Order of preference: a $_SESSION key starting with 'DW', the PHP session
     * id, the remote IP address, and finally a random number.
     *
     * @return void
     */
    private function getSessionInfo() {

        // what is the session identifier?
        if (isset($_SESSION)) {
            foreach (array_keys($_SESSION) as $key) { /* DokuWiki Session ID preferred */
                if (substr($key, 0, 2) == 'DW') {
                    $this->sessionId = $key;
                    $this->sessionType = 'dw';
                    return;
                }
            }
        }
        if (!$this->sessionId) { /* no DokuWiki Session ID, try PHP session ID */
            $this->sessionId = session_id();
            $this->sessionType = 'php';
        }
        if (!$this->sessionId) { /* no PHP session ID, try IP address */
            $this->sessionId = $_SERVER['REMOTE_ADDR'];
            $this->sessionType = 'ip';
        }
        if (!$this->sessionId) { /* if everything else fails, just use a random ID */
            $this->sessionId = rand(1000000, 9999999);
            $this->sessionType = 'rand';
        }
    }

    /**
     * Replaces the page content with a captcha, if one is required.
     *
     * Records the decision in $showCaptcha ('Y' or 'N') for the server log.
     *
     * @param Event $event event object by reference
     * @return void
     */
    public function showCaptcha(Event $event) {

        $useCaptcha = $this->getConf('useCaptcha');

        if ($useCaptcha !== 'disabled' && $this->checkCaptchaCookie() && !$this->captchaWhitelisted()) {

            $this->showCaptcha = 'Y'; // captcha will be shown.

            echo '<h1 class="sectionedit1">'; tpl_pagetitle(); echo "</h1>\n"; // always show the original page title
            $event->preventDefault(); // don't show normal content
            switch ($useCaptcha) {
                case 'blank':
                    $this->insertBlankBox(); // reserve empty space instead of text
                    break;
                case 'dada':
                    $this->insertDadaFiller(); // show dada filler instead of text
                    break;
            }
            $this->insertCaptchaLoader(); // and load the captcha
        } else {
            $this->showCaptcha = 'N'; // do not show a captcha
        }
    }

    /**
     * Checks whether the visitor still has to pass the captcha.
     *
     * The expected cookie value is the SHA-256 hash of
     * "<captchaSeed>|<server name>|<remote address>|<today as Y-m-d>".
     *
     * @return bool true if the 'DWConfirm' cookie is missing or does not match
     *              (captcha needed), false if it matches
     */
    private function checkCaptchaCookie() {

        $cookieVal = $_COOKIE['DWConfirm'] ?? null;

        $today = (new DateTime())->format('Y-m-d');

        $raw = $this->getConf('captchaSeed') . '|' . ($_SERVER['SERVER_NAME'] ?? '') . '|' . $_SERVER['REMOTE_ADDR'] . '|' . $today;
        $expected = hash('sha256', $raw);

        // hash_equals() compares in constant time; a non-string cookie (e.g. an array) never matches.
        return !(is_string($cookieVal) && hash_equals($expected, $cookieVal));
    }

    /**
     * Checks if the visitor's IP is on a whitelist.
     *
     * Reads config/user-whitelist.txt if it exists, otherwise
     * config/default-whitelist.txt. Lines that are empty or start with '#' are
     * ignored; every other line holds a tab-separated "from" and "to" address
     * (further columns are ignored).
     *
     * @return bool true if the remote address lies within one of the ranges
     */
    private function captchaWhitelisted() {

        // normalise IP address:
        $ip = $this->packIp($_SERVER['REMOTE_ADDR'] ?? '');
        if ($ip === null) {
            return false;
        }

        // find which file to open:
        $filename = null;
        foreach (['user', 'default'] as $pre) {
            $candidate = __DIR__ . '/config/' . $pre . '-whitelist.txt';
            if (file_exists($candidate)) {
                $filename = $candidate;
                break;
            }
        }
        if ($filename === null) {
            return false;
        }

        // FILE_IGNORE_NEW_LINES: otherwise the last column keeps its line break
        // and can never be parsed as an address.
        $lines = file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) {
            return false;
        }

        foreach ($lines as $line) {
            if (trim($line) === '' || strncmp($line, '#', 1) === 0) {
                continue;
            }
            $col = explode("\t", $line);
            if (count($col) < 2) {
                continue;
            }
            $from = $this->packIp($col[0]);
            $to = $this->packIp($col[1]);
            if ($from === null || $to === null) {
                continue;
            }
            // only compare addresses of the same family (4 vs. 16 packed bytes):
            if (strlen($from) !== strlen($ip) || strlen($to) !== strlen($ip)) {
                continue;
            }
            // strcmp() compares byte-wise; the <= / >= operators would compare
            // numerically whenever both packed strings happen to look like numbers.
            if (strcmp($ip, $from) >= 0 && strcmp($ip, $to) <= 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts a textual IPv4/IPv6 address into its packed binary form.
     *
     * @param string $address address in human-readable notation
     * @return string|null packed address, or null if $address is not a valid IP
     */
    private function packIp($address) {

        $address = trim((string) $address);
        if (filter_var($address, FILTER_VALIDATE_IP) === false) {
            return null;
        }
        $packed = inet_pton($address);
        return ($packed === false ? null : $packed);
    }

    /**
     * Prints an inline script that loads captcha.js once the DOM is ready.
     *
     * @return void
     */
    private function insertCaptchaLoader() {
        echo '<script>' . NL;

        // add the deferred script loader:
        echo DOKU_TAB . "addEventListener('DOMContentLoaded', function(){" . NL;
        echo DOKU_TAB . DOKU_TAB . "const cj=document.createElement('script');" . NL;
        echo DOKU_TAB . DOKU_TAB . "cj.async=true;cj.defer=true;cj.type='text/javascript';" . NL;
        echo DOKU_TAB . DOKU_TAB . "cj.src='" . DOKU_BASE . "lib/plugins/botmon/captcha.js';" . NL;
        echo DOKU_TAB . DOKU_TAB . "document.getElementsByTagName('head')[0].appendChild(cj);" . NL;
        echo DOKU_TAB . "});";
        echo '</script>' . NL;
    }

    /**
     * Inserts a blank box to ensure there is enough space for the captcha.
     *
     * @return void
     */
    private function insertBlankBox() {

        echo '<p style="min-height: 100px;"> </p>';
    }

    /**
     * Generates a few paragraphs of Dada text to show instead of the article content.
     *
     * Words come from lang/<language>/wordlist.txt (wiki language first, then
     * 'la'); each line is "word" or "word<TAB>weight". Headlines are taken from
     * the page's table of contents if there is one, otherwise generated.
     *
     * @return void
     */
    private function insertDadaFiller() {

        global $conf;
        global $TOC;
        global $ID;

        // list of languages to search for the wordlist
        $langs = array_unique([$conf['lang'], 'la']);

        // find path to the first available wordlist:
        $filename = null;
        foreach ($langs as $lang) {
            $candidate = __DIR__ . '/lang/' . $lang . '/wordlist.txt'; /* language-specific wordlist */
            if (file_exists($candidate)) {
                $filename = $candidate;
                break;
            }
        }
        if ($filename === null) {
            echo '<script> console.log("Can’t generate filler text: wordlist file not found!"); </script>';
            return;
        }

        // load the wordlist file:
        $words = [];
        $totalWeight = 0;
        $lines = file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        foreach (($lines === false ? [] : $lines) as $line) {
            $arr = explode("\t", $line);
            $word = trim($arr[0]);
            $weight = (count($arr) > 1 ? (int) trim($arr[1]) : 1);
            if ($word === '' || $weight < 1) {
                continue; // such an entry could never be drawn and would distort the weights
            }
            $totalWeight += $weight;
            $words[] = [$word, $weight];
        }

        // If a TOC exists, use it for the headlines:
        if (is_array($TOC)) {
            $toc = $TOC;
        } else {
            $meta = p_get_metadata($ID, '', METADATA_RENDER_USING_CACHE);
            $toc = isset($meta['description']['tableofcontents']) ? $meta['description']['tableofcontents'] : null;
        }
        if (!$toc) { // no TOC, generate my own:
            $hlCount = mt_rand(0, max(0, (int) $conf['tocminheads']));
            $toc = [];
            for ($i = 0; $i < $hlCount; $i++) {
                $toc[] = $this->dadaMakeHeadline($words, $totalWeight);
            }
        }

        // if H1 heading is not in the TOC, add a chapeau section:
        if ((int) $conf['toptoclevel'] > 1) {
            $chapeauCount = mt_rand(1, 3);
            echo "<div class=\"level1\">\n";
            for ($i = 0; $i < $chapeauCount; $i++) {
                echo $this->dadaMakeParagraph($words, $totalWeight);
            }
            echo "</div>\n";
        }

        // text sections for each sub-headline:
        foreach ($toc as $hl) {
            echo $this->dadaMakeSection($words, $totalWeight, $hl);
        }
    }

    /**
     * Builds one filler section: a headline followed by 1-4 paragraphs.
     *
     * @param array $words       weighted word list, items are [word, weight]
     * @param int   $totalWeight sum of all weights in $words
     * @param array $hl          TOC item; the keys 'level', 'hid' and 'title' are read
     * @return string HTML of the section
     */
    private function dadaMakeSection($words, $totalWeight, $hl) {

        global $conf;

        // how many paragraphs?
        $paragraphCount = mt_rand(1, 4);

        // section level (one below the TOC item, limited to what HTML headings allow):
        $topTocLevel = (int) $conf['toptoclevel'];
        $secLevel = max(1, min(6, (int) ($hl['level'] ?? 1) + 1));

        // TOC entries originate from page content, so escape them before output:
        $hid = htmlspecialchars((string) ($hl['hid'] ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $title = htmlspecialchars((string) ($hl['title'] ?? ''), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        // return value:
        $sec = "";

        // make a headline:
        if ($topTocLevel > 1 || $secLevel > 1) {
            $sec .= "<h{$secLevel} id=\"{$hid}\">{$title}</h{$secLevel}>\n";
        }

        // add the paragraphs:
        $sec .= "<div class=\"level{$secLevel}\">\n";
        for ($i = 0; $i < $paragraphCount; $i++) {
            $sec .= $this->dadaMakeParagraph($words, $totalWeight);
        }
        $sec .= "</div>\n";

        return $sec;
    }

    /**
     * Generates a random headline of 2-5 words in the shape of a TOC item.
     *
     * @param array $words       weighted word list, items are [word, weight]
     * @param int   $totalWeight sum of all weights in $words
     * @return array item with the keys 'title', 'hid', 'type' and 'level'
     */
    private function dadaMakeHeadline($words, $totalWeight) {

        // how many words to generate?
        $wordCount = mt_rand(2, 5);

        // generate the headline:
        $hlArr = [];
        for ($i = 0; $i < $wordCount; $i++) {
            $hlArr[] = $this->dadaSelectRandomWord($words, $totalWeight);
        }

        // function returns an array:
        $r = [];
        $r['title'] = ucfirst(implode(' ', $hlArr));
        $r['hid'] = preg_replace('/[^\w\d\-]+/i', '_', strtolower($r['title']));
        $r['type'] = 'ul'; // always ul!
        $r['level'] = 1; // always level 1 for now

        return $r;
    }

    /**
     * Generates a paragraph of 2-5 random sentences.
     *
     * @param array $words       weighted word list, items are [word, weight]
     * @param int   $totalWeight sum of all weights in $words
     * @return string the paragraph wrapped in a <p> element
     */
    private function dadaMakeParagraph($words, $totalWeight) {

        // how many sentences to generate?
        $sentenceCount = mt_rand(2, 5);

        $paragraph = [];
        for ($i = 0; $i < $sentenceCount; $i++) {
            $paragraph[] = $this->dadaMakeSentence($words, $totalWeight);
        }

        return "<p>\n" . implode(' ', $paragraph) . "\n</p>\n";
    }

    /**
     * Generates a sentence of 4-20 random words.
     *
     * @param array $words       weighted word list, items are [word, weight]
     * @param int   $totalWeight sum of all weights in $words
     * @return string the sentence, first letter upper-cased, ending in a full stop
     */
    private function dadaMakeSentence($words, $totalWeight) {

        // how many words to generate?
        $wordCount = mt_rand(4, 20);

        // generate the sentence:
        $sentence = [];
        for ($i = 0; $i < $wordCount; $i++) {
            $sentence[] = $this->dadaSelectRandomWord($words, $totalWeight);
        }

        return ucfirst(implode(' ', $sentence)) . '.';
    }

    /**
     * Picks one word from the list, with probability proportional to its weight.
     *
     * @param array $list        weighted word list, items are [word, weight]
     * @param int   $totalWeight sum of all weights in $list
     * @return string the selected word, or '***' if nothing can be selected
     */
    private function dadaSelectRandomWord($list, $totalWeight) {

        if ($totalWeight < 1) {
            return '***';
        }

        // Draw from 1..total: starting at 0 would give the first word one extra chance.
        $rand = mt_rand(1, $totalWeight);

        // match the selection to the weighted list:
        $cumulativeWeight = 0;
        foreach ($list as $item) {
            $cumulativeWeight += $item[1];
            if ($cumulativeWeight >= $rand) {
                return $item[0];
            }
        }
        return '***';
    }

}