<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Exception thrown when logging should be ignored
 */
class IgnoreException extends \RuntimeException
{
}

/**
 * Gathers user agent, session and request data and writes it to the statistics database
 *
 * Constructing the logger may throw an IgnoreException (bots, missing session data), in which
 * case nothing should be logged for the current request.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes the user, UID and session properties
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws IgnoreException when the client is a bot or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();

        // the user name is only recorded when the 'nousers' option is off
        if (!$this->hlp->getConf('nousers')) {
            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
        }
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the statistics data stored in the session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is stored in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is stored in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of an existing entry. Does nothing without a user.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * An existing session row only gets its end time, user and uid refreshed.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * Existing group rows of the user are removed first, then the current groups are inserted.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if (empty($groups)) {
            return;
        }

        // one "(user, group)" tuple per group, inserted with a single statement
        $params = [];
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last '@' of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        // no '@' at all, or '@' as the very first character: nothing usable
        $pos = strrpos($mail, '@');
        if (!$pos) return;
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set                    -> a search engine referer
     * no engine set, url empty      -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty  -> a referer from another page (not a wiki page)
     * null returned                 -> referer was a wiki page
     *
     * @param string|null $referer The referer URL, null is treated like an empty referer
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the cast avoids passing null to trim(), which is deprecated since PHP 8.1
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if (!empty($referer) && strncmp($referer, DOKU_URL, strlen(DOKU_URL)) === 0) {
            return null;
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        // insert if unknown, then look the ID up in either case
        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the 'anonips' option an MD5 hash of the IP is used instead of the IP itself and no
     * host name is looked up. With the 'nolocation' option no location data is resolved or stored.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        if ($this->hlp->getConf('anonips')) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
            $host = '';
        } else {
            $hash = $ip;
            $host = gethostbyaddr($ip);
            if ($host === false) $host = ''; // malformed address, store an empty host name
        }

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            // a failed lookup is logged, the IP is still stored with empty location fields
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    /**
     * Split a mime type into its lower cased main type and sub type
     *
     * @param string $mime The mime type, e.g. "image/png"
     * @return string[] Two elements: main type and sub type, the sub type is '' when missing
     */
    protected static function splitMimeType(string $mime): array
    {
        $parts = explode('/', strtolower($mime));
        return [$parts[0], $parts[1] ?? ''];
    }

    /**
     * Store a single value in the history table
     *
     * @param string $info The name of the history entry
     * @param mixed $value The value to store
     */
    protected function logHistoryValue(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page ('p'), referer ('r') and screen/viewport sizes ('sx', 'sy', 'vx', 'vy')
     * from the request. Does nothing when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link ('ol') and the page ('p') from the request. Does nothing when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // a mime type without a "/" results in an empty sub type instead of a PHP warning
        [$mime1, $mime2] = self::splitMimeType($mime);

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline ? 1 : 0,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * The IP is obtained through logIp() like in the other loggers, so the 'anonips' option
     * is honored here as well.
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to the current REMOTE_USER
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = $this->logIp(); // resolve (and possibly anonymize) the IP address

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // NOTE(review): the result of exec() is used as the ID of the new search row;
        // this presumes exec() returns the last insert ID for INSERTs - confirm against SQLiteDB
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query,
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');

        $this->logHistoryValue('page_count', $list['file_count']);
        $this->logHistoryValue('page_size', $list['file_size']);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');

        $this->logHistoryValue('media_count', $list['file_count']);
        $this->logHistoryValue('media_size', $list['file_size']);
    }

    // endregion

    /**
     * Create the zero-initialized result list that is passed to search()
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest',
        ], 0);
    }
}