<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Exception thrown when logging should be ignored
 */
class IgnoreException extends \RuntimeException
{
}

/**
 * Writes access, session, edit, search and history data to the statistics database
 *
 * The constructor throws an IgnoreException when the request should not be logged at all
 * (bots, missing user or session identifier).
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes the user, UID and session properties.
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws IgnoreException when a bot is detected or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();
        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        // use the cached DB handle like all other methods of this class do
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the statistics data in the PHP session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is set in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is set in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Does nothing for anonymous requests. Inserts the user or refreshes the timestamp of an existing row.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * An existing session row only gets its end time, user and uid refreshed.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * The stored groups of the user are replaced by the ones found in $USERINFO.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if (empty($groups)) {
            return;
        }

        // one "(?, ?)" tuple per group, all inserted with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores everything after the last @ of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        // no @ at all, or an address without a local part
        if ($pos === false || $pos === 0) return;
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set -> a search engine referer
     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty -> a referer from another page (not a wiki page)
     * null returned -> referer was a wiki page
     *
     * @param string $referer The referer URL, may be empty
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the parameter is untyped; cast so that a null referer does not hit trim() directly
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if (!empty($referer)) {
            $selfre = '^' . preg_quote(DOKU_URL, '/');
            if (preg_match("/$selfre/", $referer)) {
                return null;
            }
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * Depending on the 'anonips' setting the IP is hashed before storage. When 'nolocation' is set,
     * nothing is written. Location data is only looked up again when the stored entry is
     * older than 30 days.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        if ($this->hlp->getConf('anonips')) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
            $host = '';
        } else {
            $hash = $ip;
            // gethostbyaddr() returns false on malformed input, store an empty host then
            $host = (string)gethostbyaddr($ip);
        }

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            // a failed lookup is logged and stored as an entry with empty location fields
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), referer (r), screen size (sx, sy) and viewport size (vx, vy) from the request.
     * Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // a mime type without a slash must not trigger an undefined offset, the subtype is empty then
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
            VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to the current REMOTE_USER
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        // NOTE(review): unlike the other loggers this stores the raw client IP and bypasses logIp(),
        // so the 'anonips' setting is not applied here - confirm this is intended
        $ip = clientIP(true);

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // the result of the insert is used as the search ID for the word rows below
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory($conf['mediadir'], true, 'media');
    }

    // endregion

    /**
     * Count the files below a directory and store count and size in the history table
     *
     * Writes the two rows "<prefix>_count" and "<prefix>_size". Nothing is written when the
     * popularity helper can not be loaded.
     *
     * @param string $dir The directory to scan
     * @param bool $all Value for the 'all' option passed to the search callback
     * @param string $prefix The prefix of the history info keys (page, media)
     */
    protected function logHistory(string $dir, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        // the popularity plugin may be disabled, there is no callback to count with then
        if (!$pop) return;

        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        foreach (['count', 'size'] as $key) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $prefix . '_' . $key,
                $list['file_' . $key]
            );
        }
    }

    /**
     * Build the zero-initialized result list handed to the popularity plugin's search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}