<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Exception thrown when logging should be ignored
 */
class IgnoreException extends \RuntimeException
{
}

/**
 * Writes access statistics (sessions, page views, media, edits, logins, searches, history)
 * to the plugin's SQLite database.
 *
 * Constructing a Logger throws an IgnoreException when the current request should not be logged.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes user, UID and session from the current request.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, also provides the database
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws IgnoreException when a bot is detected or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();
        $this->user = $INPUT->server->str('REMOTE_USER', null, true);
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        // use the same handle all the logging methods write through
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the statistics data kept in the PHP session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is set in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is set in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Does nothing for anonymous requests.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
            ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * An existing session row only gets its end time, user and uid refreshed.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
             ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * The user's previously stored groups are replaced by the current ones.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if (empty($groups)) {
            return;
        }

        // one "(?, ?)" tuple per group, filled with alternating user/group values
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last "@" of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no "@" at all, or nothing in front of it
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set -> a search engine referer
     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty -> a referer from another page (not a wiki page)
     * null returned -> referer was a wiki page
     *
     * @param string|null $referer The referer URL, null is treated like an empty referer
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // cast first: trim(null) is deprecated since PHP 8.1
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if ($referer !== '' && strncmp($referer, DOKU_URL, strlen(DOKU_URL)) === 0) {
            return null;
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        // insert if new, then look the row up so known referers return their existing ID
        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the 'anonips' option enabled, a hash is stored instead of the address and no
     * host name is looked up. Entries younger than 30 days are not resolved again.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        if ($this->hlp->getConf('anonips')) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
            $host = '';
        } else {
            $hash = $ip;
            // gethostbyaddr() returns false on malformed input; store an empty host then
            $host = gethostbyaddr($ip) ?: '';
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 7;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
            return $hash;
        }
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
            return $hash;
        }
        if (!isset($data['status'])) {
            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result for ' . $ip, $data);
            return $hash;
        }

        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
        // without location data, so we won't re-query it in the next 30 days.

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), referer (r), screen size (sx, sy) and viewport size (vx, vy)
     * from the request. Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link (ol) and the page (p) from the request. Nothing is logged without a link.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // pad, so a mime type without a slash yields an empty subtype instead of an undefined index
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * The IP is stored the same way as in all other log tables, so the 'anonips'
     * setting is honored here as well.
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to the currently logged in user
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = $this->logIp(); // resolve the IP address (anonymized if configured)

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // NOTE(review): the result of exec() is used as the ID of the new search row,
        // presumably the last insert ID - confirm against SQLiteDB::exec()
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query,
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $list = $this->collectFileStats($conf['datadir'], false);
        if ($list === null) return;

        $this->logHistoryEntry('page_count', $list['file_count']);
        $this->logHistoryEntry('page_size', $list['file_size']);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $list = $this->collectFileStats($conf['mediadir'], true);
        if ($list === null) return;

        $this->logHistoryEntry('media_count', $list['file_count']);
        $this->logHistoryEntry('media_size', $list['file_size']);
    }

    // endregion

    /**
     * Count the files below the given directory using the popularity plugin's search callback
     *
     * @param string $dir The directory to search
     * @param bool $all Value of the 'all' option passed on to the search callback
     * @return array|null The filled search list, null when the popularity helper could not be loaded
     */
    private function collectFileStats(string $dir, bool $all): ?array
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) {
            // without the helper there is no callback to hand to search()
            \dokuwiki\Logger::error('Statistics Plugin - Failed to load popularity helper, history not logged.');
            return null;
        }

        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');
        return $list;
    }

    /**
     * Store a single value in the history table
     *
     * @param string $info The name of the history value
     * @param int|string $value The value to store
     */
    private function logHistoryEntry(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }

    /**
     * Create the zero-initialized result list expected by the popularity search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}