<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\ClientHints;
use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes all statistics data (sessions, page views, media access, edits, logins, searches, ...)
 * to the plugin's SQLite database.
 *
 * Constructing the logger parses the user agent and reads the UID and session ID from the PHP
 * session. Construction throws an IgnoreException when the request should not be logged at all.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the browser info and sets the internal vars
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws IgnoreException when the client is a bot or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = $dd->getClient('name') ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();

        // the user name is only remembered when user logging has not been disabled
        if (!$this->hlp->getConf('nousers')) {
            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
        }
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        // use the same connection all the loggers below write to
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The user ID is read from the PHP session ($_SESSION[DOKU_COOKIE]['statistics']['uid']).
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is set in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * The session ID is read from the PHP session ($_SESSION[DOKU_COOKIE]['statistics']['id']).
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is set in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of an existing entry. Does nothing without a user.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
            ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * An existing session only gets its end time, user and uid refreshed.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
             ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * All previously stored groups of the user are replaced by the current ones.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        $groups = $USERINFO['grps'] ?? null;
        if (!is_array($groups)) return;

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if ($groups === []) {
            return;
        }

        // one multi-row insert: a "(?, ?)" tuple and a user/group parameter pair per group
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        $params = [];
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores everything after the last @ of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no @ at all, or nothing in front of it
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set -> a search engine referer
     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty -> a referer from another page (not a wiki page)
     * null returned -> referer was a wiki page
     *
     * @param string|null $referer The referer URL, null is treated like an empty referer
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the parameter is untyped, so guard against null (trim(null) is deprecated since PHP 8.1)
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if ($referer !== '' && strncmp($referer, DOKU_URL, strlen(DOKU_URL)) === 0) {
            return null;
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        // the insert is skipped for already known URLs, the ID is looked up in both cases
        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the 'anonips' option a hash is stored and returned instead of the IP address. With the
     * 'nolocation' option nothing is resolved or stored at all. Location data is only refreshed
     * when the stored entry is older than 30 days.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);
        $anonymize = (bool)$this->hlp->getConf('anonips');

        // anonymize the IP address for storage?
        // we use the reversed IP as salt to avoid common rainbow tables
        $hash = $anonymize ? md5($ip . strrev($ip)) : $ip;

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND dt > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // The reverse DNS lookup blocks, so it is only done when the entry is actually (re)written.
        // No host name is stored for anonymized addresses.
        $host = '';
        if (!$anonymize) {
            $host = gethostbyaddr($ip) ?: ''; // false on malformed input
        }

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, dt
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), the referer (r) and the screen/viewport sizes (sx, sy, vx, vy) from the
     * request. Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from dispatch.php
     *
     * Reads the link (ol) and the page (p) from the request. Nothing is logged when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // a mime type without a slash has no subtype: pad it, so mime2 is '' instead of undefined
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline ? 1 : 0,
        ];

        $this->db->exec(
            '
                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to the currently authenticated user
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        // go through logIp() like all other loggers, so the 'anonips' setting is honoured here, too
        $ip = $this->logIp();

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // the value returned for the insert is used as the search ID of the word entries
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory('page', $conf['datadir'], false);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory('media', $conf['mediadir'], true);
    }

    // endregion

    /**
     * Count the files below the given directory and store count and size in the history table
     *
     * The values are stored as "<prefix>_count" and "<prefix>_size".
     *
     * @param string $prefix Prefix of the history info names ('page' or 'media')
     * @param string $directory The base directory to search
     * @param bool $all Value of the 'all' option passed on to the search callback
     */
    protected function logHistory(string $prefix, string $directory, bool $all): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) {
            // without the helper there is no valid search callback
            \dokuwiki\Logger::error(
                'Statistics Plugin: popularity plugin not available, ' . $prefix . ' history not logged'
            );
            return;
        }

        $list = $this->initEmptySearchList();
        search($list, $directory, [$pop, 'searchCountCallback'], ['all' => $all], '');

        foreach (['count', 'size'] as $metric) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $prefix . '_' . $metric,
                $list['file_' . $metric]
            );
        }
    }

    /**
     * Create the zeroed result list that is handed to the search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array<string, int>
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest',
        ], 0);
    }
}