<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\ClientHints;
use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\AbstractParser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics data (sessions, page views, media access, edits, logins, searches, ...)
 * to the plugin's SQLite database.
 *
 * Typical usage: construct (may throw IgnoreException when nothing should be logged),
 * call begin(), call one or more of the public log*() methods, then call end().
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes the internal state (user agent details,
     * UID, session ID and - unless the 'nousers' option is set - the user name).
     *
     * @param helper_plugin_statistics $hlp The statistics helper, used for config and DB access
     * @throws IgnoreException when the client is a bot or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        // only the major version of the client is kept
        AbstractDeviceParser::setVersionTruncation(AbstractParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = $dd->getClient('name') ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();

        if (!$this->hlp->getConf('nousers')) {
            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
        }
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();

        // logUser() must run before logDomain(), which only UPDATEs the users row
        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
        $this->logCampaign();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the statistics data kept in the PHP session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID has been set
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID has been set
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of the existing row. Does nothing without a user name.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * For an already known session only the end time, user and uid are updated.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log UTM campaign data
     *
     * Reads utm_campaign, utm_source and utm_medium from the request. Nothing is logged
     * when all three are empty.
     *
     * @return void
     */
    protected function logCampaign(): void
    {
        global $INPUT;

        $campaign = $INPUT->filter('trim')->str('utm_campaign', null, true);
        $source = $INPUT->filter('trim')->str('utm_source', null, true);
        $medium = $INPUT->filter('trim')->str('utm_medium', null, true);

        if (!$campaign && !$source && !$medium) return;

        $this->db->exec(
            'INSERT OR IGNORE INTO campaigns (session, campaign, source, medium)
                  VALUES (?, ?, ?, ?)',
            $this->session,
            $campaign,
            $source,
            $medium
        );
    }

    /**
     * Log all groups for the user
     *
     * The user's existing group rows are deleted and replaced by the groups found in $USERINFO.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if ($groups === []) {
            return;
        }

        // one "(?, ?)" tuple per group, all inserted with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last '@' of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // false: no '@' at all, 0: nothing in front of the '@'
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set                    -> a search engine referer
     * no engine set, url empty      -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty  -> a referer from another page (not a wiki page)
     * null returned                 -> referer was a wiki page
     *
     * @param string $referer The referer URL, may be empty
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the parameter is untyped, make sure trim() never receives null
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if (!empty($referer)) {
            $selfre = '^' . preg_quote(DOKU_URL, '/');
            if (preg_match("/$selfre/", $referer)) {
                return null;
            }
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        // the row may have existed before, so the ID is always looked up by URL
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the 'anonips' option the IP is stored as a hash and no host name is looked up.
     * With the 'nolocation' option nothing is written to the iplocation table.
     * Location entries younger than 30 days are not resolved again.
     *
     * @return string The IP address as stored (plain or hashed)
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        if ($this->hlp->getConf('anonips')) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
            $host = '';
        } else {
            $hash = $ip;
            // gethostbyaddr() returns false on malformed input, never store that
            $host = gethostbyaddr($ip) ?: '';
        }

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND dt > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            // a failed lookup is logged, the IP is still stored with empty location data
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, dt
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), referer (r), screen size (sx, sy) and viewport size (vx, vy)
     * from the request. Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from dispatch.php
     *
     * Reads the link (ol) and the page (p) from the request. Nothing is logged when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // a mime type without a '/' (e.g. empty/unknown) must not produce an undefined index,
        // the missing sub type is stored as an empty string instead
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * The IP is stored the same way as for all other log entries, so the 'anonips'
     * option is honored here as well.
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to REMOTE_USER
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        // go through logIp() instead of using the raw client IP, otherwise
        // anonymization would be bypassed for login events
        $ip = $this->logIp();

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // the value returned by exec() is used as the search ID for the word rows below
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query,
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory($conf['mediadir'], true, 'media');
    }

    /**
     * Count the files below the given directory and store count and size in the history table
     *
     * The values are stored as "<prefix>_count" and "<prefix>_size".
     *
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option passed to the search callback
     * @param string $prefix Prefix for the history info names
     */
    protected function logHistory(string $dir, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) {
            // without the helper there is no valid callback to pass to search()
            \dokuwiki\Logger::error('Statistics Plugin: popularity helper not available, history not logged');
            return;
        }

        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        $sql = 'INSERT OR REPLACE INTO history (
                    info, value, dt
                ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                )';
        $this->db->exec($sql, $prefix . '_count', $list['file_count']);
        $this->db->exec($sql, $prefix . '_size', $list['file_size']);
    }

    // endregion

    /**
     * Create the zero-initialized result structure that is passed to search()
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}