<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\ClientHints;
use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes access statistics (sessions, page views, media, edits, searches, ...) to the plugin's database
 *
 * Instantiation throws an IgnoreException when the current request should not be logged at all.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes the internal state (client info, uid, session, user)
     *
     * @param helper_plugin_statistics $hlp The statistics helper plugin instance
     * @throws IgnoreException when a bot is detected or no uid/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua, ClientHints::factory($_SERVER));
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = $dd->getClient('name') ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();

        // the user name is only recorded when user logging has not been disabled
        if (!$this->hlp->getConf('nousers')) {
            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
        }
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
        $this->logCampaign();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the session data; this method does not create one.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no user ID is set in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * The ID is read from the session data; this method does not create one.
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is set in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of an existing entry. Does nothing without a user.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * For an already known session only the end time, user and uid are updated.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log UTM campaign data
     *
     * Reads utm_campaign, utm_source and utm_medium from the request. Nothing is logged without a campaign.
     *
     * @return void
     */
    protected function logCampaign(): void
    {
        global $INPUT;

        $campaign = $INPUT->filter('trim')->str('utm_campaign', null, true);
        $source = $INPUT->filter('trim')->str('utm_source', null, true);
        $medium = $INPUT->filter('trim')->str('utm_medium', null, true);

        if (!$campaign) return;

        $this->db->exec(
            'INSERT OR IGNORE INTO campaigns (session, campaign, source, medium)
                  VALUES (?, ?, ?, ?)',
            $this->session,
            $campaign,
            $source,
            $medium
        );
    }

    /**
     * Log all groups for the user
     *
     * Existing group entries of the user are replaced by the current list from $USERINFO.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if ($groups === []) {
            return;
        }

        // build one multi-row insert with a (user, group) pair per group
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last @ of the user's mail address in the user's entry.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        // skips addresses without an @ as well as those starting with it (position 0)
        if (!$pos) return;
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set -> a search engine referer
     * no engine set, url empty -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty -> a referer from another page (not a wiki page)
     * null returned -> referer was a wiki page
     *
     * @param string|null $referer The referer URL, null is handled like an empty referer
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the parameter is untyped, passing null to trim() is deprecated in newer PHP versions
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if (!empty($referer)) {
            $selfre = '^' . preg_quote(DOKU_URL, '/');
            if (preg_match("/$selfre/", $referer)) {
                return null;
            }
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        // already known URLs are kept as is, the ID is looked up afterwards in both cases
        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the anonips option set, a hash is stored and returned instead of the IP address. With the
     * nolocation option set, nothing is resolved or stored. Location data younger than 30 days is reused.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        if ($this->hlp->getConf('anonips')) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
            $host = '';
        } else {
            $hash = $ip;
            $host = gethostbyaddr($ip);
        }

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND dt > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            // a failed lookup is logged, the IP is still stored with empty location data
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, dt
                ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), referer (r), screen size (sx, sy) and viewport size (vx, vy) from the request.
     * Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from dispatch.php
     *
     * Reads the link (ol) and the page (p) from the request. Nothing is logged when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // pad to two parts, a mime type without a slash would otherwise raise an undefined key warning
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to the currently authenticated user
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        // go through logIp() like all other loggers, so the anonips setting is honored here as well
        $ip = $this->logIp();

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // the result of the insert is used as the search ID for the word entries below
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query,
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');
        $page_count = $list['file_count'];
        $page_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_count',
            $page_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'page_size',
            $page_size
        );
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');
        $media_count = $list['file_count'];
        $media_size = $list['file_size'];

        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_count',
            $media_count
        );
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            'media_size',
            $media_size
        );
    }

    // endregion

    /**
     * Create the zero-initialized result list that is passed to the search() calls
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}