<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\Input\Input;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes statistics records (sessions, page views, media access, edits, logins, searches, history)
 * to the plugin's SQLite database.
 *
 * Constructing the logger parses the user agent and reads the session identifiers; it throws
 * an IgnoreException when the request should not be logged at all (bots, missing session data).
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent and initializes the user, UID and session properties.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, used for the database and configuration
     * @throws IgnoreException when a bot is detected or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp)
    {
        /** @var Input $INPUT */
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();

        // the user name is only recorded when the 'nousers' option is off
        if (!$this->hlp->getConf('nousers')) {
            $this->user = $INPUT->server->str('REMOTE_USER', null, true);
        }
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        // use the same handle all queries go through, so the transaction covers them
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The ID is read from the statistics data kept in the PHP session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is present in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * The ID is read from the statistics data kept in the PHP session.
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is present in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of the existing row. Does nothing without a user name.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * An existing session row only gets its end time, user and uid refreshed; the user agent
     * columns are written on first insert only.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP,
                         user = excluded.user,
                         uid = excluded.uid
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * The user's existing group rows are removed and replaced by the current list from $USERINFO.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        if ($groups === []) {
            return;
        }

        // one "(?, ?)" tuple per group, all inserted with a single statement
        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last '@' of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no '@' at all, or nothing in front of it
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * Note: we DO log empty referers. These are external accesses that did not provide a referer URL.
     * We do not log referers that are our own pages though.
     *
     * engine set                    -> a search engine referer
     * no engine set, url empty      -> a direct access (bookmark, direct link, etc.)
     * no engine set, url not empty  -> a referer from another page (not a wiki page)
     * null returned                 -> referer was a wiki page
     *
     * @param string|null $referer The referer URL, may be empty
     * @return int|null The referer ID or null if no referer was logged
     * @todo we could check against a blacklist here
     */
    public function logReferer($referer): ?int
    {
        // the parameter is untyped; cast so a null referer is treated like an empty one
        $referer = trim((string)$referer);

        // do not log our own pages as referers (empty referer is OK though)
        if (!empty($referer)) {
            $selfre = '^' . preg_quote(DOKU_URL, '/');
            if (preg_match("/$selfre/", $referer)) {
                return null;
            }
        }

        // is it a search engine?
        $se = new SearchEngines($referer);
        $engine = $se->getEngine();

        // insert if new, then look up the ID of the (new or existing) row
        $sql = 'INSERT OR IGNORE INTO referers (url, engine, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $engine]);
        return (int)$this->db->queryValue('SELECT id FROM referers WHERE url = ?', $referer);
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * With the 'anonips' option the IP is replaced by an MD5 hash and no host name is stored.
     * With the 'nolocation' option nothing is resolved or stored at all. Location data that is
     * younger than 30 days is not resolved again.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);

        // anonymize the IP address for storage?
        $anonymize = (bool)$this->hlp->getConf('anonips');
        if ($anonymize) {
            $hash = md5($ip . strrev($ip)); // we use the reversed IP as salt to avoid common rainbow tables
        } else {
            $hash = $ip;
        }

        if ($this->hlp->getConf('nolocation')) {
            // if we don't resolve location data, we just return the IP address
            return $hash;
        }

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND dt > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        // resolve the IP address to location data
        try {
            $data = $this->hlp->resolveIP($ip);
        } catch (IpResolverException $e) {
            \dokuwiki\Logger::error('Statistics Plugin: ' . $e->getMessage(), $e->details);
            $data = [];
        }

        // The reverse DNS lookup is only needed for the row written below, so it is done here
        // instead of on every call. gethostbyaddr() returns false on failure; store '' then.
        $host = '';
        if (!$anonymize) {
            $host = gethostbyaddr($ip) ?: '';
        }

        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, dt
                ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page ('p'), referer ('r') and screen/viewport sizes ('sx', 'sy', 'vx', 'vy')
     * from the request. Does nothing when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from dispatch.php
     *
     * Reads the link ('ol') and the page ('p') from the request. Does nothing when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        [$mime1, $mime2] = $this->splitMime($mime);
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Split a mime type into its lowercased first two '/'-separated parts
     *
     * A mime type without a '/' yields an empty string as second part instead of an undefined index.
     *
     * @param string $mime The mime type, e.g. "image/png"
     * @return string[] Exactly two elements: [type, subtype]
     */
    protected function splitMime(string $mime): array
    {
        $parts = array_pad(explode('/', strtolower($mime)), 2, '');
        return [$parts[0], $parts[1]];
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * NOTE(review): this stores the raw client IP and does not go through logIp(), so the
     * 'anonips' option is not applied to the logins table - confirm whether that is intended.
     *
     * @param string $type The type of login event (login, logout, create, failed)
     * @param string $user The username, defaults to REMOTE_USER when empty
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);

        $this->db->exec(
            'INSERT INTO logins (
                dt, ip, user, type
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $ip,
            $user,
            $type
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        // the value returned for the INSERT is used as the search ID for the word rows
        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ?, ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistory($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistory($conf['mediadir'], true, 'media');
    }

    /**
     * Count the files below a directory and store count and size as history entries
     *
     * Writes the two history rows "<prefix>_count" and "<prefix>_size".
     *
     * @param string $dir The directory to scan
     * @param bool $all Value of the 'all' option handed to the search callback
     * @param string $prefix Prefix of the history info keys ("page" or "media")
     */
    protected function logHistory(string $dir, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity|null $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) {
            // without the helper there is no callback to hand to search()
            \dokuwiki\Logger::error('Statistics Plugin: popularity helper not available, history not logged');
            return;
        }

        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        $sql = 'INSERT OR REPLACE INTO history (
                    info, value, dt
                ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                )';
        $this->db->exec($sql, $prefix . '_count', $list['file_count']);
        $this->db->exec($sql, $prefix . '_size', $list['file_size']);
    }

    // endregion

    /**
     * Create the zero-initialized result list that is passed to search()
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}