<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\ErrorHandler;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use dokuwiki\Utf8\Clean;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes access statistics (users, sessions, page views, media, edits, searches, ...)
 * to the statistics plugin's SQLite database.
 *
 * Typical usage: construct, call begin(), call one or more log*() dispatchers, call end().
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent of the current request and initializes the user, uid and
     * session information used by all loggers.
     *
     * @param helper_plugin_statistics $hlp The statistics helper, provides the database
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws IgnoreException when the user agent is detected as a bot
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();
        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        // use the same database handle that all logging statements run on
        $this->db->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->db->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * Taken from the 'uid' request parameter, then the 'plgstats' preference, then the PHP
     * session ID. The chosen value is written back to the 'plgstats' preference.
     *
     * @return string The unique user identifier
     */
    protected function getUID(): string
    {
        global $INPUT;

        $uid = $INPUT->str('uid');
        if (!$uid) $uid = get_doku_pref('plgstats', false);
        if (!$uid) $uid = session_id();
        set_doku_pref('plgstats', $uid);
        return $uid;
    }

    /**
     * Return the user's session ID
     *
     * This is usually our own managed session, not a PHP session (only in fallback).
     * The chosen value is written back to the 'plgstatsses' preference.
     *
     * @return string The session identifier
     */
    protected function getSession(): string
    {
        global $INPUT;

        // FIXME session setting needs work. It should be reset on user change, maybe we do rely on the PHP session?
        // We also want to store the user agent in the session table, so this needs also change the session ID
        $ses = $INPUT->str('ses');
        if (!$ses) $ses = get_doku_pref('plgstatsses', false);
        if (!$ses) $ses = session_id();
        set_doku_pref('plgstatsses', $ses);
        return $ses;
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or refreshes the timestamp of an existing entry. Does nothing for
     * anonymous requests.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
            ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * Creates the session entry or, if it already exists, only updates its end timestamp.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP
                   WHERE excluded.session = sessions.session
             ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * Replaces all previously stored groups of the user with the current ones.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        // without any group the INSERT below would have an empty VALUES list, which is invalid SQL
        if ($groups === []) return;

        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $params = [];
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last '@' of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no '@' at all (false) or nothing in front of it (0)
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * The referer is classified as 'search' or 'external' and inserted if it is not known yet.
     *
     * @param string $referer
     * @return int|null The referer ID or null if no referer was given
     */
    public function logReferer($referer): ?int
    {
        if (!$referer) return null;

        // FIXME we could check against a blacklist here

        $se = new SearchEngines($referer);
        $type = $se->isSearchEngine() ? 'search' : 'external';

        $sql = 'INSERT OR IGNORE INTO referers (url, type, dt) VALUES (?, ?, CURRENT_TIMESTAMP)';
        $this->db->exec($sql, [$referer, $type]);

        // An ignored insert creates no row and thus yields no new row ID, so the row for this
        // URL is always looked up explicitly. rowid is what a successful insert would report.
        // NOTE(review): assumes referers is a regular rowid table - confirm against the schema
        $id = $this->db->queryValue('SELECT rowid FROM referers WHERE url = ?', $referer);
        if ($id === null || $id === false) return null;
        return (int)$id;
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * The lookup at ip-api.com is skipped when the IP has an entry that was updated within
     * the last 30 days. Lookup failures are logged and never abort the request.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);
        $hash = $ip; // @todo we could anonymize here

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
             FROM   iplocation
             WHERE  ip = ?
               AND  lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 7;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
            return $hash;
        }
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
            return $hash;
        }
        if (!isset($data['status'])) {
            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result for ' . $ip, $data);
            return $hash;
        }

        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
        // without location data, so we won't re-query it in the next 30 days.

        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page ('p'), referer ('r') and screen/viewport sizes ('sx', 'sy', 'vx', 'vy')
     * from the request. Does nothing when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link ('ol') and the page ('p') from the request. Does nothing when no
     * link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        // one placeholder per bound value: session, page, link
        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // pad so a mime type without a slash yields an empty sub type instead of a missing index
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
                INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                     VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * Only the columns named in the statement (type, ip, session) are written.
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username. Kept for backward compatibility; it is not stored because
     *                     the statement's column list has no user column.
     * @fixme session handling still needs to be figured out
     */
    public function logLogin(string $type, string $user = ''): void
    {
        // NOTE(review): placeholders and bound values were aligned to the declared column list,
        // confirm that the logins table really has no user/uid columns
        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $type,
            clientIP(true),
            $this->session
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['datadir'], [$pop, 'searchCountCallback'], ['all' => false], '');

        $this->storeHistory('page_count', $list['file_count']);
        $this->storeHistory('page_size', $list['file_size']);
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        $list = $this->initEmptySearchList();
        search($list, $conf['mediadir'], [$pop, 'searchCountCallback'], ['all' => true], '');

        $this->storeHistory('media_count', $list['file_count']);
        $this->storeHistory('media_size', $list['file_size']);
    }

    // endregion

    /**
     * Insert or replace a single entry in the history table
     *
     * @param string $info The name of the value (e.g. page_count)
     * @param int $value The value to store
     */
    protected function storeHistory(string $info, $value): void
    {
        $this->db->exec(
            'INSERT OR REPLACE INTO history (
                info, value, dt
             ) VALUES (
                ?, ?, CURRENT_TIMESTAMP
             )',
            $info,
            $value
        );
    }

    /**
     * Create the zero-initialized result structure handed to the search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}