<?php

namespace dokuwiki\plugin\statistics;

use DeviceDetector\DeviceDetector;
use DeviceDetector\Parser\Client\Browser;
use DeviceDetector\Parser\Device\AbstractDeviceParser;
use DeviceDetector\Parser\OperatingSystem;
use dokuwiki\HTTP\DokuHTTPClient;
use dokuwiki\plugin\sqlite\SQLiteDB;
use helper_plugin_popularity;
use helper_plugin_statistics;

/**
 * Writes access statistics (sessions, page views, media access, edits, searches, ...)
 * to the plugin's SQLite database.
 *
 * Typical usage: construct, call begin(), call one or more log*() dispatchers, call end().
 * The constructor throws an IgnoreException when the request should not be logged at all.
 */
class Logger
{
    /** @var helper_plugin_statistics The statistics helper plugin instance */
    protected helper_plugin_statistics $hlp;

    /** @var SQLiteDB The SQLite database instance */
    protected SQLiteDB $db;

    /** @var string The full user agent string */
    protected string $uaAgent;

    /** @var string The type of user agent (browser, robot, feedreader) */
    protected string $uaType = 'browser';

    /** @var string The browser/client name */
    protected string $uaName;

    /** @var string The browser/client version */
    protected string $uaVersion;

    /** @var string The operating system/platform */
    protected string $uaPlatform;

    /** @var string|null The user name, if available */
    protected ?string $user = null;

    /** @var string The unique user identifier */
    protected string $uid;

    /** @var string The session identifier */
    protected string $session;

    /** @var int|null The ID of the main access log entry if any */
    protected ?int $hit = null;

    /** @var DokuHTTPClient|null The HTTP client instance for testing */
    protected ?DokuHTTPClient $httpClient = null;

    // region lifecycle

    /**
     * Constructor
     *
     * Parses the user agent of the current request and initializes the internal state
     * (user agent details, UID, session ID, user name).
     *
     * @param helper_plugin_statistics $hlp The helper plugin providing the database
     * @param DokuHTTPClient|null $httpClient Optional HTTP client to use for IP lookups (for testing)
     * @throws IgnoreException when a bot is detected or no UID/session ID is available
     */
    public function __construct(helper_plugin_statistics $hlp, ?DokuHTTPClient $httpClient = null)
    {
        global $INPUT;

        $this->hlp = $hlp;
        $this->db = $this->hlp->getDB();
        $this->httpClient = $httpClient;

        // FIXME if we already have a session, we should not re-parse the user agent

        $ua = trim($INPUT->server->str('HTTP_USER_AGENT'));

        // only the major version is stored
        AbstractDeviceParser::setVersionTruncation(AbstractDeviceParser::VERSION_TRUNCATION_MAJOR);
        $dd = new DeviceDetector($ua); // FIXME we could use client hints, but need to add headers
        $dd->discardBotInformation();
        $dd->parse();

        if ($dd->isFeedReader()) {
            $this->uaType = 'feedreader';
        } elseif ($dd->isBot()) {
            $this->uaType = 'robot';
            // for now ignore bots
            throw new IgnoreException('Bot detected, not logging');
        }

        $this->uaAgent = $ua;
        $this->uaName = Browser::getBrowserFamily($dd->getClient('name')) ?: 'Unknown';
        $this->uaVersion = $dd->getClient('version') ?: '0';
        $this->uaPlatform = OperatingSystem::getOsFamily($dd->getOs('name')) ?: 'Unknown';
        $this->uid = $this->getUID();
        $this->session = $this->getSession();
        $this->user = $INPUT->server->str('REMOTE_USER') ?: null;
    }

    /**
     * Should be called before logging
     *
     * This starts a transaction, so all logging is done in one go. It also logs the user and session data.
     */
    public function begin(): void
    {
        $this->hlp->getDB()->getPdo()->beginTransaction();

        $this->logUser();
        $this->logGroups();
        $this->logDomain();
        $this->logSession();
    }

    /**
     * Should be called after logging
     *
     * This commits the transaction started in begin()
     */
    public function end(): void
    {
        $this->hlp->getDB()->getPdo()->commit();
    }

    // endregion
    // region data gathering

    /**
     * Get the unique user ID
     *
     * The UID is read from the statistics data stored in the PHP session.
     *
     * @return string The unique user identifier
     * @throws IgnoreException when no UID is set in the session
     */
    protected function getUID(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['uid'])) {
            // when there is no session UID set, we assume this was deliberate and we simply abort all logging
            // @todo we may later make UID generation optional
            throw new IgnoreException('No user ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['uid'];
    }

    /**
     * Return the user's session ID
     *
     * The ID is read from the statistics data stored in the PHP session.
     *
     * @return string The session identifier
     * @throws IgnoreException when no session ID is set in the session
     */
    protected function getSession(): string
    {
        if (!isset($_SESSION[DOKU_COOKIE]['statistics']['id'])) {
            // when there is no session ID set, we assume this was deliberate and we simply abort all logging
            throw new IgnoreException('No session ID found');
        }

        return $_SESSION[DOKU_COOKIE]['statistics']['id'];
    }

    // endregion
    // region automatic logging

    /**
     * Log the user was seen
     *
     * Inserts the user or, if already known, refreshes its timestamp. Does nothing for anonymous requests.
     */
    protected function logUser(): void
    {
        if (!$this->user) return;

        $this->db->exec(
            'INSERT INTO users (user, dt)
                  VALUES (?, CURRENT_TIMESTAMP)
             ON CONFLICT (user) DO UPDATE SET
                         dt = CURRENT_TIMESTAMP
                   WHERE excluded.user = users.user
            ',
            $this->user
        );
    }

    /**
     * Log the session and user agent information
     *
     * A new session row is created on first sight; for a known session only the end time is updated.
     */
    protected function logSession(): void
    {
        $this->db->exec(
            'INSERT INTO sessions (session, dt, end, uid, user, ua, ua_info, ua_type, ua_ver, os)
                  VALUES (?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP, ?, ?, ?, ?, ?, ?, ?)
             ON CONFLICT (session) DO UPDATE SET
                         end = CURRENT_TIMESTAMP
                   WHERE excluded.session = sessions.session
            ',
            $this->session,
            $this->uid,
            $this->user,
            $this->uaAgent,
            $this->uaName,
            $this->uaType,
            $this->uaVersion,
            $this->uaPlatform
        );
    }

    /**
     * Log all groups for the user
     *
     * The stored groups of the user are replaced by the ones in $USERINFO.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logGroups(): void
    {
        global $USERINFO;

        if (!$this->user) return;
        if (!isset($USERINFO['grps'])) return;
        if (!is_array($USERINFO['grps'])) return;
        $groups = $USERINFO['grps'];

        $this->db->exec('DELETE FROM groups WHERE user = ?', $this->user);

        // an empty list would result in "INSERT ... VALUES" without any tuple, which is invalid SQL
        if (!$groups) return;

        $placeholders = implode(',', array_fill(0, count($groups), '(?, ?)'));
        $sql = "INSERT INTO groups (`user`, `group`) VALUES $placeholders";
        $params = [];
        foreach ($groups as $group) {
            $params[] = $this->user;
            $params[] = $group;
        }
        $this->db->exec($sql, $params);
    }

    /**
     * Log email domain
     *
     * Stores the part after the last "@" of the user's mail address in the users table.
     *
     * @todo maybe this should be done only once per session?
     */
    protected function logDomain(): void
    {
        global $USERINFO;
        if (!$this->user) return;
        if (!isset($USERINFO['mail'])) return;
        $mail = $USERINFO['mail'];

        $pos = strrpos($mail, '@');
        if (!$pos) return; // no "@" at all, or nothing in front of it
        $domain = substr($mail, $pos + 1);
        if (empty($domain)) return;

        $sql = 'UPDATE users SET domain = ? WHERE user = ?';
        $this->db->exec($sql, [$domain, $this->user]);
    }

    // endregion
    // region internal loggers called by the dispatchers

    /**
     * Log the given referer URL
     *
     * @param string $referer The referer URL
     * @return int|null The referer ID or null if no referer was given
     */
    public function logReferer($referer): ?int
    {
        if (!$referer) return null;

        // FIXME we could check against a blacklist here

        $se = new SearchEngines($referer);
        $type = $se->isSearchEngine() ? 'search' : 'external';

        $this->db->exec(
            'INSERT OR IGNORE INTO referers (url, type, dt) VALUES (?, ?, CURRENT_TIMESTAMP)',
            [$referer, $type]
        );

        // An ignored insert does not update SQLite's last insert rowid, so the result of the insert
        // can not be used as ID for already known referers. Look the row up explicitly instead.
        // NOTE(review): assumes referers is a regular rowid table whose integer primary key aliases rowid
        $id = $this->db->queryValue('SELECT rowid FROM referers WHERE url = ?', $referer);
        return $id === null ? null : (int)$id;
    }

    /**
     * Resolve IP to country/city and store in database
     *
     * The lookup is skipped when the IP was already stored within the last 30 days. When the lookup
     * fails, the error is logged and nothing is stored.
     *
     * @return string The IP address as stored
     */
    public function logIp(): string
    {
        $ip = clientIP(true);
        $hash = $ip; // @todo we could anonymize here

        // check if IP already known and up-to-date
        $result = $this->db->queryValue(
            "SELECT ip
               FROM iplocation
              WHERE ip = ?
                AND lastupd > date('now', '-30 days')",
            $hash
        );
        if ($result) return $hash; // already known and up-to-date

        $http = $this->httpClient ?: new DokuHTTPClient();
        $http->timeout = 7;
        $json = $http->get('http://ip-api.com/json/' . $ip); // yes, it's HTTP only

        if (!$json) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed talk to ip-api.com.');
            return $hash;
        }
        try {
            $data = json_decode($json, true, 512, JSON_THROW_ON_ERROR);
        } catch (\JsonException $e) {
            \dokuwiki\Logger::error('Statistics Plugin - Failed to decode JSON from ip-api.com.', $e);
            return $hash;
        }
        if (!isset($data['status'])) {
            \dokuwiki\Logger::error('Statistics Plugin - Invalid ip-api.com result for ' . $ip, $data);
            return $hash;
        }

        // we do not check for 'success' status here. when the API can't resolve the IP we still log it
        // without location data, so we won't re-query it in the next 30 days.

        $host = gethostbyaddr($ip); // @todo if we anonymize the IP, we should not do this
        $this->db->exec(
            'INSERT OR REPLACE INTO iplocation (
                    ip, country, code, city, host, lastupd
                 ) VALUES (
                    ?, ?, ?, ?, ?, CURRENT_TIMESTAMP
                 )',
            $hash,
            $data['country'] ?? '',
            $data['countryCode'] ?? '',
            $data['city'] ?? '',
            $host
        );

        return $hash;
    }

    // endregion
    // region log dispatchers

    /**
     * Log a page view
     *
     * Reads the page (p), referer (r), screen size (sx, sy) and viewport size (vx, vy) from the
     * request. Nothing is logged when no page is given.
     */
    public function logPageView(): void
    {
        global $INPUT;

        if (!$INPUT->str('p')) return;

        $referer = $INPUT->filter('trim')->str('r');
        $ip = $this->logIp(); // resolve the IP address

        $data = [
            'page' => $INPUT->filter('cleanID')->str('p'),
            'ip' => $ip,
            'ref_id' => $this->logReferer($referer),
            'sx' => $INPUT->int('sx'),
            'sy' => $INPUT->int('sy'),
            'vx' => $INPUT->int('vx'),
            'vy' => $INPUT->int('vy'),
            'session' => $this->session,
        ];

        $this->db->exec(
            '
            INSERT INTO pageviews (
                dt, page, ip, ref_id, screen_x, screen_y, view_x, view_y, session
            ) VALUES (
                CURRENT_TIMESTAMP, :page, :ip, :ref_id, :sx, :sy, :vx, :vy, :session
            )
            ',
            $data
        );
    }

    /**
     * Log a click on an external link
     *
     * Called from log.php
     *
     * Reads the link (ol) and the page (p) from the request. Nothing is logged when no link is given.
     */
    public function logOutgoing(): void
    {
        global $INPUT;

        if (!$INPUT->str('ol')) return;

        $link = $INPUT->filter('trim')->str('ol');
        $session = $this->session;
        $page = $INPUT->filter('cleanID')->str('p');

        // one placeholder per bound value: session, page, link
        $this->db->exec(
            'INSERT INTO outlinks (
                dt, session, page, link
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?
             )',
            $session,
            $page,
            $link
        );
    }

    /**
     * Log access to a media file
     *
     * Called from action.php
     *
     * @param string $media The media ID
     * @param string $mime The media's mime type
     * @param bool $inline Is this displayed inline?
     * @param int $size Size of the media file
     */
    public function logMedia(string $media, string $mime, bool $inline, int $size): void
    {
        // pad, so a mime type without a slash yields an empty subtype instead of an undefined offset
        [$mime1, $mime2] = array_pad(explode('/', strtolower($mime)), 2, '');
        $inline = $inline ? 1 : 0;

        $data = [
            'media' => cleanID($media),
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session,
            'size' => $size,
            'mime1' => $mime1,
            'mime2' => $mime2,
            'inline' => $inline,
        ];

        $this->db->exec(
            '
            INSERT INTO media ( dt, media, ip, session, size, mime1, mime2, inline )
                 VALUES (CURRENT_TIMESTAMP, :media, :ip, :session, :size, :mime1, :mime2, :inline)
            ',
            $data
        );
    }

    /**
     * Log page edits
     *
     * called from action.php
     *
     * @param string $page The page that was edited
     * @param string $type The type of edit (create, edit, etc.)
     */
    public function logEdit(string $page, string $type): void
    {
        $data = [
            'page' => cleanID($page),
            'type' => $type,
            'ip' => $this->logIp(), // resolve the IP address
            'session' => $this->session
        ];

        $this->db->exec(
            'INSERT INTO edits (
                dt, page, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, :page, :type, :ip, :session
             )',
            $data
        );
    }

    /**
     * Log login/logoffs and user creations
     *
     * @param string $type The type of login event (login, logout, create)
     * @param string $user The username (optional, will use current user if empty)
     * @fixme this is still broken, I need to figure out the session handling first
     */
    public function logLogin(string $type, string $user = ''): void
    {
        global $INPUT;

        if (!$user) $user = $INPUT->server->str('REMOTE_USER');

        $ip = clientIP(true);
        $session = $this->session;

        // NOTE(review): the statement names 4 columns, but supplies 6 values and binds 5 parameters.
        // The logins table schema is not visible from here, so the intended column list needs to be
        // confirmed before this can be fixed.
        $this->db->exec(
            'INSERT INTO logins (
                dt, type, ip, session
             ) VALUES (
                CURRENT_TIMESTAMP, ?, ?, ?, ?, ?
             )',
            $type,
            $ip,
            $user,
            $session,
            $this->uid
        );
    }

    /**
     * Log search data to the search related tables
     *
     * @param string $query The search query
     * @param string[] $words The query split into words
     */
    public function logSearch(string $query, array $words): void
    {
        if (!$query) return;

        $sid = $this->db->exec(
            'INSERT INTO search (dt, ip, session, query) VALUES (CURRENT_TIMESTAMP, ?, ? , ?)',
            $this->logIp(), // resolve the IP address
            $this->session,
            $query,
        );

        foreach ($words as $word) {
            if (!$word) continue;
            $this->db->exec(
                'INSERT INTO searchwords (sid, word) VALUES (?, ?)',
                $sid,
                $word
            );
        }
    }

    /**
     * Log the current page count and size as today's history entry
     */
    public function logHistoryPages(): void
    {
        global $conf;

        $this->logHistoryFor($conf['datadir'], false, 'page');
    }

    /**
     * Log the current media count and size as today's history entry
     */
    public function logHistoryMedia(): void
    {
        global $conf;

        $this->logHistoryFor($conf['mediadir'], true, 'media');
    }

    // endregion

    /**
     * Count the files below the given directory and store count and size as history entries
     *
     * The entries are stored as "<prefix>_count" and "<prefix>_size".
     *
     * @param string $dir The directory to scan
     * @param bool $all Value of the "all" option passed to the search callback
     * @param string $prefix The prefix for the history info keys
     */
    protected function logHistoryFor(string $dir, bool $all, string $prefix): void
    {
        // use the popularity plugin's search method to find the wanted data
        /** @var helper_plugin_popularity $pop */
        $pop = plugin_load('helper', 'popularity');
        if (!$pop) {
            // without the helper there is no callback to hand to search()
            \dokuwiki\Logger::error('Statistics Plugin - popularity helper not available, history not logged.');
            return;
        }

        $list = $this->initEmptySearchList();
        search($list, $dir, [$pop, 'searchCountCallback'], ['all' => $all], '');

        $entries = [
            $prefix . '_count' => $list['file_count'],
            $prefix . '_size' => $list['file_size'],
        ];
        foreach ($entries as $info => $value) {
            $this->db->exec(
                'INSERT OR REPLACE INTO history (
                    info, value, dt
                 ) VALUES (
                    ?, ?, CURRENT_TIMESTAMP
                 )',
                $info,
                $value
            );
        }
    }

    /**
     * Create the zero-initialized result list expected by the popularity plugin's search callback
     *
     * @todo can be dropped in favor of helper_plugin_popularity::initEmptySearchList() once it's public
     * @return array
     */
    protected function initEmptySearchList()
    {
        return array_fill_keys([
            'file_count',
            'file_size',
            'file_max',
            'file_min',
            'dir_count',
            'dir_nest',
            'file_oldest'
        ], 0);
    }
}